diff --git a/src/documents/models.py b/src/documents/models.py index 08f995ff6..96f027b94 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -1114,19 +1114,7 @@ class CustomFieldInstance(SoftDeleteModel): ] def __str__(self) -> str: - value = ( - next( - option.get("label") - for option in self.field.extra_data["select_options"] - if option.get("id") == self.value_select - ) - if ( - self.field.data_type == CustomField.FieldDataType.SELECT - and self.value_select is not None - ) - else self.value - ) - return str(self.field.name) + f" : {value}" + return str(self.field.name) + f" : {self.value_for_search}" @classmethod def get_value_field_name(cls, data_type: CustomField.FieldDataType): @@ -1144,6 +1132,25 @@ class CustomFieldInstance(SoftDeleteModel): value_field_name = self.get_value_field_name(self.field.data_type) return getattr(self, value_field_name) + @property + def value_for_search(self) -> str | None: + """ + Return the value suitable for full-text indexing and display, or None + if the value is unset. + + For SELECT fields, resolves the human-readable label rather than the + opaque option ID stored in value_select. + """ + if self.value is None: + return None + if self.field.data_type == CustomField.FieldDataType.SELECT: + options = (self.field.extra_data or {}).get("select_options", []) + return next( + (o["label"] for o in options if o.get("id") == self.value), + None, + ) + return str(self.value) + if settings.AUDIT_LOG_ENABLED: auditlog.register( diff --git a/src/documents/search/_backend.py b/src/documents/search/_backend.py index b97957f21..f299bcd3d 100644 --- a/src/documents/search/_backend.py +++ b/src/documents/search/_backend.py @@ -339,14 +339,17 @@ class TantivyBackend: # Custom fields — JSON for structured queries (custom_fields.name:x, custom_fields.value:y), # companion text field for default full-text search. for cfi in document.custom_fields.all(): + search_value = cfi.value_for_search + if search_value is None: + continue doc.add_json( "custom_fields", { "name": cfi.field.name, - "value": str(cfi.value), + "value": search_value, }, ) - doc.add_text("custom_field", str(cfi.value)) + doc.add_text("custom_field", str(cfi)) # Dates created_date = datetime( @@ -513,7 +516,7 @@ class TantivyBackend: all_hits = [(hit[0], hit[1] / max_score) for hit in all_hits] # Apply threshold filter if configured (score-based search only) - threshold = getattr(settings, "ADVANCED_FUZZY_SEARCH_THRESHOLD", None) + threshold = settings.ADVANCED_FUZZY_SEARCH_THRESHOLD if threshold is not None and not sort_field: all_hits = [hit for hit in all_hits if hit[1] >= threshold] diff --git a/src/documents/search/_query.py b/src/documents/search/_query.py index dfa73fa50..9cde9e510 100644 --- a/src/documents/search/_query.py +++ b/src/documents/search/_query.py @@ -479,7 +479,7 @@ def parse_user_query( field_boosts=_FIELD_BOOSTS, ) - threshold = getattr(settings, "ADVANCED_FUZZY_SEARCH_THRESHOLD", None) + threshold = settings.ADVANCED_FUZZY_SEARCH_THRESHOLD if threshold is not None: fuzzy = index.parse_query( query_str, diff --git a/src/documents/tests/search/test_backend.py b/src/documents/tests/search/test_backend.py index 4211611a0..12701d428 100644 --- a/src/documents/tests/search/test_backend.py +++ b/src/documents/tests/search/test_backend.py @@ -379,6 +379,84 @@ class TestFieldHandling: ) assert results.total == 1 + def test_select_custom_field_indexes_label_not_id(self, backend: TantivyBackend): + """SELECT custom fields must index the human-readable label, not the opaque option ID.""" + field = CustomField.objects.create( + name="Category", + data_type=CustomField.FieldDataType.SELECT, + extra_data={ + "select_options": [ + {"id": "opt_abc", "label": "Invoice"}, + {"id": "opt_def", "label": "Receipt"}, + ], + }, + ) + doc = Document.objects.create( + title="Categorised doc", + content="test", + checksum="SEL1", + pk=71, + ) + CustomFieldInstance.objects.create( + document=doc, + field=field, + value_select="opt_abc", + ) + backend.add_or_update(doc) + + # Label should be findable + results = backend.search( + "custom_fields.value:invoice", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + ) + assert results.total == 1 + + # Opaque ID must not appear in the index + results = backend.search( + "custom_fields.value:opt_abc", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + ) + assert results.total == 0 + + def test_none_custom_field_value_not_indexed(self, backend: TantivyBackend): + """Custom field instances with no value set must not produce an index entry.""" + field = CustomField.objects.create( + name="Optional", + data_type=CustomField.FieldDataType.SELECT, + extra_data={"select_options": [{"id": "opt_1", "label": "Yes"}]}, + ) + doc = Document.objects.create( + title="Unset field doc", + content="test", + checksum="SEL2", + pk=72, + ) + CustomFieldInstance.objects.create( + document=doc, + field=field, + value_select=None, + ) + backend.add_or_update(doc) + + # The string "none" must not appear as an indexed value + results = backend.search( + "custom_fields.value:none", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + ) + assert results.total == 0 + def test_notes_include_user_information(self, backend: TantivyBackend): """Notes must be indexed with user information when available for structured queries.""" user = User.objects.create_user("notewriter")