mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-31 21:32:45 +00:00
Custom field indexing wouldn't have matched exactly, also, index the select field label, not its ID (might break, don't want the VM)
This commit is contained in:
@@ -1114,19 +1114,7 @@ class CustomFieldInstance(SoftDeleteModel):
|
||||
]
|
||||
|
||||
def __str__(self) -> str:
    """
    Return the display form "<field name> : <value>".

    The value part is taken from value_for_search rather than recomputed
    here, so SELECT fields render their option label instead of the stored
    option ID, and an unset or unresolvable value renders as "None"
    instead of raising.
    """
    return f"{self.field.name} : {self.value_for_search}"
|
||||
|
||||
@classmethod
|
||||
def get_value_field_name(cls, data_type: CustomField.FieldDataType):
|
||||
@@ -1144,6 +1132,25 @@ class CustomFieldInstance(SoftDeleteModel):
|
||||
value_field_name = self.get_value_field_name(self.field.data_type)
|
||||
return getattr(self, value_field_name)
|
||||
|
||||
@property
|
||||
def value_for_search(self) -> str | None:
|
||||
"""
|
||||
Return the value suitable for full-text indexing and display, or None
|
||||
if the value is unset.
|
||||
|
||||
For SELECT fields, resolves the human-readable label rather than the
|
||||
opaque option ID stored in value_select.
|
||||
"""
|
||||
if self.value is None:
|
||||
return None
|
||||
if self.field.data_type == CustomField.FieldDataType.SELECT:
|
||||
options = (self.field.extra_data or {}).get("select_options", [])
|
||||
return next(
|
||||
(o["label"] for o in options if o.get("id") == self.value),
|
||||
None,
|
||||
)
|
||||
return str(self.value)
|
||||
|
||||
|
||||
if settings.AUDIT_LOG_ENABLED:
|
||||
auditlog.register(
|
||||
|
||||
@@ -339,14 +339,17 @@ class TantivyBackend:
|
||||
# Custom fields — JSON for structured queries (custom_fields.name:x, custom_fields.value:y),
|
||||
# companion text field for default full-text search.
|
||||
for cfi in document.custom_fields.all():
|
||||
search_value = cfi.value_for_search
|
||||
if search_value is None:
|
||||
continue
|
||||
doc.add_json(
|
||||
"custom_fields",
|
||||
{
|
||||
"name": cfi.field.name,
|
||||
"value": str(cfi.value),
|
||||
"value": search_value,
|
||||
},
|
||||
)
|
||||
doc.add_text("custom_field", str(cfi.value))
|
||||
doc.add_text("custom_field", str(cfi))
|
||||
|
||||
# Dates
|
||||
created_date = datetime(
|
||||
@@ -513,7 +516,7 @@ class TantivyBackend:
|
||||
all_hits = [(hit[0], hit[1] / max_score) for hit in all_hits]
|
||||
|
||||
# Apply threshold filter if configured (score-based search only)
|
||||
threshold = getattr(settings, "ADVANCED_FUZZY_SEARCH_THRESHOLD", None)
|
||||
threshold = settings.ADVANCED_FUZZY_SEARCH_THRESHOLD
|
||||
if threshold is not None and not sort_field:
|
||||
all_hits = [hit for hit in all_hits if hit[1] >= threshold]
|
||||
|
||||
|
||||
@@ -479,7 +479,7 @@ def parse_user_query(
|
||||
field_boosts=_FIELD_BOOSTS,
|
||||
)
|
||||
|
||||
threshold = getattr(settings, "ADVANCED_FUZZY_SEARCH_THRESHOLD", None)
|
||||
threshold = settings.ADVANCED_FUZZY_SEARCH_THRESHOLD
|
||||
if threshold is not None:
|
||||
fuzzy = index.parse_query(
|
||||
query_str,
|
||||
|
||||
@@ -379,6 +379,84 @@ class TestFieldHandling:
|
||||
)
|
||||
assert results.total == 1
|
||||
|
||||
def test_select_custom_field_indexes_label_not_id(self, backend: TantivyBackend):
    """A SELECT custom field is searchable by its option label, never by its raw option ID."""
    select_options = [
        {"id": "opt_abc", "label": "Invoice"},
        {"id": "opt_def", "label": "Receipt"},
    ]
    category = CustomField.objects.create(
        name="Category",
        data_type=CustomField.FieldDataType.SELECT,
        extra_data={"select_options": select_options},
    )
    document = Document.objects.create(
        title="Categorised doc",
        content="test",
        checksum="SEL1",
        pk=71,
    )
    CustomFieldInstance.objects.create(
        document=document,
        field=category,
        value_select="opt_abc",
    )
    backend.add_or_update(document)

    # Both lookups share the same paging / sorting arguments.
    search_kwargs = {
        "user": None,
        "page": 1,
        "page_size": 10,
        "sort_field": None,
        "sort_reverse": False,
    }

    # Searching by the label finds the document.
    by_label = backend.search("custom_fields.value:invoice", **search_kwargs)
    assert by_label.total == 1

    # Searching by the option ID finds nothing.
    by_option_id = backend.search("custom_fields.value:opt_abc", **search_kwargs)
    assert by_option_id.total == 0
|
||||
|
||||
def test_none_custom_field_value_not_indexed(self, backend: TantivyBackend):
    """A custom field instance whose value is unset contributes no entry to the index."""
    optional_field = CustomField.objects.create(
        name="Optional",
        data_type=CustomField.FieldDataType.SELECT,
        extra_data={"select_options": [{"id": "opt_1", "label": "Yes"}]},
    )
    document = Document.objects.create(
        title="Unset field doc",
        content="test",
        checksum="SEL2",
        pk=72,
    )
    CustomFieldInstance.objects.create(
        document=document,
        field=optional_field,
        value_select=None,
    )
    backend.add_or_update(document)

    search_kwargs = {
        "user": None,
        "page": 1,
        "page_size": 10,
        "sort_field": None,
        "sort_reverse": False,
    }

    # A stringified None ("none") must not be searchable as a value.
    hits = backend.search("custom_fields.value:none", **search_kwargs)
    assert hits.total == 0
|
||||
|
||||
def test_notes_include_user_information(self, backend: TantivyBackend):
|
||||
"""Notes must be indexed with user information when available for structured queries."""
|
||||
user = User.objects.create_user("notewriter")
|
||||
|
||||
Reference in New Issue
Block a user