Compare commits

..

1 Commits

Author SHA1 Message Date
stumpylog
c4d26fba87 Fixes exact custom field monetary exact searching 2026-04-16 12:45:58 -07:00
5 changed files with 35 additions and 71 deletions

View File

@@ -514,9 +514,8 @@ class CustomFieldQueryParser:
value_field_name = CustomFieldInstance.get_value_field_name(
custom_field.data_type,
)
if (
custom_field.data_type == CustomField.FieldDataType.MONETARY
and op in self.EXPR_BY_CATEGORY["arithmetic"]
if custom_field.data_type == CustomField.FieldDataType.MONETARY and (
op in self.EXPR_BY_CATEGORY["arithmetic"] or op in {"exact", "in"}
):
value_field_name = "value_monetary_amount"
has_field = Q(custom_fields__field=custom_field)

View File

@@ -21,7 +21,6 @@ from guardian.shortcuts import get_users_with_perms
from documents.search._normalize import ascii_fold
from documents.search._query import build_permission_filter
from documents.search._query import parse_simple_highlight_query
from documents.search._query import parse_simple_text_query
from documents.search._query import parse_simple_title_query
from documents.search._query import parse_user_query
@@ -336,17 +335,6 @@ class TantivyBackend:
else:
return parse_user_query(self._index, query, tz)
def _parse_highlight_query(
    self,
    query: str,
    search_mode: SearchMode,
) -> tantivy.Query:
    """Return the query that should drive highlight snippet generation.

    For ``SearchMode.TEXT`` a simple highlight query limited to the
    ``content`` field is built; every other mode falls back to the regular
    ``_parse_query`` result.
    """
    if search_mode is not SearchMode.TEXT:
        return self._parse_query(query, search_mode)
    # Title highlighting is not supported for now, so only "content" is used.
    return parse_simple_highlight_query(self._index, query, ["content"])
def _apply_permission_filter(
self,
query: tantivy.Query,
@@ -561,7 +549,6 @@ class TantivyBackend:
self._ensure_open()
user_query = self._parse_query(query, search_mode)
highlight_query = self._parse_highlight_query(query, search_mode)
# For notes_text snippet generation, we need a query that targets the
# notes_text field directly. user_query may contain JSON-field terms
@@ -614,7 +601,7 @@ class TantivyBackend:
if snippet_generator is None:
snippet_generator = tantivy.SnippetGenerator.create(
searcher,
highlight_query,
user_query,
self._schema,
"content",
)
@@ -623,7 +610,7 @@ class TantivyBackend:
if content_html:
highlights["content"] = content_html
if search_mode is SearchMode.QUERY and "notes_text" in doc_dict:
if "notes_text" in doc_dict:
# Use notes_text (plain text) for snippet generation — tantivy's
# SnippetGenerator does not support JSON fields.
if notes_snippet_generator is None:

View File

@@ -452,14 +452,6 @@ _FIELD_BOOSTS = {"title": 2.0}
_SIMPLE_FIELD_BOOSTS = {"simple_title": 2.0}
def _simple_query_tokens(raw_query: str) -> list[str]:
    """Tokenize *raw_query* into lowercased, ASCII-folded, non-empty tokens."""
    matches = _SIMPLE_QUERY_TOKEN_RE.findall(raw_query, timeout=_REGEX_TIMEOUT)
    folded = (ascii_fold(match.lower()) for match in matches)
    # Folding may reduce a token to an empty string; those are discarded.
    return [token for token in folded if token]
def _build_simple_field_query(
index: tantivy.Index,
field: str,
@@ -555,7 +547,11 @@ def parse_simple_query(
Query string is escaped and normalized to be treated as "simple" text query.
"""
tokens = _simple_query_tokens(raw_query)
tokens = [
ascii_fold(token.lower())
for token in _SIMPLE_QUERY_TOKEN_RE.findall(raw_query, timeout=_REGEX_TIMEOUT)
]
tokens = [token for token in tokens if token]
if not tokens:
return tantivy.Query.empty_query()
@@ -568,30 +564,6 @@ def parse_simple_query(
return tantivy.Query.boolean_query(field_queries)
def parse_simple_highlight_query(
    index: tantivy.Index,
    raw_query: str,
    fields: list[str],
) -> tantivy.Query:
    """Build a query suitable for snippet generation in simple searches.

    Matching for simple text/title searches relies on regex queries over the
    normalized shadow fields so that substrings match. Tantivy's
    SnippetGenerator produces no highlights for that query shape, so snippet
    generation uses a plain term query over the real stored text fields.

    Returns an empty query when *raw_query* yields no tokens.
    """
    tokens = _simple_query_tokens(raw_query)
    if tokens:
        query_text = " ".join(tokens)
        boosts = {name: _FIELD_BOOSTS.get(name, 1.0) for name in fields}
        return index.parse_query(query_text, fields, field_boosts=boosts)
    return tantivy.Query.empty_query()
def parse_simple_text_query(
index: tantivy.Index,
raw_query: str,

View File

@@ -563,26 +563,6 @@ class TestFieldHandling:
class TestHighlightHits:
"""Test highlight_hits returns proper HTML strings, not raw Snippet objects."""
def test_highlights_simple_text_mode_returns_html_string(
    self,
    backend: TantivyBackend,
):
    """A simple text search must yield a content highlight for an exact-token hit."""
    document = Document.objects.create(
        title="Highlight Test",
        content="The quick brown fox jumps over the lazy dog",
        checksum="HH0",
        pk=89,
    )
    backend.add_or_update(document)

    results = backend.highlight_hits(
        "quick",
        [document.pk],
        search_mode=SearchMode.TEXT,
    )

    assert len(results) == 1
    found = results[0]["highlights"]
    # The matched token is expected to be wrapped in bold markup.
    assert "content" in found
    assert "<b>" in found["content"]
def test_highlights_content_returns_html_string(self, backend: TantivyBackend):
"""highlight_hits must return HTML strings (from Snippet.to_html()), not Snippet objects."""
doc = Document.objects.create(

View File

@@ -453,6 +453,32 @@ class TestCustomFieldsSearch(DirectoriesMixin, APITestCase):
),
)
def test_exact_monetary(self) -> None:
    """``exact`` should match on the numeric amount, ignoring any currency code prefix."""

    def is_usd_100(document):
        return (
            "monetary_field" in document
            and document["monetary_field"] == "USD100.00"
        )

    def is_plain_101(document):
        return (
            "monetary_field" in document
            and document["monetary_field"] == "101.00"
        )

    self._assert_query_match_predicate(
        ["monetary_field", "exact", "100"],
        is_usd_100,
    )
    self._assert_query_match_predicate(
        ["monetary_field", "exact", "101"],
        is_plain_101,
    )
def test_in_monetary(self) -> None:
    """``in`` should match on the numeric amount, ignoring any currency code prefix."""

    def has_expected_amount(document):
        return (
            "monetary_field" in document
            and document["monetary_field"] in {"USD100.00", "EUR50.00"}
        )

    self._assert_query_match_predicate(
        ["monetary_field", "in", ["100", "50"]],
        has_expected_amount,
    )
# ==========================================================#
# Subset check (document link field only) #
# ==========================================================#