Fixes exact searching of monetary custom fields

2026-04-16 21:19:00 +00:00 · 2026-04-16 12:45:58 -07:00
5 changed files with 35 additions and 71 deletions
--- a/src/documents/filters.py
+++ b/src/documents/filters.py
@@ -514,9 +514,8 @@ class CustomFieldQueryParser:
        value_field_name = CustomFieldInstance.get_value_field_name(
            custom_field.data_type,
        )
-        if (
-            custom_field.data_type == CustomField.FieldDataType.MONETARY
-            and op in self.EXPR_BY_CATEGORY["arithmetic"]
+        if custom_field.data_type == CustomField.FieldDataType.MONETARY and (
+            op in self.EXPR_BY_CATEGORY["arithmetic"] or op in {"exact", "in"}
        ):
            value_field_name = "value_monetary_amount"
        has_field = Q(custom_fields__field=custom_field)
--- a/src/documents/search/_backend.py
+++ b/src/documents/search/_backend.py
@@ -21,7 +21,6 @@ from guardian.shortcuts import get_users_with_perms

 from documents.search._normalize import ascii_fold
 from documents.search._query import build_permission_filter
-from documents.search._query import parse_simple_highlight_query
 from documents.search._query import parse_simple_text_query
 from documents.search._query import parse_simple_title_query
 from documents.search._query import parse_user_query
@@ -336,17 +335,6 @@ class TantivyBackend:
        else:
            return parse_user_query(self._index, query, tz)

-    def _parse_highlight_query(
-        self,
-        query: str,
-        search_mode: SearchMode,
-    ) -> tantivy.Query:
-        if search_mode is SearchMode.TEXT:
-            # title does not supported highlight for now
-            return parse_simple_highlight_query(self._index, query, ["content"])
-        else:
-            return self._parse_query(query, search_mode)
-
    def _apply_permission_filter(
        self,
        query: tantivy.Query,
@@ -561,7 +549,6 @@ class TantivyBackend:

        self._ensure_open()
        user_query = self._parse_query(query, search_mode)
-        highlight_query = self._parse_highlight_query(query, search_mode)

        # For notes_text snippet generation, we need a query that targets the
        # notes_text field directly. user_query may contain JSON-field terms
@@ -614,7 +601,7 @@ class TantivyBackend:
                if snippet_generator is None:
                    snippet_generator = tantivy.SnippetGenerator.create(
                        searcher,
-                        highlight_query,
+                        user_query,
                        self._schema,
                        "content",
                    )
@@ -623,7 +610,7 @@ class TantivyBackend:
                if content_html:
                    highlights["content"] = content_html

-                if search_mode is SearchMode.QUERY and "notes_text" in doc_dict:
+                if "notes_text" in doc_dict:
                    # Use notes_text (plain text) for snippet generation — tantivy's
                    # SnippetGenerator does not support JSON fields.
                    if notes_snippet_generator is None:
--- a/src/documents/search/_query.py
+++ b/src/documents/search/_query.py
@@ -452,14 +452,6 @@ _FIELD_BOOSTS = {"title": 2.0}
 _SIMPLE_FIELD_BOOSTS = {"simple_title": 2.0}


-def _simple_query_tokens(raw_query: str) -> list[str]:
-    tokens = [
-        ascii_fold(token.lower())
-        for token in _SIMPLE_QUERY_TOKEN_RE.findall(raw_query, timeout=_REGEX_TIMEOUT)
-    ]
-    return [token for token in tokens if token]
-
-
 def _build_simple_field_query(
    index: tantivy.Index,
    field: str,
@@ -555,7 +547,11 @@ def parse_simple_query(

    Query string is escaped and normalized to be treated as "simple" text query.
    """
-    tokens = _simple_query_tokens(raw_query)
+    tokens = [
+        ascii_fold(token.lower())
+        for token in _SIMPLE_QUERY_TOKEN_RE.findall(raw_query, timeout=_REGEX_TIMEOUT)
+    ]
+    tokens = [token for token in tokens if token]
    if not tokens:
        return tantivy.Query.empty_query()

@@ -568,30 +564,6 @@ def parse_simple_query(
    return tantivy.Query.boolean_query(field_queries)


-def parse_simple_highlight_query(
-    index: tantivy.Index,
-    raw_query: str,
-    fields: list[str],
-) -> tantivy.Query:
-    """Build a snippet-friendly query for simple text/title searches.
-
-    Simple search matching uses regex queries over the normalized shadow fields to
-    support substring matches. Tantivy's SnippetGenerator does not produce
-    highlights for that query shape, so for snippet generation we build a plain
-    term query over the real stored text fields instead.
-    """
-
-    tokens = _simple_query_tokens(raw_query)
-    if not tokens:
-        return tantivy.Query.empty_query()
-
-    return index.parse_query(
-        " ".join(tokens),
-        fields,
-        field_boosts={field: _FIELD_BOOSTS.get(field, 1.0) for field in fields},
-    )
-
-
 def parse_simple_text_query(
    index: tantivy.Index,
    raw_query: str,
--- a/src/documents/tests/search/test_backend.py
+++ b/src/documents/tests/search/test_backend.py
@@ -563,26 +563,6 @@ class TestFieldHandling:
 class TestHighlightHits:
    """Test highlight_hits returns proper HTML strings, not raw Snippet objects."""

-    def test_highlights_simple_text_mode_returns_html_string(
-        self,
-        backend: TantivyBackend,
-    ):
-        """Simple text search should still produce content highlights for exact-token hits."""
-        doc = Document.objects.create(
-            title="Highlight Test",
-            content="The quick brown fox jumps over the lazy dog",
-            checksum="HH0",
-            pk=89,
-        )
-        backend.add_or_update(doc)
-
-        hits = backend.highlight_hits("quick", [doc.pk], search_mode=SearchMode.TEXT)
-
-        assert len(hits) == 1
-        highlights = hits[0]["highlights"]
-        assert "content" in highlights
-        assert "<b>" in highlights["content"]
-
    def test_highlights_content_returns_html_string(self, backend: TantivyBackend):
        """highlight_hits must return HTML strings (from Snippet.to_html()), not Snippet objects."""
        doc = Document.objects.create(
--- a/src/documents/tests/test_api_filter_by_custom_fields.py
+++ b/src/documents/tests/test_api_filter_by_custom_fields.py
@@ -453,6 +453,32 @@ class TestCustomFieldsSearch(DirectoriesMixin, APITestCase):
            ),
        )

+    def test_exact_monetary(self) -> None:
+        # "exact" should match by numeric amount, ignoring currency code prefix.
+        self._assert_query_match_predicate(
+            ["monetary_field", "exact", "100"],
+            lambda document: (
+                "monetary_field" in document
+                and document["monetary_field"] == "USD100.00"
+            ),
+        )
+        self._assert_query_match_predicate(
+            ["monetary_field", "exact", "101"],
+            lambda document: (
+                "monetary_field" in document and document["monetary_field"] == "101.00"
+            ),
+        )
+
+    def test_in_monetary(self) -> None:
+        # "in" should match by numeric amount, ignoring currency code prefix.
+        self._assert_query_match_predicate(
+            ["monetary_field", "in", ["100", "50"]],
+            lambda document: (
+                "monetary_field" in document
+                and document["monetary_field"] in {"USD100.00", "EUR50.00"}
+            ),
+        )
+
    # ==========================================================#
    # Subset check (document link field only)                   #
    # ==========================================================#