mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-04-16 21:19:00 +00:00
Compare commits
1 Commits
feature-hi
...
fix/moneta
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c4d26fba87 |
@@ -514,9 +514,8 @@ class CustomFieldQueryParser:
|
||||
value_field_name = CustomFieldInstance.get_value_field_name(
|
||||
custom_field.data_type,
|
||||
)
|
||||
if (
|
||||
custom_field.data_type == CustomField.FieldDataType.MONETARY
|
||||
and op in self.EXPR_BY_CATEGORY["arithmetic"]
|
||||
if custom_field.data_type == CustomField.FieldDataType.MONETARY and (
|
||||
op in self.EXPR_BY_CATEGORY["arithmetic"] or op in {"exact", "in"}
|
||||
):
|
||||
value_field_name = "value_monetary_amount"
|
||||
has_field = Q(custom_fields__field=custom_field)
|
||||
|
||||
@@ -21,7 +21,6 @@ from guardian.shortcuts import get_users_with_perms
|
||||
|
||||
from documents.search._normalize import ascii_fold
|
||||
from documents.search._query import build_permission_filter
|
||||
from documents.search._query import parse_simple_highlight_query
|
||||
from documents.search._query import parse_simple_text_query
|
||||
from documents.search._query import parse_simple_title_query
|
||||
from documents.search._query import parse_user_query
|
||||
@@ -336,17 +335,6 @@ class TantivyBackend:
|
||||
else:
|
||||
return parse_user_query(self._index, query, tz)
|
||||
|
||||
def _parse_highlight_query(
|
||||
self,
|
||||
query: str,
|
||||
search_mode: SearchMode,
|
||||
) -> tantivy.Query:
|
||||
if search_mode is SearchMode.TEXT:
|
||||
# title does not supported highlight for now
|
||||
return parse_simple_highlight_query(self._index, query, ["content"])
|
||||
else:
|
||||
return self._parse_query(query, search_mode)
|
||||
|
||||
def _apply_permission_filter(
|
||||
self,
|
||||
query: tantivy.Query,
|
||||
@@ -561,7 +549,6 @@ class TantivyBackend:
|
||||
|
||||
self._ensure_open()
|
||||
user_query = self._parse_query(query, search_mode)
|
||||
highlight_query = self._parse_highlight_query(query, search_mode)
|
||||
|
||||
# For notes_text snippet generation, we need a query that targets the
|
||||
# notes_text field directly. user_query may contain JSON-field terms
|
||||
@@ -614,7 +601,7 @@ class TantivyBackend:
|
||||
if snippet_generator is None:
|
||||
snippet_generator = tantivy.SnippetGenerator.create(
|
||||
searcher,
|
||||
highlight_query,
|
||||
user_query,
|
||||
self._schema,
|
||||
"content",
|
||||
)
|
||||
@@ -623,7 +610,7 @@ class TantivyBackend:
|
||||
if content_html:
|
||||
highlights["content"] = content_html
|
||||
|
||||
if search_mode is SearchMode.QUERY and "notes_text" in doc_dict:
|
||||
if "notes_text" in doc_dict:
|
||||
# Use notes_text (plain text) for snippet generation — tantivy's
|
||||
# SnippetGenerator does not support JSON fields.
|
||||
if notes_snippet_generator is None:
|
||||
|
||||
@@ -452,14 +452,6 @@ _FIELD_BOOSTS = {"title": 2.0}
|
||||
_SIMPLE_FIELD_BOOSTS = {"simple_title": 2.0}
|
||||
|
||||
|
||||
def _simple_query_tokens(raw_query: str) -> list[str]:
|
||||
tokens = [
|
||||
ascii_fold(token.lower())
|
||||
for token in _SIMPLE_QUERY_TOKEN_RE.findall(raw_query, timeout=_REGEX_TIMEOUT)
|
||||
]
|
||||
return [token for token in tokens if token]
|
||||
|
||||
|
||||
def _build_simple_field_query(
|
||||
index: tantivy.Index,
|
||||
field: str,
|
||||
@@ -555,7 +547,11 @@ def parse_simple_query(
|
||||
|
||||
Query string is escaped and normalized to be treated as "simple" text query.
|
||||
"""
|
||||
tokens = _simple_query_tokens(raw_query)
|
||||
tokens = [
|
||||
ascii_fold(token.lower())
|
||||
for token in _SIMPLE_QUERY_TOKEN_RE.findall(raw_query, timeout=_REGEX_TIMEOUT)
|
||||
]
|
||||
tokens = [token for token in tokens if token]
|
||||
if not tokens:
|
||||
return tantivy.Query.empty_query()
|
||||
|
||||
@@ -568,30 +564,6 @@ def parse_simple_query(
|
||||
return tantivy.Query.boolean_query(field_queries)
|
||||
|
||||
|
||||
def parse_simple_highlight_query(
|
||||
index: tantivy.Index,
|
||||
raw_query: str,
|
||||
fields: list[str],
|
||||
) -> tantivy.Query:
|
||||
"""Build a snippet-friendly query for simple text/title searches.
|
||||
|
||||
Simple search matching uses regex queries over the normalized shadow fields to
|
||||
support substring matches. Tantivy's SnippetGenerator does not produce
|
||||
highlights for that query shape, so for snippet generation we build a plain
|
||||
term query over the real stored text fields instead.
|
||||
"""
|
||||
|
||||
tokens = _simple_query_tokens(raw_query)
|
||||
if not tokens:
|
||||
return tantivy.Query.empty_query()
|
||||
|
||||
return index.parse_query(
|
||||
" ".join(tokens),
|
||||
fields,
|
||||
field_boosts={field: _FIELD_BOOSTS.get(field, 1.0) for field in fields},
|
||||
)
|
||||
|
||||
|
||||
def parse_simple_text_query(
|
||||
index: tantivy.Index,
|
||||
raw_query: str,
|
||||
|
||||
@@ -563,26 +563,6 @@ class TestFieldHandling:
|
||||
class TestHighlightHits:
|
||||
"""Test highlight_hits returns proper HTML strings, not raw Snippet objects."""
|
||||
|
||||
def test_highlights_simple_text_mode_returns_html_string(
|
||||
self,
|
||||
backend: TantivyBackend,
|
||||
):
|
||||
"""Simple text search should still produce content highlights for exact-token hits."""
|
||||
doc = Document.objects.create(
|
||||
title="Highlight Test",
|
||||
content="The quick brown fox jumps over the lazy dog",
|
||||
checksum="HH0",
|
||||
pk=89,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
hits = backend.highlight_hits("quick", [doc.pk], search_mode=SearchMode.TEXT)
|
||||
|
||||
assert len(hits) == 1
|
||||
highlights = hits[0]["highlights"]
|
||||
assert "content" in highlights
|
||||
assert "<b>" in highlights["content"]
|
||||
|
||||
def test_highlights_content_returns_html_string(self, backend: TantivyBackend):
|
||||
"""highlight_hits must return HTML strings (from Snippet.to_html()), not Snippet objects."""
|
||||
doc = Document.objects.create(
|
||||
|
||||
@@ -453,6 +453,32 @@ class TestCustomFieldsSearch(DirectoriesMixin, APITestCase):
|
||||
),
|
||||
)
|
||||
|
||||
def test_exact_monetary(self) -> None:
|
||||
# "exact" should match by numeric amount, ignoring currency code prefix.
|
||||
self._assert_query_match_predicate(
|
||||
["monetary_field", "exact", "100"],
|
||||
lambda document: (
|
||||
"monetary_field" in document
|
||||
and document["monetary_field"] == "USD100.00"
|
||||
),
|
||||
)
|
||||
self._assert_query_match_predicate(
|
||||
["monetary_field", "exact", "101"],
|
||||
lambda document: (
|
||||
"monetary_field" in document and document["monetary_field"] == "101.00"
|
||||
),
|
||||
)
|
||||
|
||||
def test_in_monetary(self) -> None:
|
||||
# "in" should match by numeric amount, ignoring currency code prefix.
|
||||
self._assert_query_match_predicate(
|
||||
["monetary_field", "in", ["100", "50"]],
|
||||
lambda document: (
|
||||
"monetary_field" in document
|
||||
and document["monetary_field"] in {"USD100.00", "EUR50.00"}
|
||||
),
|
||||
)
|
||||
|
||||
# ==========================================================#
|
||||
# Subset check (document link field only) #
|
||||
# ==========================================================#
|
||||
|
||||
Reference in New Issue
Block a user