mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-04-17 21:48:55 +00:00
Compare commits
2 Commits
fix/moneta
...
feature-hi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dd56c2ec25 | ||
|
|
d60cb0e21f |
@@ -514,8 +514,9 @@ class CustomFieldQueryParser:
|
||||
value_field_name = CustomFieldInstance.get_value_field_name(
|
||||
custom_field.data_type,
|
||||
)
|
||||
if custom_field.data_type == CustomField.FieldDataType.MONETARY and (
|
||||
op in self.EXPR_BY_CATEGORY["arithmetic"] or op in {"exact", "in"}
|
||||
if (
|
||||
custom_field.data_type == CustomField.FieldDataType.MONETARY
|
||||
and op in self.EXPR_BY_CATEGORY["arithmetic"]
|
||||
):
|
||||
value_field_name = "value_monetary_amount"
|
||||
has_field = Q(custom_fields__field=custom_field)
|
||||
|
||||
@@ -21,6 +21,7 @@ from guardian.shortcuts import get_users_with_perms
|
||||
|
||||
from documents.search._normalize import ascii_fold
|
||||
from documents.search._query import build_permission_filter
|
||||
from documents.search._query import parse_simple_text_highlight_query
|
||||
from documents.search._query import parse_simple_text_query
|
||||
from documents.search._query import parse_simple_title_query
|
||||
from documents.search._query import parse_user_query
|
||||
@@ -549,6 +550,9 @@ class TantivyBackend:
|
||||
|
||||
self._ensure_open()
|
||||
user_query = self._parse_query(query, search_mode)
|
||||
highlight_query = user_query
|
||||
if search_mode is SearchMode.TEXT:
|
||||
highlight_query = parse_simple_text_highlight_query(self._index, query)
|
||||
|
||||
# For notes_text snippet generation, we need a query that targets the
|
||||
# notes_text field directly. user_query may contain JSON-field terms
|
||||
@@ -601,7 +605,7 @@ class TantivyBackend:
|
||||
if snippet_generator is None:
|
||||
snippet_generator = tantivy.SnippetGenerator.create(
|
||||
searcher,
|
||||
user_query,
|
||||
highlight_query,
|
||||
self._schema,
|
||||
"content",
|
||||
)
|
||||
@@ -610,7 +614,7 @@ class TantivyBackend:
|
||||
if content_html:
|
||||
highlights["content"] = content_html
|
||||
|
||||
if "notes_text" in doc_dict:
|
||||
if search_mode is SearchMode.QUERY and "notes_text" in doc_dict:
|
||||
# Use notes_text (plain text) for snippet generation — tantivy's
|
||||
# SnippetGenerator does not support JSON fields.
|
||||
if notes_snippet_generator is None:
|
||||
|
||||
@@ -452,6 +452,14 @@ _FIELD_BOOSTS = {"title": 2.0}
|
||||
_SIMPLE_FIELD_BOOSTS = {"simple_title": 2.0}
|
||||
|
||||
|
||||
def _simple_query_tokens(raw_query: str) -> list[str]:
|
||||
tokens = [
|
||||
ascii_fold(token.lower())
|
||||
for token in _SIMPLE_QUERY_TOKEN_RE.findall(raw_query, timeout=_REGEX_TIMEOUT)
|
||||
]
|
||||
return [token for token in tokens if token]
|
||||
|
||||
|
||||
def _build_simple_field_query(
|
||||
index: tantivy.Index,
|
||||
field: str,
|
||||
@@ -547,11 +555,7 @@ def parse_simple_query(
|
||||
|
||||
Query string is escaped and normalized to be treated as "simple" text query.
|
||||
"""
|
||||
tokens = [
|
||||
ascii_fold(token.lower())
|
||||
for token in _SIMPLE_QUERY_TOKEN_RE.findall(raw_query, timeout=_REGEX_TIMEOUT)
|
||||
]
|
||||
tokens = [token for token in tokens if token]
|
||||
tokens = _simple_query_tokens(raw_query)
|
||||
if not tokens:
|
||||
return tantivy.Query.empty_query()
|
||||
|
||||
@@ -564,6 +568,23 @@ def parse_simple_query(
|
||||
return tantivy.Query.boolean_query(field_queries)
|
||||
|
||||
|
||||
def parse_simple_text_highlight_query(
|
||||
index: tantivy.Index,
|
||||
raw_query: str,
|
||||
) -> tantivy.Query:
|
||||
"""Build a snippet-friendly query for simple text searches.
|
||||
|
||||
Simple search matching uses regex queries but for compatibility with Tantivy
|
||||
SnippetGenerator we build a plain term query over the content field instead.
|
||||
"""
|
||||
|
||||
tokens = _simple_query_tokens(raw_query)
|
||||
if not tokens:
|
||||
return tantivy.Query.empty_query()
|
||||
|
||||
return index.parse_query(" ".join(tokens), ["content"])
|
||||
|
||||
|
||||
def parse_simple_text_query(
|
||||
index: tantivy.Index,
|
||||
raw_query: str,
|
||||
|
||||
@@ -563,6 +563,26 @@ class TestFieldHandling:
|
||||
class TestHighlightHits:
|
||||
"""Test highlight_hits returns proper HTML strings, not raw Snippet objects."""
|
||||
|
||||
def test_highlights_simple_text_mode_returns_html_string(
|
||||
self,
|
||||
backend: TantivyBackend,
|
||||
):
|
||||
"""Simple text search should still produce content highlights for exact-token hits."""
|
||||
doc = Document.objects.create(
|
||||
title="Highlight Test",
|
||||
content="The quick brown fox jumps over the lazy dog",
|
||||
checksum="HH0",
|
||||
pk=89,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
hits = backend.highlight_hits("quick", [doc.pk], search_mode=SearchMode.TEXT)
|
||||
|
||||
assert len(hits) == 1
|
||||
highlights = hits[0]["highlights"]
|
||||
assert "content" in highlights
|
||||
assert "<b>" in highlights["content"]
|
||||
|
||||
def test_highlights_content_returns_html_string(self, backend: TantivyBackend):
|
||||
"""highlight_hits must return HTML strings (from Snippet.to_html()), not Snippet objects."""
|
||||
doc = Document.objects.create(
|
||||
|
||||
@@ -453,32 +453,6 @@ class TestCustomFieldsSearch(DirectoriesMixin, APITestCase):
|
||||
),
|
||||
)
|
||||
|
||||
def test_exact_monetary(self) -> None:
|
||||
# "exact" should match by numeric amount, ignoring currency code prefix.
|
||||
self._assert_query_match_predicate(
|
||||
["monetary_field", "exact", "100"],
|
||||
lambda document: (
|
||||
"monetary_field" in document
|
||||
and document["monetary_field"] == "USD100.00"
|
||||
),
|
||||
)
|
||||
self._assert_query_match_predicate(
|
||||
["monetary_field", "exact", "101"],
|
||||
lambda document: (
|
||||
"monetary_field" in document and document["monetary_field"] == "101.00"
|
||||
),
|
||||
)
|
||||
|
||||
def test_in_monetary(self) -> None:
|
||||
# "in" should match by numeric amount, ignoring currency code prefix.
|
||||
self._assert_query_match_predicate(
|
||||
["monetary_field", "in", ["100", "50"]],
|
||||
lambda document: (
|
||||
"monetary_field" in document
|
||||
and document["monetary_field"] in {"USD100.00", "EUR50.00"}
|
||||
),
|
||||
)
|
||||
|
||||
# ==========================================================#
|
||||
# Subset check (document link field only) #
|
||||
# ==========================================================#
|
||||
|
||||
Reference in New Issue
Block a user