This commit is contained in:
shamoon
2026-04-17 08:12:56 -07:00
parent d60cb0e21f
commit dd56c2ec25
2 changed files with 8 additions and 22 deletions

View File

@@ -21,7 +21,7 @@ from guardian.shortcuts import get_users_with_perms
from documents.search._normalize import ascii_fold
from documents.search._query import build_permission_filter
from documents.search._query import parse_simple_highlight_query
from documents.search._query import parse_simple_text_highlight_query
from documents.search._query import parse_simple_text_query
from documents.search._query import parse_simple_title_query
from documents.search._query import parse_user_query
@@ -336,17 +336,6 @@ class TantivyBackend:
else:
return parse_user_query(self._index, query, tz)
def _parse_highlight_query(
    self,
    query: str,
    search_mode: SearchMode,
) -> tantivy.Query:
    """Return the query to use for highlight/snippet generation.

    For ``SearchMode.TEXT`` a dedicated highlight query restricted to the
    ``content`` field is built; every other mode reuses the query produced
    by ``_parse_query``.
    """
    if search_mode is not SearchMode.TEXT:
        return self._parse_query(query, search_mode)
    # The title field does not support highlighting for now, so only the
    # content field is targeted.
    return parse_simple_highlight_query(self._index, query, ["content"])
def _apply_permission_filter(
self,
query: tantivy.Query,
@@ -561,7 +550,9 @@ class TantivyBackend:
self._ensure_open()
user_query = self._parse_query(query, search_mode)
highlight_query = self._parse_highlight_query(query, search_mode)
highlight_query = user_query
if search_mode is SearchMode.TEXT:
highlight_query = parse_simple_text_highlight_query(self._index, query)
# For notes_text snippet generation, we need a query that targets the
# notes_text field directly. user_query may contain JSON-field terms

View File

@@ -568,26 +568,21 @@ def parse_simple_query(
return tantivy.Query.boolean_query(field_queries)
def parse_simple_highlight_query(
    index: tantivy.Index,
    raw_query: str,
    fields: list[str],
) -> tantivy.Query:
    """Build a snippet-friendly query for simple text/title searches.

    Simple search matching uses regex queries but for compatibility with
    Tantivy SnippetGenerator we build a plain term query over the actual
    text fields instead.

    Args:
        index: Index whose ``parse_query`` is used to build the query.
        raw_query: The raw user input; it is tokenized with
            ``_simple_query_tokens``.
        fields: Names of the fields the query should target. Each field is
            boosted by its ``_FIELD_BOOSTS`` entry, defaulting to ``1.0``.

    Returns:
        An empty query when tokenization yields no tokens, otherwise the
        parsed query over ``fields``.
    """
    tokens = _simple_query_tokens(raw_query)
    if not tokens:
        return tantivy.Query.empty_query()
    return index.parse_query(
        " ".join(tokens),
        fields,
        field_boosts={field: _FIELD_BOOSTS.get(field, 1.0) for field in fields},
    )


def parse_simple_text_highlight_query(
    index: tantivy.Index,
    raw_query: str,
) -> tantivy.Query:
    """Build a snippet-friendly query for simple text searches.

    Simple search matching uses regex queries but for compatibility with
    Tantivy SnippetGenerator we build a plain term query over the content
    field instead.

    Args:
        index: Index whose ``parse_query`` is used to build the query.
        raw_query: The raw user input; it is tokenized with
            ``_simple_query_tokens``.

    Returns:
        An empty query when tokenization yields no tokens, otherwise the
        parsed query over the ``content`` field.
    """
    tokens = _simple_query_tokens(raw_query)
    if not tokens:
        return tantivy.Query.empty_query()
    return index.parse_query(" ".join(tokens), ["content"])
def parse_simple_text_query(