mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-04-03 06:38:51 +00:00
perf: pre-filter autocomplete candidates with regex prefix query
This commit is contained in:
@@ -605,11 +605,26 @@ class TantivyBackend:
|
||||
# Apply permission filter for non-superusers so autocomplete words
|
||||
# from invisible documents don't leak to other users.
|
||||
if user is not None and not user.is_superuser:
|
||||
base_query = build_permission_filter(self._schema, user)
|
||||
permission_query = build_permission_filter(self._schema, user)
|
||||
else:
|
||||
base_query = tantivy.Query.all_query()
|
||||
permission_query = tantivy.Query.all_query()
|
||||
|
||||
results = searcher.search(base_query, limit=10000)
|
||||
# Narrow to documents that actually contain a word starting with the
|
||||
# prefix before loading stored fields, avoiding a full collection scan.
|
||||
prefix_filter = tantivy.Query.regex_query(
|
||||
self._schema,
|
||||
"autocomplete_word",
|
||||
regex.escape(normalized_term) + ".*",
|
||||
)
|
||||
results = searcher.search(
|
||||
tantivy.Query.boolean_query(
|
||||
[
|
||||
(tantivy.Occur.Must, permission_query),
|
||||
(tantivy.Occur.Must, prefix_filter),
|
||||
],
|
||||
),
|
||||
limit=10000,
|
||||
)
|
||||
|
||||
# Count how many visible documents each word appears in.
|
||||
# Using Counter (not set) preserves per-word document frequency so
|
||||
|
||||
@@ -53,7 +53,7 @@ def build_schema() -> tantivy.Schema:
|
||||
# CJK support - not stored, indexed only
|
||||
sb.add_text_field("bigram_content", stored=False, tokenizer_name="bigram_analyzer")
|
||||
|
||||
# Autocomplete prefix scan - stored, not indexed
|
||||
# Autocomplete prefix scan - stored and indexed (raw tokenizer) so regex prefix queries can match it
|
||||
sb.add_text_field("autocomplete_word", stored=True, tokenizer_name="raw")
|
||||
|
||||
sb.add_text_field("tag", stored=True, tokenizer_name="paperless_text")
|
||||
|
||||
Reference in New Issue
Block a user