Fix: Don't store autocomplete_word, only index it (#12867)

This commit is contained in:
Trenton H
2026-05-29 14:09:04 -07:00
committed by GitHub
parent 98a7ed32e3
commit bbceb5dac6
2 changed files with 19 additions and 14 deletions
+4 -2
View File
@@ -81,8 +81,10 @@ def build_schema() -> tantivy.Schema:
tokenizer_name="simple_search_analyzer",
)
# Autocomplete prefix scan - stored, not indexed
sb.add_text_field("autocomplete_word", stored=True, tokenizer_name="raw")
# Autocomplete prefix scan via terms_with_prefix, which walks the field's
# term dictionary - so the field must be indexed (term dict), not stored.
# The stored value is never read back, so storing it only wastes space.
sb.add_text_field("autocomplete_word", stored=False, tokenizer_name="raw")
sb.add_text_field("tag", stored=True, tokenizer_name="paperless_text")
+15 -12
View File
@@ -987,29 +987,32 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
THEN:
- The similar documents are returned from the API request
"""
d1 = Document.objects.create(
# Distinct created/added dates: documents created at the same instant
# share a timestamp term, and more_like_this (which cannot be scoped to
# content fields) would then match on it, surfacing unrelated documents.
d1 = DocumentFactory(
title="invoice",
content="the thing i bought at a shop and paid with bank account",
checksum="A",
pk=1,
created=datetime.date(2018, 1, 1),
added=timezone.make_aware(datetime.datetime(2018, 1, 1)),
)
d2 = Document.objects.create(
d2 = DocumentFactory(
title="bank statement 1",
content="things i paid for in august",
pk=2,
checksum="B",
created=datetime.date(2019, 3, 4),
added=timezone.make_aware(datetime.datetime(2019, 3, 4)),
)
d3 = Document.objects.create(
d3 = DocumentFactory(
title="bank statement 3",
content="things i paid for in september",
pk=3,
checksum="C",
created=datetime.date(2020, 7, 9),
added=timezone.make_aware(datetime.datetime(2020, 7, 9)),
)
d4 = Document.objects.create(
d4 = DocumentFactory(
title="Quarterly Report",
content="quarterly revenue profit margin earnings growth",
pk=4,
checksum="ABC",
created=datetime.date(2021, 11, 30),
added=timezone.make_aware(datetime.datetime(2021, 11, 30)),
)
backend = get_backend()
backend.add_or_update(d1)