mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-08 06:39:46 +00:00
Enhancement: rank autocomplete suggestions by document frequency
Replace set-based alphabetical autocomplete with Counter-based document-frequency ordering. Words appearing in more of the user's visible documents rank first — the same signal Whoosh used for Tf/Idf-based ordering, computed permission-correctly from already-fetched stored values at no extra index cost. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -137,6 +137,30 @@ class TestAutocomplete:
|
||||
results = backend.autocomplete("micro", limit=10)
|
||||
assert "microsoft" in results
|
||||
|
||||
def test_results_ordered_by_document_frequency(self, backend: TantivyBackend):
    """A prefix match found in more documents must be suggested earlier."""
    # Three of the titles contain "payment" and only one contains "payslip",
    # so autocompleting "pay" has to list "payment" ahead of "payslip".
    fixtures = [
        ("payment invoice", "AF1"),
        ("payment receipt", "AF2"),
        ("payment confirmation", "AF3"),
        ("payslip march", "AF4"),
    ]
    first_pk = 41
    for offset, (doc_title, doc_checksum) in enumerate(fixtures):
        document = Document.objects.create(
            title=doc_title,
            content="details",
            checksum=doc_checksum,
            pk=first_pk + offset,
        )
        backend.add_or_update(document)

    suggestions = backend.autocomplete("pay", limit=10)
    # list.index raises ValueError when a word is absent, which also fails
    # the test, so no separate membership assertion is required.
    payment_rank = suggestions.index("payment")
    payslip_rank = suggestions.index("payslip")
    assert payment_rank < payslip_rank
|
||||
|
||||
|
||||
class TestMoreLikeThis:
|
||||
"""Test more like this functionality."""
|
||||
|
||||
Reference in New Issue
Block a user