Enhancement: rank autocomplete suggestions by document frequency

Replace set-based alphabetical autocomplete with Counter-based document-frequency ordering. Words appearing in more of the user's visible documents rank first — the same signal Whoosh used for TF-IDF-based ordering, computed permission-correctly from already-fetched stored values at no extra index cost. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-08 06:39:46 +00:00 · 2026-03-30 13:25:56 -07:00
parent b626f5602c
commit b10f3de2eb
3 changed files with 40 additions and 24 deletions
@@ -137,6 +137,30 @@ class TestAutocomplete:
        results = backend.autocomplete("micro", limit=10)
        assert "microsoft" in results

+    def test_results_ordered_by_document_frequency(self, backend: TantivyBackend):
+        """Autocomplete must list prefix matches found in more documents first."""
+        # Three titles contain "payment" and only one contains "payslip", so a
+        # "pay" prefix query must return "payment" ahead of "payslip".
+        for i, (title, checksum) in enumerate(
+            [
+                ("payment invoice", "AF1"),
+                ("payment receipt", "AF2"),
+                ("payment confirmation", "AF3"),
+                ("payslip march", "AF4"),
+            ],
+            start=41,  # NOTE(review): pks presumably chosen to avoid clashes — confirm
+        ):
+            doc = Document.objects.create(
+                title=title,
+                content="details",
+                checksum=checksum,
+                pk=i,
+            )
+            backend.add_or_update(doc)
+
+        results = backend.autocomplete("pay", limit=10)
+        assert results.index("payment") < results.index("payslip")
+

 class TestMoreLikeThis:
    """Test more like this functionality."""