feat: delegate sorting to Tantivy and use page-only highlights in viewset

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Trenton Holmes
2026-04-03 15:35:14 -07:00
parent 6cf01dd383
commit d998d3fbaf
2 changed files with 157 additions and 28 deletions
+83
View File
@@ -1503,6 +1503,89 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
[d2.id, d1.id, d3.id],
)
def test_search_with_tantivy_native_sort(self) -> None:
"""When ordering by a Tantivy-sortable field, results must be correctly sorted."""
backend = get_backend()
for i, asn in enumerate([30, 10, 20]):
doc = Document.objects.create(
title=f"sortable doc {i}",
content="searchable content",
checksum=f"TNS{i}",
archive_serial_number=asn,
)
backend.add_or_update(doc)
response = self.client.get(
"/api/documents/?query=searchable&ordering=archive_serial_number",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
asns = [doc["archive_serial_number"] for doc in response.data["results"]]
self.assertEqual(asns, [10, 20, 30])
response = self.client.get(
"/api/documents/?query=searchable&ordering=-archive_serial_number",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
asns = [doc["archive_serial_number"] for doc in response.data["results"]]
self.assertEqual(asns, [30, 20, 10])
def test_search_page_2_returns_correct_slice(self) -> None:
"""Page 2 must return the second slice, not overlap with page 1."""
backend = get_backend()
for i in range(10):
doc = Document.objects.create(
title=f"doc {i}",
content="paginated content",
checksum=f"PG2{i}",
archive_serial_number=i + 1,
)
backend.add_or_update(doc)
response = self.client.get(
"/api/documents/?query=paginated&ordering=archive_serial_number&page=1&page_size=3",
)
page1_ids = [r["id"] for r in response.data["results"]]
self.assertEqual(len(page1_ids), 3)
response = self.client.get(
"/api/documents/?query=paginated&ordering=archive_serial_number&page=2&page_size=3",
)
page2_ids = [r["id"] for r in response.data["results"]]
self.assertEqual(len(page2_ids), 3)
# No overlap between pages
self.assertEqual(set(page1_ids) & set(page2_ids), set())
# Page 2 ASNs are higher than page 1
page1_asns = [
Document.objects.get(pk=pk).archive_serial_number for pk in page1_ids
]
page2_asns = [
Document.objects.get(pk=pk).archive_serial_number for pk in page2_ids
]
self.assertTrue(max(page1_asns) < min(page2_asns))
def test_search_all_field_contains_all_ids_when_paginated(self) -> None:
"""The 'all' field must contain every matching ID, even when paginated."""
backend = get_backend()
doc_ids = []
for i in range(10):
doc = Document.objects.create(
title=f"all field doc {i}",
content="allfield content",
checksum=f"AF{i}",
)
backend.add_or_update(doc)
doc_ids.append(doc.pk)
response = self.client.get(
"/api/documents/?query=allfield&page=1&page_size=3",
headers={"Accept": "application/json; version=9"},
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(len(response.data["results"]), 3)
# "all" must contain ALL 10 matching IDs
self.assertCountEqual(response.data["all"], doc_ids)
@mock.patch("documents.bulk_edit.bulk_update_documents")
def test_global_search(self, m) -> None:
"""