feat: add highlight_page/highlight_page_size params to search()

Gate expensive snippet/highlight generation to only the requested
slice of hits, allowing the viewset to avoid generating highlights
for all 10k results when only 25 are displayed.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Trenton Holmes
2026-04-03 15:10:00 -07:00
parent 46008d2da7
commit 0d915c58a4
2 changed files with 94 additions and 2 deletions

View File

@@ -435,6 +435,8 @@ class TantivyBackend:
*,
sort_reverse: bool,
search_mode: SearchMode = SearchMode.QUERY,
highlight_page: int | None = None,
highlight_page_size: int | None = None,
) -> SearchResults:
"""
Execute a search query against the document index.
@@ -533,6 +535,15 @@ class TantivyBackend:
hits: list[SearchHit] = []
snippet_generator = None
# Determine which hits need highlights
if highlight_page is not None and highlight_page_size is not None:
hl_start = (highlight_page - 1) * highlight_page_size
hl_end = hl_start + highlight_page_size
else:
# Highlight all hits (backward-compatible default)
hl_start = 0
hl_end = len(page_hits)
for rank, (doc_address, score) in enumerate(page_hits, start=offset + 1):
# Get the actual document from the searcher using the doc address
actual_doc = searcher.doc(doc_address)
@@ -541,8 +552,9 @@ class TantivyBackend:
highlights: dict[str, str] = {}
# Generate highlights if score > 0
if score > 0:
# Generate highlights if score > 0 and hit is in the highlight window
hit_index = rank - offset - 1 # 0-based index within page_hits
if score > 0 and hl_start <= hit_index < hl_end:
try:
if snippet_generator is None:
snippet_generator = tantivy.SnippetGenerator.create(

View File

@@ -428,6 +428,86 @@ class TestSearch:
== 0
)
def test_highlight_page_only_highlights_requested_slice(
    self,
    backend: TantivyBackend,
):
    """
    Only hits inside the highlight_page slice carry non-empty highlights.

    Six matching documents are indexed and returned in a single result
    page. With highlight_page=1 and highlight_page_size=3 the highlight
    window covers result positions 0-2, so exactly those hits (and no
    others) must have highlights.
    """
    for i in range(6):
        doc = Document.objects.create(
            title=f"highlight doc {i}",
            content=f"searchable highlight content number {i}",
            checksum=f"HP{i}",
        )
        backend.add_or_update(doc)

    r = backend.search(
        "searchable",
        user=None,
        page=1,
        page_size=10000,
        sort_field=None,
        sort_reverse=False,
        highlight_page=1,
        highlight_page_size=3,
    )

    assert r.total == 6
    assert len(r.hits) == 6

    # Check positions rather than just counts: a count-only assertion would
    # still pass if the wrong three hits were highlighted.
    highlight_flags = [bool(h["highlights"]) for h in r.hits]
    assert highlight_flags == [True, True, True, False, False, False]
def test_highlight_page_2_highlights_correct_slice(self, backend: TantivyBackend):
    """
    highlight_page=2 should highlight only the second page of results.

    With highlight_page_size=2 the second highlight page corresponds to
    result positions 2 and 3 (0-based) of the six returned hits.
    """
    for i in range(6):
        doc = Document.objects.create(
            title=f"page2 doc {i}",
            content=f"searchable page2 content number {i}",
            checksum=f"HP2{i}",
        )
        backend.add_or_update(doc)

    r = backend.search(
        "searchable",
        user=None,
        page=1,
        page_size=10000,
        sort_field=None,
        sort_reverse=False,
        highlight_page=2,
        highlight_page_size=2,
    )

    assert r.total == 6
    assert len(r.hits) == 6

    # Pin the exact window, not only its size, so an off-by-one in the
    # start/end computation (e.g. highlighting positions 1-2 or 3-4) fails.
    highlight_flags = [bool(h["highlights"]) for h in r.hits]
    assert highlight_flags == [False, False, True, True, False, False]
def test_no_highlight_page_highlights_all(self, backend: TantivyBackend):
    """
    Omitting highlight_page/highlight_page_size keeps the previous behaviour:
    every returned hit has a "content" highlight.
    """
    for i in range(3):
        backend.add_or_update(
            Document.objects.create(
                title=f"compat doc {i}",
                content=f"searchable compat content {i}",
                checksum=f"HC{i}",
            ),
        )

    results = backend.search(
        "searchable",
        user=None,
        page=1,
        page_size=10000,
        sort_field=None,
        sort_reverse=False,
    )

    assert len(results.hits) == 3
    # No highlight window was requested, so no hit may be skipped.
    assert all("content" in hit["highlights"] for hit in results.hits)
class TestRebuild:
"""Test index rebuilding functionality."""