From 0d915c58a4fcc2a7d2cf57764ebc0f2a76551046 Mon Sep 17 00:00:00 2001 From: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Fri, 3 Apr 2026 15:10:00 -0700 Subject: [PATCH] feat: add highlight_page/highlight_page_size params to search() Gate expensive snippet/highlight generation to only the requested slice of hits, allowing the viewset to avoid generating highlights for all 10k results when only 25 are displayed. Co-Authored-By: Claude Opus 4.6 --- src/documents/search/_backend.py | 16 ++++- src/documents/tests/search/test_backend.py | 80 ++++++++++++++++++++++ 2 files changed, 94 insertions(+), 2 deletions(-) diff --git a/src/documents/search/_backend.py b/src/documents/search/_backend.py index 405c24360..8cb7e4d3b 100644 --- a/src/documents/search/_backend.py +++ b/src/documents/search/_backend.py @@ -435,6 +435,8 @@ class TantivyBackend: *, sort_reverse: bool, search_mode: SearchMode = SearchMode.QUERY, + highlight_page: int | None = None, + highlight_page_size: int | None = None, ) -> SearchResults: """ Execute a search query against the document index. 
@@ -533,6 +535,15 @@ class TantivyBackend: hits: list[SearchHit] = [] snippet_generator = None + # Determine which hits need highlights + if highlight_page is not None and highlight_page_size is not None: + hl_start = (highlight_page - 1) * highlight_page_size + hl_end = hl_start + highlight_page_size + else: + # Highlight all hits (backward-compatible default) + hl_start = 0 + hl_end = len(page_hits) + for rank, (doc_address, score) in enumerate(page_hits, start=offset + 1): # Get the actual document from the searcher using the doc address actual_doc = searcher.doc(doc_address) @@ -541,8 +552,9 @@ class TantivyBackend: highlights: dict[str, str] = {} - # Generate highlights if score > 0 - if score > 0: + # Generate highlights if score > 0 and hit is in the highlight window + hit_index = rank - offset - 1 # 0-based index within page_hits + if score > 0 and hl_start <= hit_index < hl_end: try: if snippet_generator is None: snippet_generator = tantivy.SnippetGenerator.create( diff --git a/src/documents/tests/search/test_backend.py b/src/documents/tests/search/test_backend.py index ff9638e63..4928d402b 100644 --- a/src/documents/tests/search/test_backend.py +++ b/src/documents/tests/search/test_backend.py @@ -428,6 +428,86 @@ class TestSearch: == 0 ) + def test_highlight_page_only_highlights_requested_slice( + self, + backend: TantivyBackend, + ): + """Only hits in the highlight_page slice should have non-empty highlights.""" + for i in range(6): + doc = Document.objects.create( + title=f"highlight doc {i}", + content=f"searchable highlight content number {i}", + checksum=f"HP{i}", + ) + backend.add_or_update(doc) + + r = backend.search( + "searchable", + user=None, + page=1, + page_size=10000, + sort_field=None, + sort_reverse=False, + highlight_page=1, + highlight_page_size=3, + ) + assert r.total == 6 + assert len(r.hits) == 6 + highlighted = [h for h in r.hits if h["highlights"]] + not_highlighted = [h for h in r.hits if not h["highlights"]] + assert 
len(highlighted) == 3 + assert len(not_highlighted) == 3 + + def test_highlight_page_2_highlights_correct_slice(self, backend: TantivyBackend): + """highlight_page=2 should highlight only the second page of results.""" + for i in range(6): + doc = Document.objects.create( + title=f"page2 doc {i}", + content=f"searchable page2 content number {i}", + checksum=f"HP2{i}", + ) + backend.add_or_update(doc) + + r = backend.search( + "searchable", + user=None, + page=1, + page_size=10000, + sort_field=None, + sort_reverse=False, + highlight_page=2, + highlight_page_size=2, + ) + assert r.total == 6 + assert len(r.hits) == 6 + has_highlights = [bool(h["highlights"]) for h in r.hits] + # highlight_page=2 with highlight_page_size=2 selects 0-based hits 2 and 3. + # Check positions, not just counts, so highlighting the wrong slice + # (e.g. the first page) fails this test. + assert has_highlights == [False, False, True, True, False, False] + + def test_no_highlight_page_highlights_all(self, backend: TantivyBackend): + """When highlight_page is not specified, all hits get highlights (backward compat).""" + for i in range(3): + doc = Document.objects.create( + title=f"compat doc {i}", + content=f"searchable compat content {i}", + checksum=f"HC{i}", + ) + backend.add_or_update(doc) + + r = backend.search( + "searchable", + user=None, + page=1, + page_size=10000, + sort_field=None, + sort_reverse=False, + ) + assert len(r.hits) == 3 + for hit in r.hits: + assert "content" in hit["highlights"] + class TestRebuild: """Test index rebuilding functionality."""