Change search highlight markup from <b> back to <span class="match">, as it was before

2026-04-10 01:58:53 +00:00 · 2026-04-08 22:21:02 -07:00
parent 807e32c61f
commit 6a4144cdfb
2 changed files with 51 additions and 13 deletions
--- a/src/documents/search/_backend.py
+++ b/src/documents/search/_backend.py
@@ -7,6 +7,7 @@ from collections import Counter
 from datetime import UTC
 from datetime import datetime
 from enum import StrEnum
+from html import escape
 from typing import TYPE_CHECKING
 from typing import Self
 from typing import TypedDict
@@ -54,6 +55,36 @@ class SearchMode(StrEnum):
    TITLE = "title"


+def _render_snippet_html(snippet: tantivy.Snippet) -> str:
+    """Render a snippet as escaped HTML with matches in ``<span class="match">``.
+
+    Tantivy reports highlight ranges as UTF-8 byte offsets into the fragment,
+    so the fragment is sliced as bytes; indexing the ``str`` directly would
+    misplace highlights as soon as the text contains non-ASCII characters.
+    """
+    raw = snippet.fragment().encode("utf-8")
+
+    def chunk(start: int, end: int) -> str:
+        # "replace" keeps a range that splits a code point from raising.
+        return escape(raw[start:end].decode("utf-8", errors="replace"))
+
+    parts: list[str] = []
+    cursor = 0
+    for highlight in sorted(snippet.highlighted(), key=lambda r: r.start):
+        start = max(0, min(len(raw), highlight.start))
+        end = max(start, min(len(raw), highlight.end))
+        if end <= cursor:
+            continue  # empty range, or already covered by an earlier match
+        # Clip an overlapping range so its shared text is not emitted twice.
+        start = max(start, cursor)
+        parts.append(chunk(cursor, start))
+        parts.append(f'<span class="match">{chunk(start, end)}</span>')
+        cursor = end
+
+    parts.append(chunk(cursor, len(raw)))
+    return "".join(parts)
+
+
 def _extract_autocomplete_words(text_sources: list[str]) -> set[str]:
    """Extract and normalize words for autocomplete.

@@ -606,7 +637,9 @@ class TantivyBackend:
                        "content",
                    )

-                content_html = snippet_generator.snippet_from_doc(actual_doc).to_html()
+                content_html = _render_snippet_html(
+                    snippet_generator.snippet_from_doc(actual_doc),
+                )
                if content_html:
                    highlights["content"] = content_html

@@ -620,9 +653,9 @@ class TantivyBackend:
                            self._schema,
                            "notes_text",
                        )
-                    notes_html = notes_snippet_generator.snippet_from_doc(
-                        actual_doc,
-                    ).to_html()
+                    notes_html = _render_snippet_html(
+                        notes_snippet_generator.snippet_from_doc(actual_doc),
+                    )
                    if notes_html:
                        highlights["notes"] = notes_html

--- a/src/documents/tests/search/test_backend.py
+++ b/src/documents/tests/search/test_backend.py
@@ -563,8 +563,11 @@ class TestFieldHandling:
 class TestHighlightHits:
    """Test highlight_hits returns proper HTML strings, not raw Snippet objects."""

-    def test_highlights_content_returns_html_string(self, backend: TantivyBackend):
-        """highlight_hits must return HTML strings (from Snippet.to_html()), not Snippet objects."""
+    def test_highlights_content_returns_match_span_html(
+        self,
+        backend: TantivyBackend,
+    ):
+        """highlight_hits must return frontend-ready highlight spans."""
        doc = Document.objects.create(
            title="Highlight Test",
            content="The quick brown fox jumps over the lazy dog",
@@ -582,13 +585,15 @@ class TestHighlightHits:
        assert isinstance(content_highlight, str), (
            f"Expected str, got {type(content_highlight)}: {content_highlight!r}"
        )
-        # Tantivy wraps matched terms in <b> tags
-        assert "<b>" in content_highlight, (
-            f"Expected HTML with <b> tags, got: {content_highlight!r}"
+        assert '<span class="match">' in content_highlight, (
+            f"Expected HTML with match span, got: {content_highlight!r}"
        )

-    def test_highlights_notes_returns_html_string(self, backend: TantivyBackend):
-        """Note highlights must be HTML strings via notes_text companion field.
+    def test_highlights_notes_returns_match_span_html(
+        self,
+        backend: TantivyBackend,
+    ):
+        """Note highlights must be frontend-ready HTML via notes_text companion field.

        The notes JSON field does not support tantivy SnippetGenerator; the
        notes_text plain-text field is used instead.  We use the full-text
@@ -618,8 +623,8 @@ class TestHighlightHits:
        assert isinstance(note_highlight, str), (
            f"Expected str, got {type(note_highlight)}: {note_highlight!r}"
        )
-        assert "<b>" in note_highlight, (
-            f"Expected HTML with <b> tags, got: {note_highlight!r}"
+        assert '<span class="match">' in note_highlight, (
+            f"Expected HTML with match span, got: {note_highlight!r}"
        )

    def test_empty_doc_list_returns_empty_hits(self, backend: TantivyBackend):