Change search highlight markup from <b> back to <span class="match">, as it was before

This commit is contained in:
shamoon
2026-04-08 22:21:02 -07:00
parent 807e32c61f
commit 6a4144cdfb
2 changed files with 51 additions and 13 deletions

View File

@@ -7,6 +7,7 @@ from collections import Counter
from datetime import UTC
from datetime import datetime
from enum import StrEnum
from html import escape
from typing import TYPE_CHECKING
from typing import Self
from typing import TypedDict
@@ -54,6 +55,36 @@ class SearchMode(StrEnum):
TITLE = "title"
def _render_snippet_html(snippet: tantivy.Snippet) -> str:
    """Render a Tantivy snippet as HTML with ``<span class="match">`` highlights.

    The snippet's fragment text is HTML-escaped, and each highlighted range
    is wrapped in ``<span class="match">...</span>``.

    Args:
        snippet: Object exposing ``fragment()`` (the snippet text) and
            ``highlighted()`` (ranges with ``start`` / ``end`` attributes).

    Returns:
        The escaped fragment with highlight spans inserted. When there are
        no highlighted ranges, the escaped fragment alone.
    """
    fragment = snippet.fragment()
    highlighted = sorted(snippet.highlighted(), key=lambda r: r.start)
    if not highlighted:
        return escape(fragment)

    # Tantivy computes highlight ranges as UTF-8 byte offsets into the
    # fragment (it slices a Rust ``String`` with them). Indexing the Python
    # ``str`` directly would shift every highlight that follows a non-ASCII
    # character, so slice the encoded bytes and decode each piece instead.
    raw = fragment.encode("utf-8")
    raw_len = len(raw)

    def _escaped(lo: int, hi: int) -> str:
        # ``errors="replace"`` keeps rendering best-effort if a range ever
        # falls inside a multi-byte character instead of raising.
        return escape(raw[lo:hi].decode("utf-8", errors="replace"))

    parts: list[str] = []
    cursor = 0
    for highlight in highlighted:
        # Clamp into the fragment and never step back before ``cursor`` so
        # overlapping ranges cannot emit the same text twice.
        start = max(cursor, min(raw_len, highlight.start))
        end = min(raw_len, highlight.end)
        if end <= start:
            # Already covered by a previous range, or zero-width: nothing
            # to highlight.
            continue
        if start > cursor:
            parts.append(_escaped(cursor, start))
        parts.append(f'<span class="match">{_escaped(start, end)}</span>')
        cursor = end
    if cursor < raw_len:
        parts.append(_escaped(cursor, raw_len))
    return "".join(parts)
def _extract_autocomplete_words(text_sources: list[str]) -> set[str]:
"""Extract and normalize words for autocomplete.
@@ -606,7 +637,9 @@ class TantivyBackend:
"content",
)
content_html = snippet_generator.snippet_from_doc(actual_doc).to_html()
content_html = _render_snippet_html(
snippet_generator.snippet_from_doc(actual_doc),
)
if content_html:
highlights["content"] = content_html
@@ -620,9 +653,9 @@ class TantivyBackend:
self._schema,
"notes_text",
)
notes_html = notes_snippet_generator.snippet_from_doc(
actual_doc,
).to_html()
notes_html = _render_snippet_html(
notes_snippet_generator.snippet_from_doc(actual_doc),
)
if notes_html:
highlights["notes"] = notes_html

View File

@@ -563,8 +563,11 @@ class TestFieldHandling:
class TestHighlightHits:
"""Test highlight_hits returns proper HTML strings, not raw Snippet objects."""
def test_highlights_content_returns_html_string(self, backend: TantivyBackend):
"""highlight_hits must return HTML strings (from Snippet.to_html()), not Snippet objects."""
def test_highlights_content_returns_match_span_html(
self,
backend: TantivyBackend,
):
"""highlight_hits must return frontend-ready highlight spans."""
doc = Document.objects.create(
title="Highlight Test",
content="The quick brown fox jumps over the lazy dog",
@@ -582,13 +585,15 @@ class TestHighlightHits:
assert isinstance(content_highlight, str), (
f"Expected str, got {type(content_highlight)}: {content_highlight!r}"
)
# Tantivy wraps matched terms in <b> tags
assert "<b>" in content_highlight, (
f"Expected HTML with <b> tags, got: {content_highlight!r}"
assert '<span class="match">' in content_highlight, (
f"Expected HTML with match span, got: {content_highlight!r}"
)
def test_highlights_notes_returns_html_string(self, backend: TantivyBackend):
"""Note highlights must be HTML strings via notes_text companion field.
def test_highlights_notes_returns_match_span_html(
self,
backend: TantivyBackend,
):
"""Note highlights must be frontend-ready HTML via notes_text companion field.
The notes JSON field does not support tantivy SnippetGenerator; the
notes_text plain-text field is used instead. We use the full-text
@@ -618,8 +623,8 @@ class TestHighlightHits:
assert isinstance(note_highlight, str), (
f"Expected str, got {type(note_highlight)}: {note_highlight!r}"
)
assert "<b>" in note_highlight, (
f"Expected HTML with <b> tags, got: {note_highlight!r}"
assert '<span class="match">' in note_highlight, (
f"Expected HTML with match span, got: {note_highlight!r}"
)
def test_empty_doc_list_returns_empty_hits(self, backend: TantivyBackend):