From 98a7ed32e3f06a0a19a15a5c73da59e83151587e Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Fri, 29 May 2026 13:21:59 -0700 Subject: [PATCH] Fix: Preserve Whoosh date range swapping in Tantivy (#12866) --- src/documents/search/_query.py | 9 +++++++-- src/documents/tests/search/test_query.py | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/documents/search/_query.py b/src/documents/search/_query.py index fef248253..cced4c64c 100644 --- a/src/documents/search/_query.py +++ b/src/documents/search/_query.py @@ -390,8 +390,13 @@ def _rewrite_year_range(query: str) -> str: def _sub(m: regex.Match[str]) -> str: field = m.group("field") - lo = datetime(int(m.group("y1")), 1, 1, tzinfo=UTC) - hi = datetime(int(m.group("y2")) + 1, 1, 1, tzinfo=UTC) + y1, y2 = int(m.group("y1")), int(m.group("y2")) + # Whoosh swaps a reversed range when both years are explicit + # (whoosh.util.times.timespan.disambiguated); match that so a backwards + # range spans the intended years instead of matching nothing. + lo_year, hi_year = min(y1, y2), max(y1, y2) + lo = datetime(lo_year, 1, 1, tzinfo=UTC) + hi = datetime(hi_year + 1, 1, 1, tzinfo=UTC) return f"{field}:[{_fmt(lo)} TO {_fmt(hi)}]" try: diff --git a/src/documents/tests/search/test_query.py b/src/documents/tests/search/test_query.py index 9c52562d3..08d8ee260 100644 --- a/src/documents/tests/search/test_query.py +++ b/src/documents/tests/search/test_query.py @@ -519,6 +519,14 @@ class TestYearRangeRewriting: assert lo == expected_lo assert hi == expected_hi + def test_reversed_year_range_is_swapped(self) -> None: + # A reversed range must not yield lo > hi, which Tantivy treats as an + # empty range (silently zero results). The bounds are swapped instead. 
+ result = rewrite_natural_date_keywords("created:[2025 TO 2020]", UTC) + lo, hi = _range(result, "created") + assert lo == "2020-01-01T00:00:00Z" + assert hi == "2026-01-01T00:00:00Z" + def test_year_range_in_complex_boolean_query(self) -> None: query = "tag:steuer AND (title:2020 OR (NOT title:2019 AND NOT title:2018 AND created:[2020 TO 2020]))" result = rewrite_natural_date_keywords(query, UTC)