Fix (beta): restore v2 (Whoosh) advanced-search query compatibility (#13010)

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-08-03 01:22:17 +00:00 · 2026-06-15 15:32:44 -07:00
parent 75f0c4c92e
commit f4fa916579
9 changed files with 1614 additions and 418 deletions
@@ -8,11 +8,15 @@ from documents.search._backend import get_backend
 from documents.search._backend import reset_backend
 from documents.search._schema import needs_rebuild
 from documents.search._schema import wipe_index
+from documents.search._translate import InvalidDateQuery
+from documents.search._translate import SearchQueryError

 __all__ = [
+    "InvalidDateQuery",
    "SearchHit",
    "SearchIndexLockError",
    "SearchMode",
+    "SearchQueryError",
    "TantivyBackend",
    "TantivyRelevanceList",
    "WriteBatch",
@@ -0,0 +1,163 @@
+from __future__ import annotations
+
+from datetime import UTC
+from datetime import date
+from datetime import datetime
+from datetime import timedelta
+from typing import TYPE_CHECKING
+from typing import Final
+
+from dateutil.relativedelta import relativedelta
+
+if TYPE_CHECKING:
+    from datetime import tzinfo
+
+_DATE_ONLY_FIELDS = frozenset({"created"})
+
+_TODAY: Final[str] = "today"
+_YESTERDAY: Final[str] = "yesterday"
+_PREVIOUS_WEEK: Final[str] = "previous week"
+_THIS_MONTH: Final[str] = "this month"
+_PREVIOUS_MONTH: Final[str] = "previous month"
+_THIS_YEAR: Final[str] = "this year"
+_PREVIOUS_YEAR: Final[str] = "previous year"
+_PREVIOUS_QUARTER: Final[str] = "previous quarter"
+
+_DATE_KEYWORDS = frozenset(
+    {
+        _TODAY,
+        _YESTERDAY,
+        _PREVIOUS_WEEK,
+        _THIS_MONTH,
+        _PREVIOUS_MONTH,
+        _THIS_YEAR,
+        _PREVIOUS_YEAR,
+        _PREVIOUS_QUARTER,
+    },
+)
+
+
+def _fmt(dt: datetime) -> str:
+    """Format a datetime as an ISO 8601 UTC string for use in Tantivy range queries."""
+    return dt.astimezone(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+
+def _iso_range(lo: datetime, hi: datetime) -> str:
+    """Format a [lo TO hi] range string in ISO 8601 for Tantivy query syntax."""
+    return f"[{_fmt(lo)} TO {_fmt(hi)}]"
+
+
+def _quarter_start(d: date) -> date:
+    """Return the first day of the calendar quarter containing ``d``."""
+    return date(d.year, ((d.month - 1) // 3) * 3 + 1, 1)
+
+
+def _midnight(d: date, tz: tzinfo) -> datetime:
+    """Convert a calendar date at local-timezone midnight to a UTC datetime."""
+    return datetime(d.year, d.month, d.day, tzinfo=tz).astimezone(UTC)
+
+
+def _keyword_bounds(keyword: str, tz: tzinfo) -> tuple[date, date]:
+    """
+    Map a relative date keyword to ``(start, exclusive_end)`` calendar dates.
+
+    ``tz`` only determines what "today" is; the caller decides how the returned
+    dates become UTC datetime boundaries (date-only vs. local-midnight offset).
+    """
+    today = datetime.now(tz).date()
+    if keyword == _TODAY:
+        return today, today + timedelta(days=1)
+    if keyword == _YESTERDAY:
+        return today - timedelta(days=1), today
+    if keyword == _PREVIOUS_WEEK:
+        this_monday = today - timedelta(days=today.weekday())
+        return this_monday - timedelta(weeks=1), this_monday
+    if keyword == _THIS_MONTH:
+        first = today.replace(day=1)
+        return first, first + relativedelta(months=1)
+    if keyword == _PREVIOUS_MONTH:
+        this_first = today.replace(day=1)
+        return this_first - relativedelta(months=1), this_first
+    if keyword == _THIS_YEAR:
+        return date(today.year, 1, 1), date(today.year + 1, 1, 1)
+    if keyword == _PREVIOUS_YEAR:
+        return date(today.year - 1, 1, 1), date(today.year, 1, 1)
+    if keyword == _PREVIOUS_QUARTER:
+        this_quarter = _quarter_start(today)
+        return this_quarter - relativedelta(months=3), this_quarter
+    raise ValueError(f"Unknown keyword: {keyword}")
+
+
+def _date_only_range(keyword: str, tz: tzinfo) -> str:
+    """
+    For `created` (DateField): use the local calendar date, converted to
+    midnight UTC boundaries. No offset arithmetic — date only.
+    """
+    start, end = _keyword_bounds(keyword, tz)
+    lo = datetime(start.year, start.month, start.day, tzinfo=UTC)
+    hi = datetime(end.year, end.month, end.day, tzinfo=UTC)
+    return _iso_range(lo, hi)
+
+
+def _datetime_range(keyword: str, tz: tzinfo) -> str:
+    """
+    For `added` / `modified` (DateTimeField, stored as UTC): convert local day
+    boundaries to UTC — full offset arithmetic required.
+    """
+    start, end = _keyword_bounds(keyword, tz)
+    return _iso_range(_midnight(start, tz), _midnight(end, tz))
+
+
+def _precision_bounds(digits: str) -> tuple[date, date] | None:
+    """
+    Map a 4/6/8-digit date token to (start, exclusive_end) calendar dates.
+
+    YYYY -> whole year, YYYYMM -> whole month, YYYYMMDD -> single day.
+    Returns None for any unparsable or out-of-range value (e.g. month 23),
+    so callers can emit a no-match clause instead of erroring (Whoosh parity).
+    """
+    try:
+        if len(digits) == 4:
+            year = int(digits)
+            return date(year, 1, 1), date(year + 1, 1, 1)
+        if len(digits) == 6:
+            year, month = int(digits[:4]), int(digits[4:6])
+            start = date(year, month, 1)
+            end = date(year + 1, 1, 1) if month == 12 else date(year, month + 1, 1)
+            return start, end
+        if len(digits) == 8:
+            start = date(int(digits[:4]), int(digits[4:6]), int(digits[6:8]))
+            return start, start + timedelta(days=1)
+    except ValueError:
+        return None
+    return None
+
+
+def _utc_bounds_for_field(
+    field: str,
+    start: date,
+    end: date,
+    tz: tzinfo,
+) -> tuple[datetime, datetime]:
+    """
+    Convert calendar-date bounds to UTC datetimes per the field's storage type.
+
+    For DateField (``created``) the bounds are UTC midnight (no offset). For
+    DateTimeField (``added``/``modified``) the bounds are local-tz midnight
+    converted to UTC, matching how each field is indexed.
+    """
+    if field in _DATE_ONLY_FIELDS:
+        return (
+            datetime(start.year, start.month, start.day, tzinfo=UTC),
+            datetime(end.year, end.month, end.day, tzinfo=UTC),
+        )
+    return (
+        datetime(start.year, start.month, start.day, tzinfo=tz).astimezone(UTC),
+        datetime(end.year, end.month, end.day, tzinfo=tz).astimezone(UTC),
+    )
+
+
+def _field_range_from_dates(field: str, start: date, end: date, tz: tzinfo) -> str:
+    """Build a Tantivy ``field:[lo TO hi]`` ISO range from calendar-date bounds."""
+    lo, hi = _utc_bounds_for_field(field, start, end, tz)
+    return f"{field}:{_iso_range(lo, hi)}"
@@ -1,88 +1,35 @@
 from __future__ import annotations

+import logging
 from datetime import UTC
-from datetime import date
-from datetime import datetime
-from datetime import timedelta
 from typing import TYPE_CHECKING
 from typing import Final

 import regex
 import tantivy
-from dateutil.relativedelta import relativedelta
 from django.conf import settings

+from documents.search._dates import (
+    _date_only_range,  # noqa: F401 — re-exported for test imports
+)
+from documents.search._dates import (
+    _datetime_range,  # noqa: F401 — re-exported for test imports
+)
 from documents.search._tokenizer import simple_search_tokens
+from documents.search._translate import SearchQueryError
+from documents.search._translate import translate_query

 if TYPE_CHECKING:
    from datetime import tzinfo

    from django.contrib.auth.base_user import AbstractBaseUser

+logger = logging.getLogger("paperless.search")
+
 # Maximum seconds any single regex substitution may run.
 # Prevents ReDoS on adversarial user-supplied query strings.
 _REGEX_TIMEOUT: Final[float] = 1.0

-_DATE_ONLY_FIELDS = frozenset({"created"})
-
-_TODAY: Final[str] = "today"
-_YESTERDAY: Final[str] = "yesterday"
-_PREVIOUS_WEEK: Final[str] = "previous week"
-_THIS_MONTH: Final[str] = "this month"
-_PREVIOUS_MONTH: Final[str] = "previous month"
-_THIS_YEAR: Final[str] = "this year"
-_PREVIOUS_YEAR: Final[str] = "previous year"
-_PREVIOUS_QUARTER: Final[str] = "previous quarter"
-
-_DATE_KEYWORDS = frozenset(
-    {
-        _TODAY,
-        _YESTERDAY,
-        _PREVIOUS_WEEK,
-        _THIS_MONTH,
-        _PREVIOUS_MONTH,
-        _THIS_YEAR,
-        _PREVIOUS_YEAR,
-        _PREVIOUS_QUARTER,
-    },
-)
-
-_DATE_KEYWORD_PATTERN = "|".join(
-    sorted((regex.escape(k) for k in _DATE_KEYWORDS), key=len, reverse=True),
-)
-
-_FIELD_DATE_RE = regex.compile(
-    rf"""(?<!\w)(?P<field>created|modified|added)\s*:\s*(?:
-    (?P<quote>["'])(?P<quoted>{_DATE_KEYWORD_PATTERN})(?P=quote)
-    |
-    (?P<bare>{_DATE_KEYWORD_PATTERN})(?![\w-])
-)""",
-    regex.IGNORECASE | regex.VERBOSE,
-)
-_COMPACT_DATE_RE = regex.compile(r"\b(\d{14})\b")
-_RELATIVE_RANGE_RE = regex.compile(
-    r"\[now([+-]\d+[dhm])?\s+TO\s+now([+-]\d+[dhm])?\]",
-    regex.IGNORECASE,
-)
-# Whoosh-style relative date range: e.g. [-1 week to now], [-7 days to now]
-_WHOOSH_REL_RANGE_RE = regex.compile(
-    r"\[-(?P<n>\d+)\s+(?P<unit>second|minute|hour|day|week|month|year)s?\s+to\s+now\]",
-    regex.IGNORECASE,
-)
-# Whoosh-style 8-digit date: field:YYYYMMDD — field-aware so timezone can be applied correctly.
-# Scoped to date fields only; numeric fields (asn, id, page_count, ...) must not be rewritten.
-_DATE8_RE = regex.compile(
-    r"(?<!\w)(?P<field>created|modified|added):(?P<date8>\d{8})\b",
-)
-_YEAR_RANGE_RE = regex.compile(
-    r"(?<!\w)(?P<field>created|modified|added):\[(?P<y1>\d{4})\s+TO\s+(?P<y2>\d{4})\]",
-    regex.IGNORECASE,
-)
-# Tantivy syntax error: " - " and " + " with spaces on both sides are invalid because
-# the NOT/MUST operators require no space between the operator and the term.
-# In natural-language queries (e.g., "H52.1 - Kurzsichtigkeit"), the dash is a separator.
-_SPACED_OPERATOR_RE = regex.compile(r"\s+[-+]\s+")
-_TRAILING_OPERATOR_RE = regex.compile(r"\s+[-+]+\s*$")
 # Matches CJK/Hangul characters so queries can be routed to bigram fields.
 # Uses Unicode properties to cover all blocks including Extension B+ planes.
 _CJK_RE: Final = regex.compile(r"[\p{Han}\p{Hiragana}\p{Katakana}\p{Hangul}]+")
@@ -117,303 +64,12 @@ def _build_cjk_query(
        return None


-def _fmt(dt: datetime) -> str:
-    """Format a datetime as an ISO 8601 UTC string for use in Tantivy range queries."""
-    return dt.astimezone(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
-
-
-def _iso_range(lo: datetime, hi: datetime) -> str:
-    """Format a [lo TO hi] range string in ISO 8601 for Tantivy query syntax."""
-    return f"[{_fmt(lo)} TO {_fmt(hi)}]"
-
-
-def _date_only_range(keyword: str, tz: tzinfo) -> str:
-    """
-    For `created` (DateField): use the local calendar date, converted to
-    midnight UTC boundaries. No offset arithmetic — date only.
-    """
-
-    today = datetime.now(tz).date()
-
-    def _quarter_start(d: date) -> date:
-        return date(d.year, ((d.month - 1) // 3) * 3 + 1, 1)
-
-    if keyword == _TODAY:
-        lo = datetime(today.year, today.month, today.day, tzinfo=UTC)
-        return _iso_range(lo, lo + timedelta(days=1))
-    if keyword == _YESTERDAY:
-        y = today - timedelta(days=1)
-        lo = datetime(y.year, y.month, y.day, tzinfo=UTC)
-        hi = datetime(today.year, today.month, today.day, tzinfo=UTC)
-        return _iso_range(lo, hi)
-    if keyword == _PREVIOUS_WEEK:
-        this_mon = today - timedelta(days=today.weekday())
-        last_mon = this_mon - timedelta(weeks=1)
-        lo = datetime(last_mon.year, last_mon.month, last_mon.day, tzinfo=UTC)
-        hi = datetime(this_mon.year, this_mon.month, this_mon.day, tzinfo=UTC)
-        return _iso_range(lo, hi)
-    if keyword == _THIS_MONTH:
-        lo = datetime(today.year, today.month, 1, tzinfo=UTC)
-        if today.month == 12:
-            hi = datetime(today.year + 1, 1, 1, tzinfo=UTC)
-        else:
-            hi = datetime(today.year, today.month + 1, 1, tzinfo=UTC)
-        return _iso_range(lo, hi)
-    if keyword == _PREVIOUS_MONTH:
-        if today.month == 1:
-            lo = datetime(today.year - 1, 12, 1, tzinfo=UTC)
-        else:
-            lo = datetime(today.year, today.month - 1, 1, tzinfo=UTC)
-        hi = datetime(today.year, today.month, 1, tzinfo=UTC)
-        return _iso_range(lo, hi)
-    if keyword == _THIS_YEAR:
-        lo = datetime(today.year, 1, 1, tzinfo=UTC)
-        return _iso_range(lo, datetime(today.year + 1, 1, 1, tzinfo=UTC))
-    if keyword == _PREVIOUS_YEAR:
-        lo = datetime(today.year - 1, 1, 1, tzinfo=UTC)
-        return _iso_range(lo, datetime(today.year, 1, 1, tzinfo=UTC))
-    if keyword == _PREVIOUS_QUARTER:
-        this_quarter = _quarter_start(today)
-        last_quarter = this_quarter - relativedelta(months=3)
-        lo = datetime(
-            last_quarter.year,
-            last_quarter.month,
-            last_quarter.day,
-            tzinfo=UTC,
-        )
-        hi = datetime(
-            this_quarter.year,
-            this_quarter.month,
-            this_quarter.day,
-            tzinfo=UTC,
-        )
-        return _iso_range(lo, hi)
-    raise ValueError(f"Unknown keyword: {keyword}")
-
-
-def _datetime_range(keyword: str, tz: tzinfo) -> str:
-    """
-    For `added` / `modified` (DateTimeField, stored as UTC): convert local day
-    boundaries to UTC — full offset arithmetic required.
-    """
-
-    now_local = datetime.now(tz)
-    today = now_local.date()
-
-    def _midnight(d: date) -> datetime:
-        return datetime(d.year, d.month, d.day, tzinfo=tz).astimezone(UTC)
-
-    def _quarter_start(d: date) -> date:
-        return date(d.year, ((d.month - 1) // 3) * 3 + 1, 1)
-
-    if keyword == _TODAY:
-        return _iso_range(_midnight(today), _midnight(today + timedelta(days=1)))
-    if keyword == _YESTERDAY:
-        y = today - timedelta(days=1)
-        return _iso_range(_midnight(y), _midnight(today))
-    if keyword == _PREVIOUS_WEEK:
-        this_mon = today - timedelta(days=today.weekday())
-        last_mon = this_mon - timedelta(weeks=1)
-        return _iso_range(_midnight(last_mon), _midnight(this_mon))
-    if keyword == _THIS_MONTH:
-        first = today.replace(day=1)
-        if today.month == 12:
-            next_first = date(today.year + 1, 1, 1)
-        else:
-            next_first = date(today.year, today.month + 1, 1)
-        return _iso_range(_midnight(first), _midnight(next_first))
-    if keyword == _PREVIOUS_MONTH:
-        this_first = today.replace(day=1)
-        if today.month == 1:
-            last_first = date(today.year - 1, 12, 1)
-        else:
-            last_first = date(today.year, today.month - 1, 1)
-        return _iso_range(_midnight(last_first), _midnight(this_first))
-    if keyword == _THIS_YEAR:
-        return _iso_range(
-            _midnight(date(today.year, 1, 1)),
-            _midnight(date(today.year + 1, 1, 1)),
-        )
-    if keyword == _PREVIOUS_YEAR:
-        return _iso_range(
-            _midnight(date(today.year - 1, 1, 1)),
-            _midnight(date(today.year, 1, 1)),
-        )
-    if keyword == _PREVIOUS_QUARTER:
-        this_quarter = _quarter_start(today)
-        last_quarter = this_quarter - relativedelta(months=3)
-        return _iso_range(_midnight(last_quarter), _midnight(this_quarter))
-    raise ValueError(f"Unknown keyword: {keyword}")
-
-
-def _rewrite_compact_date(query: str) -> str:
-    """Rewrite Whoosh compact date tokens (14-digit YYYYMMDDHHmmss) to ISO 8601."""
-
-    def _sub(m: regex.Match[str]) -> str:
-        raw = m.group(1)
-        try:
-            dt = datetime(
-                int(raw[0:4]),
-                int(raw[4:6]),
-                int(raw[6:8]),
-                int(raw[8:10]),
-                int(raw[10:12]),
-                int(raw[12:14]),
-                tzinfo=UTC,
-            )
-            return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
-        except ValueError:
-            return str(m.group(0))
-
-    try:
-        return _COMPACT_DATE_RE.sub(_sub, query, timeout=_REGEX_TIMEOUT)
-    except TimeoutError:  # pragma: no cover
-        raise ValueError(
-            "Query too complex to process (compact date rewrite timed out)",
-        )
-
-
-def _rewrite_relative_range(query: str) -> str:
-    """Rewrite Whoosh relative ranges ([now-7d TO now]) to concrete ISO 8601 UTC boundaries."""
-
-    def _sub(m: regex.Match[str]) -> str:
-        now = datetime.now(UTC)
-
-        def _offset(s: str | None) -> timedelta:
-            if not s:
-                return timedelta(0)
-            sign = 1 if s[0] == "+" else -1
-            n, unit = int(s[1:-1]), s[-1]
-            return (
-                sign
-                * {
-                    "d": timedelta(days=n),
-                    "h": timedelta(hours=n),
-                    "m": timedelta(minutes=n),
-                }[unit]
-            )
-
-        lo, hi = now + _offset(m.group(1)), now + _offset(m.group(2))
-        if lo > hi:
-            lo, hi = hi, lo
-        return f"[{_fmt(lo)} TO {_fmt(hi)}]"
-
-    try:
-        return _RELATIVE_RANGE_RE.sub(_sub, query, timeout=_REGEX_TIMEOUT)
-    except TimeoutError:  # pragma: no cover
-        raise ValueError(
-            "Query too complex to process (relative range rewrite timed out)",
-        )
-
-
-def _rewrite_whoosh_relative_range(query: str) -> str:
-    """Rewrite Whoosh-style relative date ranges ([-N unit to now]) to ISO 8601.
-
-    Supports: second, minute, hour, day, week, month, year (singular and plural).
-    Example: ``added:[-1 week to now]`` → ``added:[2025-01-01T… TO 2025-01-08T…]``
-    """
-    now = datetime.now(UTC)
-
-    def _sub(m: regex.Match[str]) -> str:
-        n = int(m.group("n"))
-        unit = m.group("unit").lower()
-        delta_map: dict[str, timedelta | relativedelta] = {
-            "second": timedelta(seconds=n),
-            "minute": timedelta(minutes=n),
-            "hour": timedelta(hours=n),
-            "day": timedelta(days=n),
-            "week": timedelta(weeks=n),
-            "month": relativedelta(months=n),
-            "year": relativedelta(years=n),
-        }
-        lo = now - delta_map[unit]
-        return f"[{_fmt(lo)} TO {_fmt(now)}]"
-
-    try:
-        return _WHOOSH_REL_RANGE_RE.sub(_sub, query, timeout=_REGEX_TIMEOUT)
-    except TimeoutError:  # pragma: no cover
-        raise ValueError(
-            "Query too complex to process (Whoosh relative range rewrite timed out)",
-        )
-
-
-def _rewrite_8digit_date(query: str, tz: tzinfo) -> str:
-    """Rewrite field:YYYYMMDD date tokens to an ISO 8601 day range.
-
-    Runs after ``_rewrite_compact_date`` so 14-digit timestamps are already
-    converted and won't spuriously match here.
-
-    For DateField fields (e.g. ``created``) uses UTC midnight boundaries.
-    For DateTimeField fields (e.g. ``added``, ``modified``) uses local TZ
-    midnight boundaries converted to UTC — matching the ``_datetime_range``
-    behaviour for keyword dates.
-    """
-
-    def _sub(m: regex.Match[str]) -> str:
-        field = m.group("field")
-        raw = m.group("date8")
-        try:
-            year, month, day = int(raw[0:4]), int(raw[4:6]), int(raw[6:8])
-            d = date(year, month, day)
-            if field in _DATE_ONLY_FIELDS:
-                lo = datetime(d.year, d.month, d.day, tzinfo=UTC)
-                hi = lo + timedelta(days=1)
-            else:
-                # DateTimeField: use local-timezone midnight → UTC
-                lo = datetime(d.year, d.month, d.day, tzinfo=tz).astimezone(UTC)
-                hi = datetime(
-                    (d + timedelta(days=1)).year,
-                    (d + timedelta(days=1)).month,
-                    (d + timedelta(days=1)).day,
-                    tzinfo=tz,
-                ).astimezone(UTC)
-            return f"{field}:[{_fmt(lo)} TO {_fmt(hi)}]"
-        except ValueError:
-            return m.group(0)
-
-    try:
-        return _DATE8_RE.sub(_sub, query, timeout=_REGEX_TIMEOUT)
-    except TimeoutError:  # pragma: no cover
-        raise ValueError(
-            "Query too complex to process (8-digit date rewrite timed out)",
-        )
-
-
-def _rewrite_year_range(query: str) -> str:
-    """Rewrite Whoosh-style year-only date ranges to ISO 8601 UTC boundaries.
-
-    Converts ``field:[YYYY TO YYYY]`` to a full ISO 8601 datetime range.
-    The upper bound is the start of the year after the end year (exclusive),
-    matching the Whoosh convention of treating year-only ranges as full-year spans.
-    """
-
-    def _sub(m: regex.Match[str]) -> str:
-        field = m.group("field")
-        y1, y2 = int(m.group("y1")), int(m.group("y2"))
-        # Whoosh swaps a reversed range when both years are explicit
-        # (whoosh.util.times.timespan.disambiguated); match that so a backwards
-        # range spans the intended years instead of matching nothing.
-        lo_year, hi_year = min(y1, y2), max(y1, y2)
-        lo = datetime(lo_year, 1, 1, tzinfo=UTC)
-        hi = datetime(hi_year + 1, 1, 1, tzinfo=UTC)
-        return f"{field}:[{_fmt(lo)} TO {_fmt(hi)}]"
-
-    try:
-        return _YEAR_RANGE_RE.sub(_sub, query, timeout=_REGEX_TIMEOUT)
-    except TimeoutError:  # pragma: no cover
-        raise ValueError("Query too complex to process (year range rewrite timed out)")
-
-
 def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
    """
    Rewrite natural date syntax to ISO 8601 format for Tantivy compatibility.

-    Performs the first stage of query preprocessing, converting various date
-    formats and keywords to ISO 8601 datetime ranges that Tantivy can parse:
-    - Compact 14-digit dates (YYYYMMDDHHmmss)
-    - Whoosh relative ranges ([-7 days to now], [now-1h TO now+2h])
-    - 8-digit dates with field awareness (created:20240115)
-    - Natural keywords (field:today, field:"previous quarter", etc.)
+    Delegates to ``translate_query`` which handles all date forms, comma
+    expansion, field aliasing, relative ranges, and operator normalization.

    Args:
        query: Raw user query string
@@ -425,35 +81,15 @@ def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
    Note:
        Bare keywords without field prefixes pass through unchanged.
    """
-    query = _rewrite_compact_date(query)
-    query = _rewrite_whoosh_relative_range(query)
-    query = _rewrite_year_range(query)
-    query = _rewrite_8digit_date(query, tz)
-    query = _rewrite_relative_range(query)
-
-    def _replace(m: regex.Match[str]) -> str:
-        field = m.group("field")
-        keyword = (m.group("quoted") or m.group("bare")).lower()
-        if field in _DATE_ONLY_FIELDS:
-            return f"{field}:{_date_only_range(keyword, tz)}"
-        return f"{field}:{_datetime_range(keyword, tz)}"
-
-    try:
-        return _FIELD_DATE_RE.sub(_replace, query, timeout=_REGEX_TIMEOUT)
-    except TimeoutError:  # pragma: no cover
-        raise ValueError(
-            "Query too complex to process (date keyword rewrite timed out)",
-        )
+    return translate_query(query, tz)


 def normalize_query(query: str) -> str:
    """
    Normalize query syntax for better search behavior.

-    Expands comma-separated field values to explicit AND clauses and
-    collapses excessive whitespace for cleaner parsing:
-    - tag:foo,bar → tag:foo AND tag:bar
-    - multiple spaces → single spaces
+    Delegates to ``translate_query`` which handles comma expansion, whitespace
+    collapsing, operator normalization, and field aliasing.

    Args:
        query: Query string after date rewriting
@@ -461,29 +97,7 @@ def normalize_query(query: str) -> str:
    Returns:
        Normalized query string ready for Tantivy parsing
    """
-
-    def _expand(m: regex.Match[str]) -> str:
-        field = m.group(1)
-        values = [v.strip() for v in m.group(2).split(",") if v.strip()]
-        return " AND ".join(f"{field}:{v}" for v in values)
-
-    try:
-        query = regex.sub(
-            r"(\w+):([^\s\[\]]+(?:,[^\s\[\]]+)+)",
-            _expand,
-            query,
-            timeout=_REGEX_TIMEOUT,
-        )
-        query = regex.sub(r" {2,}", " ", query, timeout=_REGEX_TIMEOUT).strip()
-        # Strip trailing dangling operators before Tantivy sees them.
-        query = _TRAILING_OPERATOR_RE.sub("", query, timeout=_REGEX_TIMEOUT).strip()
-        # Replace " - " / " + " with a space: Tantivy requires no space between
-        # the operator and its operand (-term / +term), so spaces on both sides
-        # means this is a natural-language separator, not a query operator.
-        query = _SPACED_OPERATOR_RE.sub(" ", query, timeout=_REGEX_TIMEOUT).strip()
-        return query
-    except TimeoutError:  # pragma: no cover
-        raise ValueError("Query too complex to process (normalization timed out)")
+    return translate_query(query, UTC)


 def build_permission_filter(
@@ -603,8 +217,16 @@ def parse_user_query(
        as a post-search score filter, not during query construction.
    """

-    query_str = rewrite_natural_date_keywords(raw_query, tz)
-    query_str = normalize_query(query_str)
+    try:
+        query_str = translate_query(raw_query, tz)
+    except SearchQueryError:
+        # Intentional, user-fixable error (e.g. an unparsable date). Propagate so
+        # the view can return a 400 with a helpful message rather than falling
+        # back to the raw (still-invalid) query.
+        raise
+    except Exception:  # pragma: no cover - defensive
+        logger.warning("Query translation failed; using raw query", exc_info=True)
+        query_str = raw_query

    exact = index.parse_query(
        query_str,
@@ -0,0 +1,566 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import UTC
+from datetime import datetime
+from datetime import timedelta
+from typing import TYPE_CHECKING
+from typing import TypeAlias
+
+import regex
+from dateutil.relativedelta import relativedelta
+
+from documents.search._dates import _DATE_KEYWORDS
+from documents.search._dates import _DATE_ONLY_FIELDS
+from documents.search._dates import _date_only_range
+from documents.search._dates import _datetime_range
+from documents.search._dates import _field_range_from_dates
+from documents.search._dates import _fmt
+from documents.search._dates import _precision_bounds
+from documents.search._dates import _utc_bounds_for_field
+
+# Compiled regex that matches any known multi-word (or single-word) date keyword
+# at the start of a match position, longest alternatives first so "previous week"
+# wins over a hypothetical shorter "previous".
+_KEYWORD_VALUE_RE = regex.compile(
+    "|".join(sorted((regex.escape(k) for k in _DATE_KEYWORDS), key=len, reverse=True)),
+    regex.IGNORECASE,
+)
+
+if TYPE_CHECKING:
+    from datetime import tzinfo
+
+# TODO: this module translates date queries into Tantivy *string* syntax, which
+# forces a workaround for something Tantivy's string parser cannot express on
+# date fields: open-ended ranges use far-past/far-future string sentinels
+# (OPEN_LO/OPEN_HI). These can be replaced with a real tantivy.Query object
+# (Query.range_query(..., None) for open bounds) once tantivy-py accepts Python
+# datetimes in range_query/term_query on Date fields. That support exists on
+# tantivy-py master (PRs #655 + #666) but postdates the pinned 0.26.0 wheel, so
+# it is blocked only on a published release > 0.26.0 and a dependency bump.
+# (Unparsable dates now raise InvalidDateQuery -> HTTP 400 rather than using a
+# no-match string sentinel.)
+
+# Fields that store exact, non-analyzed comma-joined tokens in the index and so
+# need explicit comma->AND expansion (Whoosh KEYWORD(commas=True) set).
+MULTI_VALUE_FIELDS = frozenset({"tag", "tag_id", "viewer_id"})
+
+# Date fields whose values/ranges get rewritten to RFC3339 Tantivy ranges.
+DATE_FIELDS = frozenset({"created", "modified", "added"})
+
+# Field aliases: Whoosh (v2) field names that were renamed in the Tantivy schema.
+# Preserved here so v2 queries using the old names keep working, neither returning
+# 400 errors nor silently failing. Applied by _render to non-date field tokens.
+FIELD_ALIASES: dict[str, str] = {
+    "type": "document_type",
+    "type_id": "document_type_id",
+    "path": "storage_path",
+    "path_id": "storage_path_id",
+}
+
+# Known schema fields: a comma immediately followed by ``<known>:`` is a clause
+# separator. Restricting to known fields prevents URL-like ``http:`` misfires.
+KNOWN_FIELDS = frozenset(
+    {
+        "title",
+        "content",
+        "correspondent",
+        "document_type",
+        "type",  # v2 alias -> document_type
+        "storage_path",
+        "path",  # v2 alias -> storage_path
+        "tag",
+        "tag_id",
+        "correspondent_id",
+        "document_type_id",
+        "type_id",  # v2 alias -> document_type_id
+        "storage_path_id",
+        "path_id",  # v2 alias -> storage_path_id
+        "owner_id",
+        "viewer_id",
+        "asn",
+        "page_count",
+        "num_notes",
+        "created",
+        "modified",
+        "added",
+        "original_filename",
+        "checksum",
+        "notes",
+        "custom_fields",
+    },
+)
+
+_FIELD_RE = regex.compile(r"(?P<field>\w+):")
+
+# Matches the TO separator inside a range bracket. Handles three forms:
+#   middle:   "lo TO hi"   (lo may be empty; hi needs at least one character)
+#   trailing: "lo TO"      (open upper bound)
+#   leading:  "TO hi"      (open lower bound)
+# Bounds MAY contain internal spaces (e.g. "-7 days"), so we use .*? / .+?
+# and split on the whitespace-delimited " TO " / " to " separator.
+_RANGE_RE = regex.compile(
+    r"^\s*(?P<lo>.*?)\s+[Tt][Oo]\s+(?P<hi>.+?)\s*$"
+    r"|"
+    r"^\s*(?P<lo2>.+?)\s+[Tt][Oo]\s*$"
+    r"|"
+    r"^\s*[Tt][Oo]\s+(?P<hi2>.+?)\s*$",
+)
+
+
+@dataclass(frozen=True, slots=True)
+class FieldValue:
+    field: str
+    value: str
+
+
+# Produced by the comma-resolution pass (not by scan()).
+@dataclass(frozen=True, slots=True)
+class FieldValueList:
+    field: str
+    values: tuple[str, ...]
+
+
+@dataclass(frozen=True, slots=True)
+class FieldRange:
+    field: str
+    open: str
+    lo: str
+    hi: str
+    close: str
+
+
+# Produced by the comma-resolution pass (not by scan()).
+@dataclass(frozen=True, slots=True)
+class Comma:
+    pass
+
+
+@dataclass(frozen=True, slots=True)
+class Passthrough:
+    raw: str
+
+
+Token: TypeAlias = FieldValue | FieldValueList | FieldRange | Comma | Passthrough
+
+_CLOSE: dict[str, str] = {"[": "]", "{": "}"}
+
+
+def scan(query: str) -> list[Token]:
+    """
+    Tokenize a raw query into date/comma-aware tokens, leaving everything else
+    as verbatim ``Passthrough`` runs. Non-recursive: finds the first matching
+    close bracket/quote. Nested brackets are not valid Tantivy range syntax and
+    pass through verbatim on mismatch.
+    """
+    tokens: list[Token] = []
+    buf: list[str] = []  # accumulates passthrough chars
+    i, n = 0, len(query)
+    while i < n:
+        matched = _match_field_token(query, i)
+        if matched is None:
+            buf.append(query[i])
+            i += 1
+            continue
+        token, i = matched
+        _flush(buf, tokens)
+        tokens.append(token)
+        i = _maybe_comma(query, i, tokens)
+    _flush(buf, tokens)
+    return tokens
+
+
+def _flush(buf: list[str], tokens: list[Token]) -> None:
+    """Emit any accumulated passthrough characters as a single token."""
+    if buf:
+        tokens.append(Passthrough("".join(buf)))
+        buf.clear()
+
+
+def _at_word_boundary(query: str, i: int) -> bool:
+    """A field token may begin only at the start or after a non-word character."""
+    return i == 0 or not (query[i - 1].isalnum() or query[i - 1] == "_")
+
+
+def _match_field_token(query: str, i: int) -> tuple[Token, int] | None:
+    """
+    Try to consume a known ``field:`` token that begins at index ``i``.
+
+    Returns ``(token, end_index)`` for a ``field:[range]`` or ``field:value``
+    clause, or None when no known field starts here, the position is in the
+    middle of a word, or the range/value cannot be consumed. On None the caller
+    treats the character at ``i`` as passthrough text.
+    """
+    # A field name may not begin in the middle of a word (e.g. "xtag:foo").
+    if not _at_word_boundary(query, i):
+        return None
+    hit = _FIELD_RE.match(query, i)
+    if hit is None:
+        return None
+    name = hit.group("field")
+    if name not in KNOWN_FIELDS:
+        return None
+    value_start = hit.end()
+    if query[value_start : value_start + 1] in ("[", "{"):
+        return _consume_range(query, value_start, name)
+    result = _consume_field_value(query, name, value_start)
+    if result is None:
+        return None
+    text, stop = result
+    return FieldValue(name, text), stop
+
+
+def _consume_field_value(query: str, field: str, start: int) -> tuple[str, int] | None:
+    """
+    Consume a field value starting at ``start``: a multi-word date keyword phrase
+    (date fields only), or a bare/quoted value, then absorb any comma-joined
+    continuation that is not a clause separator. ``resolve_commas`` later splits a
+    multi-value field's joined value into a ``FieldValueList``; for other fields
+    the comma stays literal.
+
+    Returns ``(value, end_index)``, or None when no value can be consumed at
+    ``start``.
+    """
+    n = len(query)
+    consumed = None
+    if field in DATE_FIELDS:
+        km = _KEYWORD_VALUE_RE.match(query, start)
+        # Accept the keyword phrase only when it ends at a value terminator, so a
+        # longer word that merely starts with a keyword is not cut short.
+        if km is not None and (km.end() >= n or query[km.end()] in " \t),"):
+            consumed = (km.group(0), km.end())
+    if consumed is None:
+        # Not a keyword phrase: fall back to a single bare or quoted value.
+        consumed = _consume_value(query, start)
+    if consumed is None:
+        return None
+    value, k = consumed
+    # Absorb ",next" continuations one value at a time.
+    while k < n and query[k] == ",":
+        if _looks_like_known_field(query, k + 1):
+            break  # clause separator: left for _maybe_comma to emit a Comma()
+        more = _consume_value(query, k + 1)
+        if more is None:
+            # Comma followed by whitespace/end/unterminated quote: leave the comma
+            # in place so the caller passes it through verbatim.
+            break
+        value = f"{value},{more[0]}"
+        k = more[1]
+    return value, k
+
+
+def _consume_range(
+    query: str,
+    start: int,
+    field: str,
+) -> tuple[FieldRange, int] | None:
+    """
+    Consume ``[lo TO hi]`` / ``{lo TO hi}`` from ``start`` (the bracket).
+
+    Returns ``(FieldRange, end_index)`` where ``end_index`` is just past the
+    closing bracket, or None when the bracket is never closed. Bounds are
+    whitespace-stripped; a missing bound is the empty string. Bracketed text that
+    ``_RANGE_RE`` does not match is treated as a lower bound with no upper bound.
+    """
+    open_br = query[start]
+    close_br = _CLOSE[open_br]
+    # Non-recursive: the first matching close bracket ends the range.
+    end = query.find(close_br, start + 1)
+    if end == -1:
+        return None
+    inner = query[start + 1 : end]
+    m = _RANGE_RE.match(inner)
+    if m is not None:
+        if m.group("lo") is not None or m.group("hi") is not None:
+            # Middle form: "lo TO hi" (either may be empty string)
+            lo = (m.group("lo") or "").strip()
+            hi = (m.group("hi") or "").strip()
+        elif m.group("lo2") is not None:
+            # Trailing form: "lo TO"
+            lo = m.group("lo2").strip()
+            hi = ""
+        else:
+            # Leading form: "TO hi"
+            lo = ""
+            hi = (m.group("hi2") or "").strip()
+    else:
+        # No TO separator recognised: the whole bracket content is the lower bound.
+        lo, hi = inner.strip(), ""
+    return FieldRange(field, open_br, lo, hi, close_br), end + 1
+
+
+def _consume_value(query: str, start: int) -> tuple[str, int] | None:
+    """
+    Read one bare or quoted value beginning at ``start``.
+
+    A quoted value runs to the next identical quote character and is returned
+    with its quotes; a bare value runs until whitespace, ``)`` or ``,``. Returns
+    ``(text, end_index)``, or None when ``start`` is at the end of the query, on
+    whitespace, or on an unterminated quote.
+    """
+    length = len(query)
+    if start >= length:
+        return None
+    first = query[start]
+    if first in (" ", "\t"):
+        return None
+    if first in ('"', "'"):
+        closing = query.find(first, start + 1)
+        if closing == -1:
+            return None
+        stop = closing + 1
+        return query[start:stop], stop
+    stop = start
+    while stop < length and query[stop] not in " \t),":
+        stop += 1
+    return query[start:stop], stop
+
+
+def _looks_like_known_field(query: str, pos: int) -> bool:
+    """Report whether a known ``field:`` token begins at ``pos``."""
+    hit = _FIELD_RE.match(query, pos)
+    if not hit:
+        return False
+    return hit.group("field") in KNOWN_FIELDS
+
+
+def _maybe_comma(query: str, i: int, tokens: list) -> int:
+    """
+    Emit ``Comma()`` when a clause-separator comma sits at ``i``.
+
+    A comma counts as a clause separator only when a known ``field:`` token
+    starts right after it. Returns the index to continue scanning from: ``i + 1``
+    when the comma was consumed, otherwise ``i`` unchanged.
+    """
+    if i >= len(query) or query[i] != ",":
+        return i
+    if not _looks_like_known_field(query, i + 1):
+        return i
+    tokens.append(Comma())
+    return i + 1
+
+
+def _split_unquoted_commas(value: str) -> tuple[str, ...]:
+    """
+    Split ``value`` on commas, keeping a quoted segment (``"a,b"``) intact.
+
+    A quote only opens a quoted segment at the start of a part, mirroring how
+    values are consumed by the scanner; an apostrophe inside a bare value stays
+    literal. Empty parts are dropped.
+    """
+    parts: list[str] = []
+    part_start = 0
+    i, n = 0, len(value)
+    while i < n:
+        ch = value[i]
+        if i == part_start and ch in "\"'":
+            closing = value.find(ch, i + 1)
+            if closing != -1:
+                # Skip the whole quoted segment; commas inside it are literal.
+                i = closing + 1
+                continue
+        if ch == ",":
+            parts.append(value[part_start:i])
+            part_start = i + 1
+        i += 1
+    parts.append(value[part_start:])
+    return tuple(p for p in parts if p)
+
+
+def resolve_commas(tokens: list) -> list:
+    """
+    Collapse value-list commas into ``FieldValueList`` and keep clause-separator
+    commas as ``Comma``. (Clause-sep commas are already emitted by ``scan`` via
+    the value-stop logic; this pass folds value-lists.)
+
+    Only ``FieldValue`` tokens for a field in ``MULTI_VALUE_FIELDS`` whose value
+    contains a comma are rewritten; every other token is returned unchanged and
+    in order. Commas inside a quoted value are not separators, so a token such
+    as ``tag:"a,b"`` is left as a single ``FieldValue`` rather than being split
+    into fragments with unbalanced quotes.
+    """
+    out: list = []
+    for tok in tokens:
+        if (
+            isinstance(tok, FieldValue)
+            and tok.field in MULTI_VALUE_FIELDS
+            and "," in tok.value
+        ):
+            values = _split_unquoted_commas(tok.value)
+            if values == (tok.value,):
+                # The only commas were inside quotes: nothing to fold.
+                out.append(tok)
+            else:
+                out.append(FieldValueList(tok.field, values))
+        else:
+            out.append(tok)
+    return out
+
+
+class SearchQueryError(ValueError):
+    """
+    Root of the search-query errors that a user can fix by editing the query.
+
+    The message must be safe to show to the user (no internal details). The view
+    layer catches this type and answers with HTTP 400, so every subclass added
+    later (unknown field, malformed range, wrapped parser errors) is handled the
+    same way.
+    """
+
+
+class InvalidDateQuery(SearchQueryError):
+    """Signals that a date field value or range bound could not be parsed."""
+
+    def __init__(self, field: str, value: str) -> None:
+        """Record the offending ``field`` and ``value`` and build the message."""
+        message = f"Invalid date value {value!r} for field {field!r}."
+        super().__init__(message)
+        self.field = field
+        self.value = value
+
+
+_DIGITS_RE = regex.compile(r"^\d{4}(?:\d{2}){0,2}$")
+_ISO_RE = regex.compile(r"^\d{4}(?:-\d{2}(?:-\d{2})?)?$")
+
+
+def translate_scalar(field: str, value: str, tz: tzinfo) -> str:
+    """
+    Turn a bare date-field value into a ``field:[lo TO hi]`` Tantivy range.
+
+    Recognised shapes, checked in order: a date keyword (quotes and case
+    ignored), a partial or full date (``YYYY``, ``YYYYMM``, ``YYYYMMDD`` or the
+    dashed ISO equivalents), and a 14-digit ``YYYYMMDDHHMMSS`` timestamp that is
+    read as UTC and rendered as a single-instant range.
+
+    Raises:
+        InvalidDateQuery: the value has a date shape but is not a real date, or
+            has no recognised shape at all.
+    """
+    keyword = value.strip("\"'").lower()
+    if keyword in _DATE_KEYWORDS:
+        if field in _DATE_ONLY_FIELDS:
+            keyword_range = _date_only_range(keyword, tz)
+        else:
+            keyword_range = _datetime_range(keyword, tz)
+        return f"{field}:{keyword_range}"
+
+    if _DIGITS_RE.match(value) or _ISO_RE.match(value):
+        span = _precision_bounds(value.replace("-", ""))
+        if span is None:
+            raise InvalidDateQuery(field, value)
+        return _field_range_from_dates(field, span[0], span[1], tz)
+
+    if regex.fullmatch(r"\d{14}", value):
+        # Slice boundaries for year, month, day, hour, minute, second.
+        cuts = (0, 4, 6, 8, 10, 12, 14)
+        try:
+            pieces = [int(value[a:b]) for a, b in zip(cuts, cuts[1:])]
+            stamp = datetime(*pieces, tzinfo=UTC)
+        except ValueError:
+            raise InvalidDateQuery(field, value) from None
+        iso = _fmt(stamp)
+        return f"{field}:[{iso} TO {iso}]"
+
+    # Anything else is reported as a malformed date instead of silently matching
+    # nothing or producing invalid Tantivy syntax.
+    raise InvalidDateQuery(field, value)
+
+
+# Open-bound sentinels for date ranges. These far-past/far-future strings allow
+# open-ended ranges to be expressed as Tantivy string queries until tantivy-py
+# exposes Query.range_query(..., None) on Date fields (see module TODO).
+OPEN_LO = "0001-01-01T00:00:00Z"
+OPEN_HI = "9999-12-31T23:59:59Z"
+
+
+# Matches compact now-offset tokens like now-7d, now+1h, now-30m.
+_NOW_COMPACT_RE = regex.compile(
+    r"^now(?P<sign>[+-])(?P<n>\d+)(?P<unit>[dhm])$",
+    regex.IGNORECASE,
+)
+
+# Matches "±N <unit>" Whoosh-style offsets (e.g. -7 days, -1 week, +3 hours)
+# Unit is singular or plural; sign prefix is mandatory.
+_NOW_SPACED_RE = regex.compile(
+    r"^(?P<sign>[+-])(?P<n>\d+)\s*"
+    r"(?P<unit>second|minute|hour|day|week|month|year)s?$",
+    regex.IGNORECASE,
+)
+
+
+def _resolve_relative_bound(token: str) -> datetime | None:
+    """
+    Resolve a relative bound token to an exact UTC instant, or return None.
+
+    Supported forms:
+      - ``now``            -> current UTC instant
+      - ``now+/-<n>d/h/m`` -> now +/- timedelta (d=days, h=hours, m=minutes)
+      - ``+/-N <unit>``    -> now +/- delta; month/year use relativedelta
+
+    None is returned both for tokens that match none of these forms and for
+    offsets too large to represent (e.g. ``now-99999999999d``), so the caller
+    can report them as an invalid date instead of crashing on OverflowError.
+    """
+    stripped = token.strip()
+    now = datetime.now(UTC)
+
+    if stripped.lower() == "now":
+        return now
+
+    compact = _NOW_COMPACT_RE.match(stripped)
+    m = compact or _NOW_SPACED_RE.match(stripped)
+    if m is None:
+        return None
+
+    unit = m.group("unit").lower()
+    compact_units = {"d": "days", "h": "hours", "m": "minutes"}
+    try:
+        n = int(m.group("n"))
+        # Build only the delta for the requested unit: constructing every unit
+        # eagerly would overflow on e.g. weeks for a large but valid second count.
+        delta: timedelta | relativedelta
+        if compact is not None:
+            delta = timedelta(**{compact_units[unit]: n})
+        elif unit in ("month", "year"):
+            delta = relativedelta(**{f"{unit}s": n})
+        else:
+            delta = timedelta(**{f"{unit}s": n})
+        return now + delta if m.group("sign") == "+" else now - delta
+    except (OverflowError, ValueError):
+        # Offset magnitude or resulting date is outside the supported range.
+        return None
+
+
+def _bound_datetimes(
+    field: str,
+    token: str,
+    tz: tzinfo,
+) -> tuple[datetime, datetime] | None:
+    """
+    Return (floor_dt, ceil_dt) UTC datetimes for a single range bound token, or
+    None if the token is unparsable. ``now`` and relative offsets resolve to the
+    current instant (floor == ceil == that instant; no day-flooring).
+
+    A full ISO datetime (anything containing ``T``) is also an exact instant; a
+    naive one is taken as UTC. Partial dates are expanded to the bounds of the
+    year/month/day they name via ``_precision_bounds``.
+    """
+    token = token.strip()
+
+    # Try relative/now forms first (before stripping hyphens which would mangle them).
+    rel = _resolve_relative_bound(token)
+    if rel is not None:
+        return rel, rel
+
+    # Full ISO datetime token (contains "T"): parse directly and return an exact
+    # instant (floor == ceil). Python 3.11+ datetime.fromisoformat accepts trailing Z.
+    if "T" in token:
+        try:
+            dt = datetime.fromisoformat(token)
+            # Ensure timezone-aware UTC result.
+            dt = dt.replace(tzinfo=UTC) if dt.tzinfo is None else dt.astimezone(UTC)
+        except (ValueError, OverflowError):
+            # OverflowError: converting an offset datetime at the edge of the
+            # supported range (e.g. 0001-01-01T00:00:00+05:00) to UTC falls outside
+            # datetime's range; treat it as unparsable like any other bad token.
+            return None
+        return dt, dt
+
+    digits = token.replace("-", "")
+    bounds = _precision_bounds(digits)
+    if bounds is None:
+        return None
+    start, end = bounds
+    return _utc_bounds_for_field(field, start, end, tz)
+
+
+def _render(tok: Token, tz: tzinfo) -> str:
+    """
+    Convert one token into its Tantivy query-string fragment.
+
+    Passthrough text is emitted unchanged and a ``Comma`` becomes `` AND ``.
+    Field tokens have their alias resolved first; date fields are then
+    translated to ISO ranges, other fields are written back as typed.
+    """
+    if isinstance(tok, Passthrough):
+        return tok.raw
+    if isinstance(tok, Comma):
+        return " AND "
+    if not isinstance(tok, (FieldValueList, FieldValue, FieldRange)):
+        return ""  # pragma: no cover
+
+    name = FIELD_ALIASES.get(tok.field, tok.field)
+    if isinstance(tok, FieldValueList):
+        return " AND ".join(f"{name}:{v}" for v in tok.values)
+
+    is_date = name in DATE_FIELDS
+    if isinstance(tok, FieldValue):
+        if is_date:
+            return translate_scalar(name, tok.value, tz)
+        return f"{name}:{tok.value}"
+
+    # Only FieldRange remains.
+    if is_date:
+        return translate_range(name, tok.lo, tok.hi, tz)
+    return f"{name}:{tok.open}{tok.lo} TO {tok.hi}{tok.close}"
+
+
+# Post-render operator normalization patterns: collapse repeated whitespace and
+# strip spaced/trailing Tantivy boolean operators that would otherwise be invalid.
+_MULTI_SPACE_RE = regex.compile(r" {2,}")
+_TRAILING_OP_RE = regex.compile(r"\s+[-+]+\s*$")
+_SPACED_OP_RE = regex.compile(r"\s+[-+]\s+")
+
+
+def _normalize_operators(text: str) -> str:
+    """
+    Collapse multiple spaces, strip trailing dangling operators, and replace
+    spaced operators (`` - `` / `` + ``) with a single space.
+
+    This is a post-render pass over the full rendered string, not only over
+    Passthrough fragments. Rendered ISO date ranges contain no whitespace-
+    delimited ``+``/``-`` and so come through unchanged.
+    NOTE(review): a quoted phrase or non-date range containing `` - `` or
+    doubled spaces is rewritten as well - confirm that is acceptable.
+    """
+    # Runs of two or more spaces become one.
+    text = _MULTI_SPACE_RE.sub(" ", text)
+    # Drop a dangling "+"/"-" left at the very end of the query.
+    text = _TRAILING_OP_RE.sub("", text).strip()
+    # A lone "+"/"-" surrounded by whitespace is replaced by a single space.
+    text = _SPACED_OP_RE.sub(" ", text).strip()
+    return text
+
+
+def translate_query(raw: str, tz: tzinfo) -> str:
+    """
+    Rewrite a raw Whoosh-style query as Tantivy-compatible syntax.
+
+    Pipeline: scan into tokens, fold value-list commas, render each token, then
+    normalize stray operators in the joined result.
+    """
+    fragments = [_render(tok, tz) for tok in resolve_commas(scan(raw))]
+    return _normalize_operators("".join(fragments))
+
+
+def translate_range(field: str, lo: str, hi: str, tz: tzinfo) -> str:
+    """Translate a date-field ``[lo TO hi]`` range to a Tantivy ISO range string.
+
+    Each bound may be a partial date (YYYY, YYYYMM, YYYYMMDD or the dashed ISO
+    forms), a full ISO datetime, ``now`` or a relative offset. An empty bound is
+    open and rendered as OPEN_LO / OPEN_HI. When both bounds are present and the
+    lower one starts after the upper one, they are swapped so the emitted span
+    runs from the earlier floor to the later ceiling.
+
+    Raises:
+        InvalidDateQuery: a non-empty bound cannot be parsed.
+    """
+
+    def parse_bound(text: str) -> tuple[datetime, datetime] | None:
+        """Return the (floor, ceil) pair for a bound, or None for an open bound."""
+        text = text.strip()
+        if not text:
+            return None
+        pair = _bound_datetimes(field, text, tz)
+        if pair is None:
+            raise InvalidDateQuery(field, text)
+        return pair
+
+    lower = parse_bound(lo)
+    upper = parse_bound(hi)
+
+    # A reversed range can only be detected when both ends are given.
+    if lower is not None and upper is not None and lower[0] > upper[0]:
+        lower, upper = upper, lower
+
+    start_iso = OPEN_LO if lower is None else _fmt(lower[0])
+    end_iso = OPEN_HI if upper is None else _fmt(upper[1])
+    return f"{field}:[{start_iso} TO {end_iso}]"
@@ -1,11 +1,15 @@
 from __future__ import annotations

+import tempfile
 from typing import TYPE_CHECKING

 import pytest
+import tantivy

 from documents.search._backend import TantivyBackend
 from documents.search._backend import reset_backend
+from documents.search._schema import build_schema
+from documents.search._tokenizer import register_tokenizers

 if TYPE_CHECKING:
    from collections.abc import Generator
@@ -31,3 +35,11 @@ def backend() -> Generator[TantivyBackend, None, None]:
    finally:
        b.close()
        reset_backend()
+
+
+@pytest.fixture(scope="module")
+def index() -> Generator[tantivy.Index, None, None]:
+    """
+    A real Tantivy index for parse-acceptance tests (module scope for speed).
+
+    The index lives in a temporary directory that is removed when the module's
+    tests finish, so repeated test runs do not accumulate index directories.
+    """
+    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as path:
+        idx = tantivy.Index(build_schema(), path=path)
+        register_tokenizers(idx, "english")
+        yield idx
@@ -13,7 +13,6 @@ import time_machine

 from documents.search._query import _date_only_range
 from documents.search._query import _datetime_range
-from documents.search._query import _rewrite_compact_date
 from documents.search._query import build_permission_filter
 from documents.search._query import normalize_query
 from documents.search._query import parse_simple_text_highlight_query
@@ -21,6 +20,7 @@ from documents.search._query import parse_user_query
 from documents.search._query import rewrite_natural_date_keywords
 from documents.search._schema import build_schema
 from documents.search._tokenizer import register_tokenizers
+from documents.search._translate import InvalidDateQuery

 if TYPE_CHECKING:
    from django.contrib.auth.base_user import AbstractBaseUser
@@ -405,12 +405,14 @@ class TestWhooshQueryRewriting:
        assert lo == "2023-12-01T05:00:00Z"
        assert hi == "2023-12-02T05:00:00Z"

-    def test_8digit_invalid_date_passes_through_unchanged(self) -> None:
-        assert rewrite_natural_date_keywords("added:20231340", UTC) == "added:20231340"
-
-    def test_compact_14digit_invalid_date_passes_through_unchanged(self) -> None:
-        # Month=13 makes datetime() raise ValueError; the token must be left as-is
-        assert _rewrite_compact_date("20231300120000") == "20231300120000"
+    def test_8digit_invalid_date_raises(self) -> None:
+        """An 8-digit value that is not a real calendar date is rejected."""
+        # The translation pipeline raises InvalidDateQuery for unparsable dates
+        # (e.g. month=13) so the API can surface a 400 telling the user the date
+        # is malformed instead of silently returning zero results.
+        with pytest.raises(InvalidDateQuery) as exc_info:
+            rewrite_natural_date_keywords("added:20231340", UTC)
+        # The exception carries the offending field and the value as typed.
+        assert exc_info.value.field == "added"
+        assert exc_info.value.value == "20231340"


 class TestParseUserQuery:
@@ -463,6 +465,67 @@ class TestParseUserQuery:
    ) -> None:
        assert isinstance(parse_user_query(query_index, raw_query, UTC), tantivy.Query)

+    @pytest.mark.parametrize(
+        "raw_query",
+        [
+            # Partial date scalar (year only)
+            pytest.param("created:2020", id="created_year_scalar"),
+            # 8-digit compact date range in brackets
+            pytest.param(
+                "created:[20200101 TO 20201231]",
+                id="created_8digit_bracket_range",
+            ),
+            # Comma-separated field + date range (Whoosh v2 multi-clause syntax)
+            pytest.param(
+                "title:x,created:[2020 TO 2021]",
+                id="title_comma_created_range",
+            ),
+            # Field alias: type -> document_type
+            pytest.param("type:invoice", id="type_alias"),
+            # Multi-word date keyword
+            pytest.param("created:previous week", id="created_previous_week"),
+            # Full ISO datetime range
+            pytest.param(
+                "created:[2026-01-01T00:00:00Z TO 2026-06-01T00:00:00Z]",
+                id="created_iso_range",
+            ),
+            # Comma-separated ISO ranges (Whoosh v2 syntax)
+            pytest.param(
+                "created:[2026-01-01T00:00:00Z TO 2026-06-01T00:00:00Z],"
+                "added:[2026-05-01T00:00:00Z TO 2026-06-01T00:00:00Z]",
+                id="comma_iso_ranges",
+            ),
+        ],
+    )
+    def test_advanced_search_queries_do_not_raise(
+        self,
+        query_index: tantivy.Index,
+        raw_query: str,
+    ) -> None:
+        """
+        End-to-end: queries that the frontend sends must parse without raising.
+
+        This tests the full pipeline: translate_query -> tantivy parse_query.
+        Equivalent to asserting HTTP 200 (not 400) for each query form.
+        """
+        # Freeze the clock (tick=False) so relative keywords such as
+        # "previous week" resolve to the same range on every run.
+        with time_machine.travel(datetime(2026, 6, 15, 12, 0, tzinfo=UTC), tick=False):
+            assert isinstance(
+                parse_user_query(query_index, raw_query, UTC),
+                tantivy.Query,
+            )
+
+    def test_invalid_date_propagates_not_swallowed(
+        self,
+        query_index: tantivy.Index,
+    ) -> None:
+        """InvalidDateQuery raised during translation reaches the caller."""
+        # parse_user_query falls back to the raw query on unexpected translation
+        # errors, but an InvalidDateQuery is intentional and must propagate so the
+        # view can return a 400 instead of silently parsing the raw (invalid) date.
+        with pytest.raises(InvalidDateQuery) as exc_info:
+            # "202023" has the YYYYMM shape but month 23 does not exist.
+            parse_user_query(query_index, "created:202023", UTC)
+        assert exc_info.value.field == "created"
+        assert exc_info.value.value == "202023"
+

 class TestYearRangeRewriting:
    """Whoosh-style year-only date ranges must be rewritten to ISO 8601."""
@@ -542,11 +605,16 @@ class TestYearRangeRewriting:
        assert rewrite_natural_date_keywords(original, UTC) == original

    def test_8digit_in_brackets_not_matched_as_year_range(self) -> None:
-        # [YYYYMMDD TO YYYYMMDD] has 8-digit values - must not be caught by year rewriter
+        # [YYYYMMDD TO YYYYMMDD]: the translation layer converts 8-digit bounds to
+        # ISO day ranges. 20200101 -> 2020-01-01T00:00:00Z (lo of that day);
+        # 20201231 -> the ceil of Dec 31 = 2021-01-01T00:00:00Z (exclusive end).
+        # This is the correct and accepted behavior: old compact form becomes a
+        # proper Tantivy-parseable ISO range.
        original = "created:[20200101 TO 20201231]"
        result = rewrite_natural_date_keywords(original, UTC)
-        assert "20200101" in result or "2020-01-01" in result
-        assert "20201231" in result or "2020-12-31" in result
+        lo, hi = _range(result, "created")
+        assert lo == "2020-01-01T00:00:00Z"
+        assert hi == "2021-01-01T00:00:00Z"


 class TestNonDateFieldsNotRewritten:
@@ -606,6 +674,16 @@ class TestNormalizeQuery:
    def test_normalize_expands_comma_separated_tags(self) -> None:
        assert normalize_query("tag:foo,bar") == "tag:foo AND tag:bar"

+    def test_normalize_comma_between_range_expressions(self) -> None:
+        """A comma joining two field ranges is rewritten to `` AND ``."""
+        # Comma-separated field range expressions (Whoosh v2 syntax) must be
+        # converted to AND so Tantivy does not receive an invalid comma.
+        q = "created:[2026-01-01T00:00:00Z TO 2026-06-01T00:00:00Z],added:[2026-05-01T00:00:00Z TO 2026-06-01T00:00:00Z]"
+        # The ranges themselves are expected to come through unchanged.
+        assert normalize_query(q) == (
+            "created:[2026-01-01T00:00:00Z TO 2026-06-01T00:00:00Z]"
+            " AND "
+            "added:[2026-05-01T00:00:00Z TO 2026-06-01T00:00:00Z]"
+        )
+
    def test_normalize_expands_three_values(self) -> None:
        assert normalize_query("tag:foo,bar,baz") == "tag:foo AND tag:bar AND tag:baz"

@@ -0,0 +1,742 @@
+from __future__ import annotations
+
+from datetime import UTC
+from datetime import datetime
+from typing import TYPE_CHECKING
+from zoneinfo import ZoneInfo
+
+import pytest
+import time_machine
+
+from documents.search._dates import _precision_bounds
+
+if TYPE_CHECKING:
+    import tantivy
+from documents.search._query import _FIELD_BOOSTS
+from documents.search._query import DEFAULT_SEARCH_FIELDS
+from documents.search._translate import OPEN_HI
+from documents.search._translate import OPEN_LO
+from documents.search._translate import Comma
+from documents.search._translate import FieldRange
+from documents.search._translate import FieldValue
+from documents.search._translate import FieldValueList
+from documents.search._translate import InvalidDateQuery
+from documents.search._translate import Passthrough
+from documents.search._translate import resolve_commas
+from documents.search._translate import scan
+from documents.search._translate import translate_query
+from documents.search._translate import translate_range
+from documents.search._translate import translate_scalar
+
+
+@pytest.mark.search
+class TestPrecisionBounds:
+    """``_precision_bounds`` maps YYYY / YYYYMM / YYYYMMDD digits to date bounds."""
+
+    @pytest.mark.parametrize(
+        ("digits", "expected"),
+        [
+            # Each case: (start of the named period, start of the following period).
+            ("2020", ((2020, 1, 1), (2021, 1, 1))),
+            ("202003", ((2020, 3, 1), (2020, 4, 1))),
+            ("202012", ((2020, 12, 1), (2021, 1, 1))),
+            ("20200115", ((2020, 1, 15), (2020, 1, 16))),
+            ("20201231", ((2020, 12, 31), (2021, 1, 1))),
+        ],
+    )
+    def test_valid(self, digits, expected):
+        """Valid digit strings yield the expected (lo, hi) year/month/day pairs."""
+        lo, hi = _precision_bounds(digits)
+        assert (lo.year, lo.month, lo.day) == expected[0]
+        assert (hi.year, hi.month, hi.day) == expected[1]
+
+    # Impossible month/day values, too-short input and non-digits are all rejected.
+    @pytest.mark.parametrize("digits", ["202023", "20200230", "20201301", "20", "abcd"])
+    def test_invalid_returns_none(self, digits):
+        """Unparsable digit strings return None instead of raising."""
+        assert _precision_bounds(digits) is None
+
+
+@pytest.mark.search
+class TestScan:
+    """Tokenization of raw queries by ``scan()``, including malformed input."""
+
+    def test_plain_words_are_passthrough(self):
+        assert scan("bank statement") == [Passthrough("bank statement")]
+
+    def test_field_value(self):
+        assert scan("created:2020") == [FieldValue("created", "2020")]
+
+    def test_field_value_in_boolean(self):
+        toks = scan("created:2020 OR foo")
+        assert toks == [
+            FieldValue("created", "2020"),
+            Passthrough(" OR foo"),
+        ]
+
+    def test_field_value_in_parens(self):
+        toks = scan("(created:2020 OR foo)")
+        assert toks == [
+            Passthrough("("),
+            FieldValue("created", "2020"),
+            Passthrough(" OR foo)"),
+        ]
+
+    def test_quoted_value(self):
+        # The quotes are kept as part of the value.
+        assert scan('correspondent:"A B"') == [FieldValue("correspondent", '"A B"')]
+
+    def test_field_range(self):
+        assert scan("created:[2020 TO 2021]") == [
+            FieldRange("created", "[", "2020", "2021", "]"),
+        ]
+
+    @pytest.mark.parametrize(
+        ("query", "expected"),
+        [
+            pytest.param(
+                "created:[2020 to]",
+                FieldRange("created", "[", "2020", "", "]"),
+                id="open_upper",
+            ),
+            pytest.param(
+                "created:[to 2020]",
+                FieldRange("created", "[", "", "2020", "]"),
+                id="open_lower",
+            ),
+        ],
+    )
+    def test_open_range(self, query, expected):
+        assert scan(query) == [expected]
+
+    def test_comma_inside_range_not_split(self):
+        # The query contains no comma at all; this only checks that a complete
+        # range is emitted as exactly one token.
+        toks = scan("created:[2020 TO 2021]")
+        assert len(toks) == 1
+
+    # --- Edge-case / regression tests (scan must never raise) ---
+
+    def test_url_is_passthrough(self):
+        # "http" is not a known field; the whole URL must pass through verbatim.
+        assert scan("http://example.com") == [Passthrough("http://example.com")]
+
+    def test_unterminated_quote_is_passthrough(self):
+        # title is a known field but the quoted value has no closing quote;
+        # _consume_value returns None so the whole string falls into passthrough.
+        assert scan('title:"abc') == [Passthrough('title:"abc')]
+
+    def test_unterminated_bracket_is_passthrough(self):
+        # created is a known field but the range bracket is never closed;
+        # _consume_range returns None so the whole string falls into passthrough.
+        assert scan("created:[2020") == [Passthrough("created:[2020")]
+
+    def test_empty_value_at_end_is_passthrough(self):
+        # created is a known field but there is no value after the colon
+        # (_consume_value returns None for start >= n), so passthrough.
+        assert scan("created:") == [Passthrough("created:")]
+
+    def test_value_containing_colon(self):
+        # The bare-word value reader stops at whitespace, ")" or ",", not at a
+        # colon, so "2020:30" is consumed as a single value token.
+        assert scan("created:2020:30") == [FieldValue("created", "2020:30")]
+
+    def test_comma_followed_by_unconsumable_value_stops(self):
+        # A comma followed by whitespace is neither a value-list continuation nor a
+        # clause separator: the value stops and the comma stays as passthrough.
+        assert scan("tag:foo, bar") == [
+            FieldValue("tag", "foo"),
+            Passthrough(", bar"),
+        ]
+
+    def test_bracket_without_to_is_open_upper_bound(self):
+        # A bracketed value with no TO falls back to (value, "") -> open upper bound.
+        assert scan("created:[2020]") == [
+            FieldRange("created", "[", "2020", "", "]"),
+        ]
+
+    def test_known_field_name_midword_is_passthrough(self):
+        # A known field name embedded mid-word is not a field token (the
+        # word-boundary guard); the whole run stays passthrough.
+        assert scan("xtag:foo") == [Passthrough("xtag:foo")]
+
+
+@pytest.mark.search
+class TestCommaResolution:
+    """How ``scan()`` + ``resolve_commas()`` treat value-list vs clause commas."""
+
+    def test_value_list_multi_value_field(self):
+        toks = resolve_commas(scan("tag:foo,bar"))
+        assert toks == [FieldValueList("tag", ("foo", "bar"))]
+
+    def test_value_list_three(self):
+        toks = resolve_commas(scan("tag_id:1,2,3"))
+        assert toks == [FieldValueList("tag_id", ("1", "2", "3"))]
+
+    def test_text_field_comma_is_literal(self):
+        # correspondent is not multi-value: comma stays inside the value.
+        toks = resolve_commas(scan("correspondent:foo,bar"))
+        assert toks == [FieldValue("correspondent", "foo,bar")]
+
+    def test_clause_separator_before_known_field(self):
+        # "type" after the comma is a known field, so the comma separates clauses
+        # instead of extending the tag value list.
+        toks = resolve_commas(scan("tag:foo,type:bar"))
+        assert toks == [FieldValue("tag", "foo"), Comma(), FieldValue("type", "bar")]
+
+    def test_clause_separator_after_range(self):
+        toks = resolve_commas(scan("created:[2020 TO 2021],added:[2022 TO 2023]"))
+        assert toks == [
+            FieldRange("created", "[", "2020", "2021", "]"),
+            Comma(),
+            FieldRange("added", "[", "2022", "2023", "]"),
+        ]
+
+    def test_clause_separator_after_quote(self):
+        toks = resolve_commas(scan('correspondent:"A B",created:[2020 TO 2021]'))
+        assert toks == [
+            FieldValue("correspondent", '"A B"'),
+            Comma(),
+            FieldRange("created", "[", "2020", "2021", "]"),
+        ]
+
+    def test_url_comma_is_literal_passthrough(self):
+        # No known field is involved, so the comma is ordinary passthrough text.
+        toks = resolve_commas(scan("http://example.com/a,b"))
+        assert toks == [Passthrough("http://example.com/a,b")]
+
+    def test_non_multi_value_comma_is_literal(self):
+        # title is not in MULTI_VALUE_FIELDS: comma stays inside the value.
+        toks = resolve_commas(scan("title:10,20"))
+        assert toks == [FieldValue("title", "10,20")]
+
+    def test_clause_separator_before_known_date_field(self):
+        # The comma between a bare value and a known date field acts as a
+        # clause separator; both sides survive as distinct tokens.
+        toks = resolve_commas(scan("correspondent:foo,created:[2020 TO 2021]"))
+        assert toks == [
+            FieldValue("correspondent", "foo"),
+            Comma(),
+            FieldRange("created", "[", "2020", "2021", "]"),
+        ]
+
+
+@pytest.mark.search
+class TestTranslateScalar:
+    @pytest.mark.parametrize(
+        ("field", "value", "expected"),
+        [
+            (
+                "created",
+                "2020",
+                "created:[2020-01-01T00:00:00Z TO 2021-01-01T00:00:00Z]",
+            ),
+            (
+                "created",
+                "202003",
+                "created:[2020-03-01T00:00:00Z TO 2020-04-01T00:00:00Z]",
+            ),
+            (
+                "created",
+                "20200115",
+                "created:[2020-01-15T00:00:00Z TO 2020-01-16T00:00:00Z]",
+            ),
+            (
+                "created",
+                "2020-01-15",
+                "created:[2020-01-15T00:00:00Z TO 2020-01-16T00:00:00Z]",
+            ),
+            (
+                "created",
+                "2020-03",
+                "created:[2020-03-01T00:00:00Z TO 2020-04-01T00:00:00Z]",
+            ),
+        ],
+    )
+    def test_partial_and_iso_dates(self, field: str, value: str, expected: str) -> None:
+        assert translate_scalar(field, value, UTC) == expected
+
+    def test_invalid_date_raises(self) -> None:
+        with pytest.raises(InvalidDateQuery) as exc_info:
+            translate_scalar("created", "202023", UTC)
+        assert exc_info.value.field == "created"
+        assert exc_info.value.value == "202023"
+
+    def test_keyword_delegates(self) -> None:
+        # keyword path produces a range; just assert it is a created range
+        out = translate_scalar("created", "today", UTC)
+        assert out.startswith("created:[") and out.endswith("]")
+
+    def test_14digit_compact_datetime(self) -> None:
+        out = translate_scalar("created", "20240115120000", UTC)
+        assert "20240115120000" not in out
+        assert out.startswith("created:")
+        assert out == "created:[2024-01-15T12:00:00Z TO 2024-01-15T12:00:00Z]"
+
+    def test_14digit_invalid_month_raises(self) -> None:
+        with pytest.raises(InvalidDateQuery) as exc_info:
+            translate_scalar("created", "20231300120000", UTC)
+        assert exc_info.value.field == "created"
+        assert exc_info.value.value == "20231300120000"
+
+    def test_unrecognized_value_raises(self) -> None:
+        """A value matching no supported date form raises InvalidDateQuery.
+
+        "garbage" is neither a keyword, a digit string, an ISO date nor a
+        compact timestamp. Raising is preferred over emitting invalid Tantivy
+        syntax or a query that silently matches nothing.
+        """
+        bad_value = "garbage"
+        with pytest.raises(InvalidDateQuery) as raised:
+            translate_scalar("created", bad_value, UTC)
+        err = raised.value
+        assert err.field == "created"
+        assert err.value == bad_value
+
+
+@pytest.mark.search
+class TestTranslateRange:
+    """translate_range() renders explicit ``created`` bounds as one ISO range."""
+
+    @pytest.mark.parametrize(
+        ("lo", "hi", "expected"),
+        [
+            ("2005", "2009", "created:[2005-01-01T00:00:00Z TO 2010-01-01T00:00:00Z]"),
+            (
+                "202001",
+                "202006",
+                "created:[2020-01-01T00:00:00Z TO 2020-07-01T00:00:00Z]",
+            ),
+            (
+                "20200101",
+                "20201231",
+                "created:[2020-01-01T00:00:00Z TO 2021-01-01T00:00:00Z]",
+            ),
+            (
+                "2020-01-01",
+                "2020-12-31",
+                "created:[2020-01-01T00:00:00Z TO 2021-01-01T00:00:00Z]",
+            ),
+        ],
+    )
+    def test_absolute_ranges(self, lo: str, hi: str, expected: str) -> None:
+        # The rendered upper bound is the start of the period following ``hi``
+        # (year, month or day), as the expected strings above show.
+        assert translate_range("created", lo, hi, UTC) == expected
+
+    def test_reversed_swaps(self) -> None:
+        # Bounds given high-to-low render the same range as "2005" TO "2009".
+        assert translate_range("created", "2009", "2005", UTC) == (
+            "created:[2005-01-01T00:00:00Z TO 2010-01-01T00:00:00Z]"
+        )
+
+    def test_open_upper(self) -> None:
+        # An empty upper bound is rendered as the OPEN_HI sentinel.
+        out = translate_range("created", "2020", "", UTC)
+        assert out == f"created:[2020-01-01T00:00:00Z TO {OPEN_HI}]"
+
+    def test_open_lower(self) -> None:
+        # An empty lower bound is rendered as the OPEN_LO sentinel.
+        out = translate_range("created", "", "2020", UTC)
+        assert out == f"created:[{OPEN_LO} TO 2021-01-01T00:00:00Z]"
+
+    def test_invalid_bound_raises(self) -> None:
+        # The low bound is malformed; the error must name it.
+        with pytest.raises(InvalidDateQuery) as exc_info:
+            translate_range("created", "202023", "2025", UTC)
+        assert exc_info.value.field == "created"
+        assert exc_info.value.value == "202023"
+
+    def test_invalid_high_bound_raises(self) -> None:
+        # Low bound parses, high bound does not -> raise on the high bound.
+        with pytest.raises(InvalidDateQuery) as exc_info:
+            translate_range("created", "2020", "garbage", UTC)
+        assert exc_info.value.field == "created"
+        assert exc_info.value.value == "garbage"
+
+
+@pytest.mark.search
+class TestTranslateQuery:
+    """Whole-query checks of translate_query(): golden output and parseability."""
+
+    # (raw query, expected rendering) pairs consumed by test_golden.
+    _GOLDEN_CASES = [
+        (
+            "created:2020",
+            "created:[2020-01-01T00:00:00Z TO 2021-01-01T00:00:00Z]",
+        ),
+        ("tag:foo,bar", "tag:foo AND tag:bar"),
+        # the user-facing 'type' alias is rewritten to the real 'document_type' field
+        ("tag:foo,type:bar", "tag:foo AND document_type:bar"),
+        (
+            "created:[2020 TO 2021],added:[2022 TO 2023]",
+            "created:[2020-01-01T00:00:00Z TO 2022-01-01T00:00:00Z]"
+            " AND "
+            "added:[2022-01-01T00:00:00Z TO 2024-01-01T00:00:00Z]",
+        ),
+        # correspondent is not multi-value, so its comma stays inside the value
+        ("correspondent:foo,bar", "correspondent:foo,bar"),
+    ]
+
+    # Raw queries whose translation the real Tantivy parser must accept.
+    _PARSEABLE_QUERIES = [
+        "created:2020",
+        "created:202003",
+        "created:[20200101 TO 20201231]",
+        "created:[2020-01-01 TO 2020-12-31]",
+        "created:[2020 to]",
+        "created:[to 2020]",
+        "title:x,created:[2020 TO 2021]",
+        "created:2020 OR foo",
+        "(created:2020 OR invoice)",
+        "tag:foo,type:bar",
+        "bank statement",
+    ]
+
+    @pytest.mark.parametrize(("raw", "expected"), _GOLDEN_CASES)
+    def test_golden(self, raw: str, expected: str) -> None:
+        rendered = translate_query(raw, UTC)
+        assert rendered == expected
+
+    @pytest.mark.parametrize("raw", _PARSEABLE_QUERIES)
+    def test_parse_acceptance(self, index: tantivy.Index, raw: str) -> None:
+        rendered = translate_query(raw, UTC)
+        # Passing means parse_query() did not raise; its result is not inspected.
+        index.parse_query(
+            rendered,
+            DEFAULT_SEARCH_FIELDS,
+            field_boosts=_FIELD_BOOSTS,
+        )
+
+
+@pytest.mark.search
+class TestFieldAliasing:
+    """Whoosh-era field aliases are rewritten to the Tantivy schema names.
+
+    Covered aliases: type, path, type_id and path_id, which become
+    document_type, storage_path, document_type_id and storage_path_id.
+    """
+
+    def test_type_alias(self) -> None:
+        rendered = translate_query("type:invoice", UTC)
+        assert rendered == "document_type:invoice"
+
+    def test_path_alias(self) -> None:
+        rendered = translate_query("path:/foo/bar", UTC)
+        assert rendered == "storage_path:/foo/bar"
+
+    def test_type_id_alias(self) -> None:
+        rendered = translate_query("type_id:5", UTC)
+        assert rendered == "document_type_id:5"
+
+    def test_path_id_alias(self) -> None:
+        rendered = translate_query("path_id:7", UTC)
+        assert rendered == "storage_path_id:7"
+
+    def test_clause_separator_plus_alias(self) -> None:
+        # A comma between known fields separates clauses (rendered as AND),
+        # and the alias is still rewritten in the second clause.
+        rendered = translate_query("tag:foo,type:bar", UTC)
+        assert rendered == "tag:foo AND document_type:bar"
+
+    def test_type_range_alias(self) -> None:
+        # type is not a date field: the range body is left verbatim and only
+        # the field name is rewritten.
+        rendered = translate_query("type:[2020 TO 2021]", UTC)
+        assert rendered == "document_type:[2020 TO 2021]"
+
+    def test_parse_acceptance_type(self, index: tantivy.Index) -> None:
+        # The translated output must be accepted by the real Tantivy parser.
+        rendered = translate_query("type:invoice", UTC)
+        index.parse_query(
+            rendered,
+            DEFAULT_SEARCH_FIELDS,
+            field_boosts=_FIELD_BOOSTS,
+        )
+
+    def test_parse_acceptance_path(self, index: tantivy.Index) -> None:
+        rendered = translate_query("path:foo", UTC)
+        index.parse_query(
+            rendered,
+            DEFAULT_SEARCH_FIELDS,
+            field_boosts=_FIELD_BOOSTS,
+        )
+
+
+# Fixed "now" (2026-03-28 12:00 UTC) so relative-date tests are deterministic.
+_FROZEN_NOW = datetime(year=2026, month=3, day=28, hour=12, tzinfo=UTC)
+
+
+@pytest.mark.search
+class TestRelativeRanges:
+    """Relative range bounds are resolved against the frozen ``_FROZEN_NOW`` clock."""
+
+    # Rendering shared by every "seven days back until now" spelling below.
+    _LAST_7_DAYS = "added:[2026-03-21T12:00:00Z TO 2026-03-28T12:00:00Z]"
+
+    @time_machine.travel(_FROZEN_NOW, tick=False)
+    def test_minus_7_days_to_now(self) -> None:
+        rendered = translate_query("added:[-7 days to now]", UTC)
+        assert rendered == self._LAST_7_DAYS
+
+    @time_machine.travel(_FROZEN_NOW, tick=False)
+    def test_minus_1_week_to_now(self) -> None:
+        rendered = translate_query("added:[-1 week to now]", UTC)
+        assert rendered == self._LAST_7_DAYS
+
+    @time_machine.travel(_FROZEN_NOW, tick=False)
+    def test_minus_1_month_to_now(self) -> None:
+        rendered = translate_query("created:[-1 month to now]", UTC)
+        assert rendered == "created:[2026-02-28T12:00:00Z TO 2026-03-28T12:00:00Z]"
+
+    @time_machine.travel(_FROZEN_NOW, tick=False)
+    def test_minus_1_year_to_now(self) -> None:
+        rendered = translate_query("modified:[-1 year to now]", UTC)
+        assert rendered == "modified:[2025-03-28T12:00:00Z TO 2026-03-28T12:00:00Z]"
+
+    @time_machine.travel(_FROZEN_NOW, tick=False)
+    def test_minus_3_hours_to_now(self) -> None:
+        rendered = translate_query("added:[-3 hours to now]", UTC)
+        assert rendered == "added:[2026-03-28T09:00:00Z TO 2026-03-28T12:00:00Z]"
+
+    @time_machine.travel(_FROZEN_NOW, tick=False)
+    def test_uppercase_units(self) -> None:
+        # Units and the TO/NOW words are given in upper case here.
+        rendered = translate_query("added:[-1 WEEK TO NOW]", UTC)
+        assert rendered == self._LAST_7_DAYS
+
+    @time_machine.travel(_FROZEN_NOW, tick=False)
+    def test_now_minus_7d_compact(self) -> None:
+        rendered = translate_query("added:[now-7d TO now]", UTC)
+        assert rendered == self._LAST_7_DAYS
+
+    @time_machine.travel(_FROZEN_NOW, tick=False)
+    def test_reversed_range_swapped(self) -> None:
+        # The bounds arrive high-to-low (now+1h TO now-1h); the rendered range
+        # is ordered low-to-high: now-1h .. now+1h.
+        rendered = translate_query("added:[now+1h TO now-1h]", UTC)
+        assert rendered == "added:[2026-03-28T11:00:00Z TO 2026-03-28T13:00:00Z]"
+
+    @pytest.mark.parametrize(
+        "raw",
+        [
+            "added:[-7 days to now]",
+            "added:[-1 week to now]",
+            "created:[-1 month to now]",
+            "modified:[-1 year to now]",
+            "added:[-3 hours to now]",
+            "added:[now-7d TO now]",
+            "added:[now+1h TO now-1h]",
+        ],
+    )
+    @time_machine.travel(_FROZEN_NOW, tick=False)
+    def test_parse_acceptance(self, index: tantivy.Index, raw: str) -> None:
+        rendered = translate_query(raw, UTC)
+        # Passing means parse_query() did not raise; its result is not inspected.
+        index.parse_query(
+            rendered,
+            DEFAULT_SEARCH_FIELDS,
+            field_boosts=_FIELD_BOOSTS,
+        )
+
+
+@pytest.mark.search
+class TestOperatorNormalization:
+    """Operator clean-up that translate_query applies to its rendered output."""
+
+    def test_spaced_dash_removed(self) -> None:
+        rendered = translate_query("H52.1 - Kurzsichtigkeit", UTC)
+        assert rendered == "H52.1 Kurzsichtigkeit"
+
+    def test_spaced_dash_simple(self) -> None:
+        rendered = translate_query("bar - baz", UTC)
+        assert rendered == "bar baz"
+
+    def test_trailing_operator_stripped(self) -> None:
+        rendered = translate_query("foo -", UTC)
+        assert rendered == "foo"
+
+    def test_date_range_preserved(self) -> None:
+        # Normalization must leave the rendered ISO range intact.
+        rendered = translate_query("created:[2020 TO 2021]", UTC)
+        assert rendered == "created:[2020-01-01T00:00:00Z TO 2022-01-01T00:00:00Z]"
+
+    def test_date_scalar_with_or(self) -> None:
+        # The created scalar is rendered as a range, and the "OR foo" part
+        # must still be present in the output.
+        rendered = translate_query("created:2020 OR foo", UTC)
+        assert rendered.startswith("created:[")
+        assert "OR foo" in rendered
+
+    def test_parse_acceptance_spaced_dash(self, index: tantivy.Index) -> None:
+        rendered = translate_query("H52.1 - Kurzsichtigkeit", UTC)
+        index.parse_query(
+            rendered,
+            DEFAULT_SEARCH_FIELDS,
+            field_boosts=_FIELD_BOOSTS,
+        )
+
+    def test_parse_acceptance_trailing_op(self, index: tantivy.Index) -> None:
+        rendered = translate_query("foo -", UTC)
+        index.parse_query(
+            rendered,
+            DEFAULT_SEARCH_FIELDS,
+            field_boosts=_FIELD_BOOSTS,
+        )
+
+
+@pytest.mark.search
+class TestMultiWordDateKeywords:
+    """scan() keeps a multi-word date keyword together as one field value."""
+
+    def test_scan_previous_week_as_single_token(self) -> None:
+        # Expected: a single FieldValue holding "previous week", rather than
+        # FieldValue("created", "previous") followed by Passthrough(" week").
+        expected = [FieldValue("created", "previous week")]
+        assert scan("created:previous week") == expected
+
+    def test_scan_this_month_as_single_token(self) -> None:
+        expected = [FieldValue("added", "this month")]
+        assert scan("added:this month") == expected
+
+    def test_scan_previous_month_as_single_token(self) -> None:
+        expected = [FieldValue("created", "previous month")]
+        assert scan("created:previous month") == expected
+
+    def test_scan_this_year_as_single_token(self) -> None:
+        expected = [FieldValue("added", "this year")]
+        assert scan("added:this year") == expected
+
+    def test_scan_previous_year_as_single_token(self) -> None:
+        expected = [FieldValue("created", "previous year")]
+        assert scan("created:previous year") == expected
+
+    def test_scan_previous_quarter_as_single_token(self) -> None:
+        expected = [FieldValue("created", "previous quarter")]
+        assert scan("created:previous quarter") == expected
+
+    def test_quoted_multi_word_keyword_still_works(self) -> None:
+        # In the quoted form the quotes are kept as part of the scanned value.
+        expected = [FieldValue("created", '"previous week"')]
+        assert scan('created:"previous week"') == expected
+
+    def test_non_date_field_not_affected(self) -> None:
+        # For a non-date field the value ends at the space, and " week" is
+        # emitted as passthrough text.
+        expected = [
+            FieldValue("correspondent", "previous"),
+            Passthrough(" week"),
+        ]
+        assert scan("correspondent:previous week") == expected
+
+
+@pytest.mark.search
+class TestKeywordDateResolution:
+    """Date keywords must render exact ISO ranges when the clock is frozen.
+
+    The anchor is 2026-03-28 12:00 UTC, a Saturday in Q1, so a single
+    instant exercises the week, month, quarter and year rollovers.
+    """
+
+    # created is a DateField: every bound is UTC midnight, no timezone offset.
+    _CREATED_RANGES = {
+        "today": "created:[2026-03-28T00:00:00Z TO 2026-03-29T00:00:00Z]",
+        "yesterday": "created:[2026-03-27T00:00:00Z TO 2026-03-28T00:00:00Z]",
+        "previous week": "created:[2026-03-16T00:00:00Z TO 2026-03-23T00:00:00Z]",
+        "this month": "created:[2026-03-01T00:00:00Z TO 2026-04-01T00:00:00Z]",
+        "previous month": "created:[2026-02-01T00:00:00Z TO 2026-03-01T00:00:00Z]",
+        "this year": "created:[2026-01-01T00:00:00Z TO 2027-01-01T00:00:00Z]",
+        "previous year": "created:[2025-01-01T00:00:00Z TO 2026-01-01T00:00:00Z]",
+        "previous quarter": "created:[2025-10-01T00:00:00Z TO 2026-01-01T00:00:00Z]",
+    }
+
+    # added is a DateTimeField: bounds are local-timezone midnights converted
+    # to UTC. Tokyo (+09:00, no DST) moves each midnight back to 15:00Z on the
+    # previous day, which exercises the local-midnight offset path.
+    _ADDED_RANGES_TOKYO = {
+        "today": "added:[2026-03-27T15:00:00Z TO 2026-03-28T15:00:00Z]",
+        "yesterday": "added:[2026-03-26T15:00:00Z TO 2026-03-27T15:00:00Z]",
+        "previous week": "added:[2026-03-15T15:00:00Z TO 2026-03-22T15:00:00Z]",
+        "this month": "added:[2026-02-28T15:00:00Z TO 2026-03-31T15:00:00Z]",
+        "previous month": "added:[2026-01-31T15:00:00Z TO 2026-02-28T15:00:00Z]",
+        "this year": "added:[2025-12-31T15:00:00Z TO 2026-12-31T15:00:00Z]",
+        "previous year": "added:[2024-12-31T15:00:00Z TO 2025-12-31T15:00:00Z]",
+        "previous quarter": "added:[2025-09-30T15:00:00Z TO 2025-12-31T15:00:00Z]",
+    }
+
+    # Test ids are the keyword with spaces replaced by hyphens.
+    @pytest.mark.parametrize(
+        ("keyword", "expected"),
+        [
+            pytest.param(word, rendered, id=word.replace(" ", "-"))
+            for word, rendered in _CREATED_RANGES.items()
+        ],
+    )
+    @time_machine.travel(_FROZEN_NOW, tick=False)
+    def test_date_only_field_keyword_ranges(
+        self,
+        keyword: str,
+        expected: str,
+    ) -> None:
+        rendered = translate_query(f"created:{keyword}", UTC)
+        assert rendered == expected
+
+    @pytest.mark.parametrize(
+        ("keyword", "expected"),
+        [
+            pytest.param(word, rendered, id=word.replace(" ", "-"))
+            for word, rendered in _ADDED_RANGES_TOKYO.items()
+        ],
+    )
+    @time_machine.travel(_FROZEN_NOW, tick=False)
+    def test_datetime_field_keyword_ranges_local_tz(
+        self,
+        keyword: str,
+        expected: str,
+    ) -> None:
+        tokyo = ZoneInfo("Asia/Tokyo")
+        rendered = translate_query(f"added:{keyword}", tokyo)
+        assert rendered == expected
+
+
+@pytest.mark.search
+class TestISODatetimeBounds:
+    """Range bounds that are already full ISO datetimes are taken as exact instants."""
+
+    # A single range whose bounds are already in the rendered ISO form.
+    _ISO_RANGE = "created:[2026-01-01T00:00:00Z TO 2026-06-01T00:00:00Z]"
+
+    # Two comma-separated clauses, each with full ISO datetime bounds.
+    _ISO_RANGE_PAIR = (
+        "created:[2026-01-01T00:00:00Z TO 2026-06-01T00:00:00Z],"
+        "added:[2026-05-01T00:00:00Z TO 2026-06-01T00:00:00Z]"
+    )
+
+    def test_translate_range_iso_bounds_passthrough(self) -> None:
+        # Bounds that are already ISO datetimes come out unchanged.
+        low = "2020-01-01T00:00:00Z"
+        high = "2021-01-01T00:00:00Z"
+        rendered = translate_range("created", low, high, UTC)
+        assert rendered == "created:[2020-01-01T00:00:00Z TO 2021-01-01T00:00:00Z]"
+
+    def test_translate_query_iso_range_preserved(self) -> None:
+        rendered = translate_query(self._ISO_RANGE, UTC)
+        assert rendered == self._ISO_RANGE
+
+    def test_translate_query_comma_separated_iso_ranges(self) -> None:
+        # The comma between the two clauses is rendered as AND.
+        rendered = translate_query(self._ISO_RANGE_PAIR, UTC)
+        assert rendered == (
+            "created:[2026-01-01T00:00:00Z TO 2026-06-01T00:00:00Z]"
+            " AND "
+            "added:[2026-05-01T00:00:00Z TO 2026-06-01T00:00:00Z]"
+        )
+
+    def test_invalid_iso_datetime_raises(self) -> None:
+        # A bound containing "T" that is not a valid ISO datetime (hour 99) raises.
+        bad_bound = "2020-01-01T99:00:00Z"
+        with pytest.raises(InvalidDateQuery) as raised:
+            translate_range("created", bad_bound, "2021-01-01T00:00:00Z", UTC)
+        err = raised.value
+        assert err.field == "created"
+        assert err.value == bad_bound
+
+    def test_parse_acceptance_iso_bounds(self, index: tantivy.Index) -> None:
+        rendered = translate_query(self._ISO_RANGE, UTC)
+        index.parse_query(
+            rendered,
+            DEFAULT_SEARCH_FIELDS,
+            field_boosts=_FIELD_BOOSTS,
+        )
+
+    def test_parse_acceptance_comma_iso_ranges(self, index: tantivy.Index) -> None:
+        rendered = translate_query(self._ISO_RANGE_PAIR, UTC)
+        index.parse_query(
+            rendered,
+            DEFAULT_SEARCH_FIELDS,
+            field_boosts=_FIELD_BOOSTS,
+        )
@@ -725,9 +725,11 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
        GIVEN:
            - One document added right now
        WHEN:
-            - Query with invalid added date
+            - Query with an invalid added date
        THEN:
-            - 400 Bad Request returned (Tantivy rejects invalid date field syntax)
+            - 400 Bad Request with a message naming the malformed date, so the
+              user knows their date is invalid rather than silently getting zero
+              results
        """
        d1 = Document.objects.create(
            title="invoice",
@@ -740,8 +742,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):

        response = self.client.get("/api/documents/?query=added:invalid-date")

-        # Tantivy rejects unparsable field queries with a 400
+        # An unparsable date is reported as a malformed query, not silently empty.
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertIn("invalid-date", str(response.data["query"]))

    @override_settings(
        TIME_ZONE="UTC",
@@ -2277,6 +2277,7 @@ class UnifiedSearchViewSet(DocumentViewSet):
            return super().list(request)

        from documents.search import SearchHit
+        from documents.search import SearchQueryError
        from documents.search import TantivyBackend
        from documents.search import TantivyRelevanceList
        from documents.search import get_backend
@@ -2469,6 +2470,11 @@ class UnifiedSearchViewSet(DocumentViewSet):
            return HttpResponseForbidden(_("Insufficient permissions."))
        except ValidationError:
            raise
+        except SearchQueryError as e:
+            # User-fixable query error (e.g. an unparsable date): surface the
+            # specific message so the user can correct it, rather than a generic
+            # 400 or silently empty results.
+            raise ValidationError({"query": [str(e)]}) from e
        except Exception as e:
            logger.warning(f"An error occurred listing search results: {e!s}")
            return HttpResponseBadRequest(