diff --git a/src/documents/search/_query.py b/src/documents/search/_query.py index ed0bb4c15..0cd5b1819 100644 --- a/src/documents/search/_query.py +++ b/src/documents/search/_query.py @@ -25,21 +25,43 @@ _REGEX_TIMEOUT: Final[float] = 1.0 _DATE_ONLY_FIELDS = frozenset({"created"}) -_DATE_KEYWORDS = frozenset( - { - "today", - "yesterday", - "this_week", - "last_week", - "this_month", - "last_month", - "this_year", - "last_year", - }, +# Natural date keywords with whoosh-style aliases mapped to +# canonical forms. +_DATE_KEYWORD_ALIASES: Final[dict[str, str]] = { + "today": "today", + "yesterday": "yesterday", + "this_week": "this_week", + "this week": "this_week", + "last_week": "last_week", + "last week": "last_week", + "previous week": "last_week", + "this_month": "this_month", + "this month": "this_month", + "last_month": "last_month", + "last month": "last_month", + "previous month": "last_month", + "last_quarter": "last_quarter", + "last quarter": "last_quarter", + "previous_quarter": "last_quarter", + "previous quarter": "last_quarter", + "this_year": "this_year", + "this year": "this_year", + "last_year": "last_year", + "last year": "last_year", + "previous year": "last_year", +} +_DATE_KEYWORDS = frozenset(_DATE_KEYWORD_ALIASES.values()) +_DATE_KEYWORD_PATTERN = "|".join( + sorted((regex.escape(k) for k in _DATE_KEYWORD_ALIASES), key=len, reverse=True), ) _FIELD_DATE_RE = regex.compile( - r"(\w+):(" + "|".join(_DATE_KEYWORDS) + r")\b", + rf"""(?P<field>\w+)\s*:\s*(?: + (?P<quote>["'])(?P<quoted>{_DATE_KEYWORD_PATTERN})(?P=quote) + | + (?P<bare>{_DATE_KEYWORD_PATTERN})(?![\w-]) +)""", + regex.IGNORECASE | regex.VERBOSE, ) _COMPACT_DATE_RE = regex.compile(r"\b(\d{14})\b") _RELATIVE_RANGE_RE = regex.compile( @@ -74,6 +96,9 @@ def _date_only_range(keyword: str, tz: tzinfo) -> str: today = datetime.now(tz).date() + def _quarter_start(d: date) -> date: + return date(d.year, ((d.month - 1) // 3) * 3 + 1, 1) + if keyword == "today": lo = datetime(today.year, today.month, today.day, 
tzinfo=UTC) return _iso_range(lo, lo + timedelta(days=1)) @@ -106,6 +131,22 @@ def _date_only_range(keyword: str, tz: tzinfo) -> str: lo = datetime(today.year, today.month - 1, 1, tzinfo=UTC) hi = datetime(today.year, today.month, 1, tzinfo=UTC) return _iso_range(lo, hi) + if keyword == "last_quarter": + this_quarter = _quarter_start(today) + last_quarter = this_quarter - relativedelta(months=3) + lo = datetime( + last_quarter.year, + last_quarter.month, + last_quarter.day, + tzinfo=UTC, + ) + hi = datetime( + this_quarter.year, + this_quarter.month, + this_quarter.day, + tzinfo=UTC, + ) + return _iso_range(lo, hi) if keyword == "this_year": lo = datetime(today.year, 1, 1, tzinfo=UTC) return _iso_range(lo, datetime(today.year + 1, 1, 1, tzinfo=UTC)) @@ -127,6 +168,9 @@ def _datetime_range(keyword: str, tz: tzinfo) -> str: def _midnight(d: date) -> datetime: return datetime(d.year, d.month, d.day, tzinfo=tz).astimezone(UTC) + def _quarter_start(d: date) -> date: + return date(d.year, ((d.month - 1) // 3) * 3 + 1, 1) + if keyword == "today": return _iso_range(_midnight(today), _midnight(today + timedelta(days=1))) if keyword == "yesterday": @@ -153,6 +197,10 @@ def _datetime_range(keyword: str, tz: tzinfo) -> str: else: last_first = date(today.year, today.month - 1, 1) return _iso_range(_midnight(last_first), _midnight(this_first)) + if keyword == "last_quarter": + this_quarter = _quarter_start(today) + last_quarter = this_quarter - relativedelta(months=3) + return _iso_range(_midnight(last_quarter), _midnight(this_quarter)) if keyword == "this_year": return _iso_range( _midnight(date(today.year, 1, 1)), @@ -326,7 +374,9 @@ def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str: query = _rewrite_relative_range(query) def _replace(m: regex.Match[str]) -> str: - field, keyword = m.group(1), m.group(2) + field = m.group("field") + raw_keyword = m.group("quoted") or m.group("bare") + keyword = _DATE_KEYWORD_ALIASES[raw_keyword.lower()] if field in 
_DATE_ONLY_FIELDS: return f"{field}:{_date_only_range(keyword, tz)}" return f"{field}:{_datetime_range(keyword, tz)}" diff --git a/src/documents/tests/search/test_query.py b/src/documents/tests/search/test_query.py index 74a064dbb..d64632146 100644 --- a/src/documents/tests/search/test_query.py +++ b/src/documents/tests/search/test_query.py @@ -265,6 +265,40 @@ class TestDateTimeFields: assert lo == "2025-12-01T00:00:00Z" assert hi == "2026-01-01T00:00:00Z" + @pytest.mark.parametrize( + ("query", "expected_lo", "expected_hi"), + [ + pytest.param( + 'added:"previous quarter"', + "2026-04-01T00:00:00Z", + "2026-07-01T00:00:00Z", + id="quoted_previous_quarter", + ), + pytest.param( + "added:previous month", + "2026-06-01T00:00:00Z", + "2026-07-01T00:00:00Z", + id="bare_previous_month", + ), + pytest.param( + "added:this month", + "2026-07-01T00:00:00Z", + "2026-08-01T00:00:00Z", + id="bare_this_month", + ), + ], + ) + @time_machine.travel(datetime(2026, 7, 15, 12, 0, tzinfo=UTC), tick=False) + def test_legacy_natural_language_aliases( + self, + query: str, + expected_lo: str, + expected_hi: str, + ) -> None: + lo, hi = _range(rewrite_natural_date_keywords(query, UTC), "added") + assert lo == expected_lo + assert hi == expected_hi + def test_unknown_keyword_raises(self) -> None: with pytest.raises(ValueError, match="Unknown keyword"): _datetime_range("bogus_keyword", UTC) diff --git a/src/documents/tests/test_api_search.py b/src/documents/tests/test_api_search.py index 85f479010..195baaec0 100644 --- a/src/documents/tests/test_api_search.py +++ b/src/documents/tests/test_api_search.py @@ -3,6 +3,7 @@ from datetime import timedelta from unittest import mock import pytest +import time_machine from dateutil.relativedelta import relativedelta from django.contrib.auth.models import Group from django.contrib.auth.models import Permission @@ -741,6 +742,55 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): # Tantivy rejects unparsable field queries with a 400 
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + @override_settings( + TIME_ZONE="UTC", + ) + @time_machine.travel( + datetime.datetime(2026, 7, 15, 12, 0, tzinfo=datetime.UTC), + tick=False, + ) + def test_search_added_previous_quarter_legacy_phrase(self) -> None: + """ + GIVEN: + - Documents inside and outside the previous quarter + WHEN: + - Query with the legacy natural-language phrase used by the UI + THEN: + - Previous-quarter documents are returned + """ + d1 = Document.objects.create( + title="quarterly statement april", + content="bank statement", + checksum="Q1", + pk=1, + added=datetime.datetime(2026, 4, 10, 12, 0, tzinfo=datetime.UTC), + ) + d2 = Document.objects.create( + title="quarterly statement june", + content="bank statement", + checksum="Q2", + pk=2, + added=datetime.datetime(2026, 6, 20, 12, 0, tzinfo=datetime.UTC), + ) + d3 = Document.objects.create( + title="quarterly statement july", + content="bank statement", + checksum="Q3", + pk=3, + added=datetime.datetime(2026, 7, 10, 12, 0, tzinfo=datetime.UTC), + ) + + backend = get_backend() + backend.add_or_update(d1) + backend.add_or_update(d2) + backend.add_or_update(d3) + + response = self.client.get('/api/documents/?query=added:"previous quarter"') + self.assertEqual(response.status_code, status.HTTP_200_OK) + + results = response.data["results"] + self.assertEqual({r["id"] for r in results}, {1, 2}) + @mock.patch("documents.search._backend.TantivyBackend.autocomplete") def test_search_autocomplete_limits(self, m) -> None: """