Compare commits

...

2 Commits

Author SHA1 Message Date
shamoon
74478b7fd7 Cover the last quarter 2026-04-18 16:31:09 -07:00
shamoon
287961986e Fix: retain backwards compatibility with natural-date keywords in tantivy 2026-04-18 16:27:09 -07:00
3 changed files with 156 additions and 13 deletions

View File

@@ -25,21 +25,43 @@ _REGEX_TIMEOUT: Final[float] = 1.0
_DATE_ONLY_FIELDS = frozenset({"created"})
_DATE_KEYWORDS = frozenset(
{
"today",
"yesterday",
"this_week",
"last_week",
"this_month",
"last_month",
"this_year",
"last_year",
},
# Natural date keywords with whoosh-style aliases mapped to
# canonical forms.
#
# Keys are every accepted spelling (underscore form, space-separated form and
# the "previous ..." synonyms); values are the canonical underscore keyword.
# Keys are all lower-case: the lookup in _replace lower-cases the matched text
# before indexing this mapping.
_DATE_KEYWORD_ALIASES: Final[dict[str, str]] = {
"today": "today",
"yesterday": "yesterday",
"this_week": "this_week",
"this week": "this_week",
"last_week": "last_week",
"last week": "last_week",
"previous week": "last_week",
"this_month": "this_month",
"this month": "this_month",
"last_month": "last_month",
"last month": "last_month",
"previous month": "last_month",
"last_quarter": "last_quarter",
"last quarter": "last_quarter",
"previous_quarter": "last_quarter",
"previous quarter": "last_quarter",
"this_year": "this_year",
"this year": "this_year",
"last_year": "last_year",
"last year": "last_year",
"previous year": "last_year",
}
# The set of canonical keywords, i.e. the distinct values of the alias map.
_DATE_KEYWORDS = frozenset(_DATE_KEYWORD_ALIASES.values())
# Regex alternation of every escaped alias, ordered longest-first (by escaped
# length) so a longer alternative is always tried before a shorter one.
# NOTE(review): this fragment is interpolated into a VERBOSE-mode pattern, so
# the spaces inside aliases must be escaped to stay significant -- confirm that
# regex.escape escapes spaces with its default arguments.
_DATE_KEYWORD_PATTERN = "|".join(
sorted((regex.escape(k) for k in _DATE_KEYWORD_ALIASES), key=len, reverse=True),
)
_FIELD_DATE_RE = regex.compile(
r"(\w+):(" + "|".join(_DATE_KEYWORDS) + r")\b",
rf"""(?P<field>\w+)\s*:\s*(?:
(?P<quote>["'])(?P<quoted>{_DATE_KEYWORD_PATTERN})(?P=quote)
|
(?P<bare>{_DATE_KEYWORD_PATTERN})(?![\w-])
)""",
regex.IGNORECASE | regex.VERBOSE,
)
_COMPACT_DATE_RE = regex.compile(r"\b(\d{14})\b")
_RELATIVE_RANGE_RE = regex.compile(
@@ -74,6 +96,9 @@ def _date_only_range(keyword: str, tz: tzinfo) -> str:
today = datetime.now(tz).date()
def _quarter_start(d: date) -> date:
    """Return the first day of the calendar quarter that contains *d*."""
    # Zero-based quarter number: months 1-3 -> 0, 4-6 -> 1, 7-9 -> 2, 10-12 -> 3.
    quarter_index = (d.month - 1) // 3
    first_month = 3 * quarter_index + 1
    return date(d.year, first_month, 1)
if keyword == "today":
lo = datetime(today.year, today.month, today.day, tzinfo=UTC)
return _iso_range(lo, lo + timedelta(days=1))
@@ -106,6 +131,22 @@ def _date_only_range(keyword: str, tz: tzinfo) -> str:
lo = datetime(today.year, today.month - 1, 1, tzinfo=UTC)
hi = datetime(today.year, today.month, 1, tzinfo=UTC)
return _iso_range(lo, hi)
if keyword == "last_quarter":
this_quarter = _quarter_start(today)
last_quarter = this_quarter - relativedelta(months=3)
lo = datetime(
last_quarter.year,
last_quarter.month,
last_quarter.day,
tzinfo=UTC,
)
hi = datetime(
this_quarter.year,
this_quarter.month,
this_quarter.day,
tzinfo=UTC,
)
return _iso_range(lo, hi)
if keyword == "this_year":
lo = datetime(today.year, 1, 1, tzinfo=UTC)
return _iso_range(lo, datetime(today.year + 1, 1, 1, tzinfo=UTC))
@@ -127,6 +168,9 @@ def _datetime_range(keyword: str, tz: tzinfo) -> str:
def _midnight(d: date) -> datetime:
    """Return 00:00 on *d* in the enclosing ``tz``, converted to UTC."""
    local_midnight = datetime(year=d.year, month=d.month, day=d.day, tzinfo=tz)
    return local_midnight.astimezone(UTC)
def _quarter_start(d: date) -> date:
    """Return the date of the first day of the quarter *d* falls in."""
    # Step back to the first month of the quarter: 1, 4, 7 or 10.
    months_into_quarter = (d.month - 1) % 3
    return date(d.year, d.month - months_into_quarter, 1)
if keyword == "today":
return _iso_range(_midnight(today), _midnight(today + timedelta(days=1)))
if keyword == "yesterday":
@@ -153,6 +197,10 @@ def _datetime_range(keyword: str, tz: tzinfo) -> str:
else:
last_first = date(today.year, today.month - 1, 1)
return _iso_range(_midnight(last_first), _midnight(this_first))
if keyword == "last_quarter":
this_quarter = _quarter_start(today)
last_quarter = this_quarter - relativedelta(months=3)
return _iso_range(_midnight(last_quarter), _midnight(this_quarter))
if keyword == "this_year":
return _iso_range(
_midnight(date(today.year, 1, 1)),
@@ -326,7 +374,9 @@ def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
query = _rewrite_relative_range(query)
def _replace(m: regex.Match[str]) -> str:
    """Rewrite one matched ``field:keyword`` pair as ``field:<range>``.

    Reads the named groups ``field``, ``quoted`` and ``bare`` from *m*, so
    the match must come from a pattern that defines them.  The matched
    keyword text is lower-cased and resolved through
    ``_DATE_KEYWORD_ALIASES`` to its canonical form before the range is
    built.  Positional groups are deliberately not used: group 2 of the
    named-group pattern is the opening quote, not the keyword.
    """
    field = m.group("field")
    # Only one of the two keyword groups takes part in a given match; the
    # other is None, so ``or`` selects whichever one is set.
    raw_keyword = m.group("quoted") or m.group("bare")
    keyword = _DATE_KEYWORD_ALIASES[raw_keyword.lower()]
    # Fields listed in _DATE_ONLY_FIELDS use the date-only range builder;
    # every other field uses the datetime one.
    if field in _DATE_ONLY_FIELDS:
        return f"{field}:{_date_only_range(keyword, tz)}"
    return f"{field}:{_datetime_range(keyword, tz)}"

View File

@@ -157,6 +157,15 @@ class TestCreatedDateField:
assert lo == "2025-12-01T00:00:00Z"
assert hi == "2026-01-01T00:00:00Z"
@time_machine.travel(datetime(2026, 7, 15, 12, 0, tzinfo=UTC), tick=False)
def test_legacy_previous_quarter_alias(self) -> None:
    """The quoted ``previous quarter`` phrase is rewritten for ``created``.

    The clock is frozen at 2026-07-15, so the expected bounds are
    2026-04-01 and 2026-07-01.
    """
    rewritten = rewrite_natural_date_keywords('created:"previous quarter"', UTC)
    lo, hi = _range(rewritten, "created")
    assert lo == "2026-04-01T00:00:00Z"
    assert hi == "2026-07-01T00:00:00Z"
def test_unknown_keyword_raises(self) -> None:
    """``_date_only_range`` raises ``ValueError`` for an unknown keyword."""
    expected_error = pytest.raises(ValueError, match="Unknown keyword")
    with expected_error:
        _date_only_range("bogus_keyword", UTC)
@@ -265,6 +274,40 @@ class TestDateTimeFields:
assert lo == "2025-12-01T00:00:00Z"
assert hi == "2026-01-01T00:00:00Z"
@pytest.mark.parametrize(
    ("query", "expected_lo", "expected_hi"),
    [
        ('added:"previous quarter"', "2026-04-01T00:00:00Z", "2026-07-01T00:00:00Z"),
        ("added:previous month", "2026-06-01T00:00:00Z", "2026-07-01T00:00:00Z"),
        ("added:this month", "2026-07-01T00:00:00Z", "2026-08-01T00:00:00Z"),
    ],
    ids=[
        "quoted_previous_quarter",
        "bare_previous_month",
        "bare_this_month",
    ],
)
@time_machine.travel(datetime(2026, 7, 15, 12, 0, tzinfo=UTC), tick=False)
def test_legacy_natural_language_aliases(
    self,
    query: str,
    expected_lo: str,
    expected_hi: str,
) -> None:
    """Quoted and bare natural-language aliases are rewritten for ``added``.

    The clock is frozen at 2026-07-15; each case supplies the query and the
    bounds expected for it.
    """
    rewritten = rewrite_natural_date_keywords(query, UTC)
    lo, hi = _range(rewritten, "added")
    assert lo == expected_lo
    assert hi == expected_hi
def test_unknown_keyword_raises(self) -> None:
    """``_datetime_range`` raises ``ValueError`` for an unknown keyword."""
    expected_error = pytest.raises(ValueError, match="Unknown keyword")
    with expected_error:
        _datetime_range("bogus_keyword", UTC)

View File

@@ -3,6 +3,7 @@ from datetime import timedelta
from unittest import mock
import pytest
import time_machine
from dateutil.relativedelta import relativedelta
from django.contrib.auth.models import Group
from django.contrib.auth.models import Permission
@@ -741,6 +742,55 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
# Tantivy rejects unparsable field queries with a 400
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
@override_settings(
    TIME_ZONE="UTC",
)
@time_machine.travel(
    datetime.datetime(2026, 7, 15, 12, 0, tzinfo=datetime.UTC),
    tick=False,
)
def test_search_added_previous_quarter_legacy_phrase(self) -> None:
    """
    GIVEN:
        - Three documents added in April, June and July 2026
        - The clock frozen at 2026-07-15
    WHEN:
        - Searching with the legacy phrase added:"previous quarter"
    THEN:
        - Only the April and June documents are returned
    """
    # (pk, title, checksum, added) for each fixture document.
    fixtures = (
        (
            1,
            "quarterly statement april",
            "Q1",
            datetime.datetime(2026, 4, 10, 12, 0, tzinfo=datetime.UTC),
        ),
        (
            2,
            "quarterly statement june",
            "Q2",
            datetime.datetime(2026, 6, 20, 12, 0, tzinfo=datetime.UTC),
        ),
        (
            3,
            "quarterly statement july",
            "Q3",
            datetime.datetime(2026, 7, 10, 12, 0, tzinfo=datetime.UTC),
        ),
    )
    # Create every document before touching the search backend, then index
    # them in pk order.
    documents = [
        Document.objects.create(
            title=title,
            content="bank statement",
            checksum=checksum,
            pk=pk,
            added=added,
        )
        for pk, title, checksum, added in fixtures
    ]
    backend = get_backend()
    for document in documents:
        backend.add_or_update(document)

    response = self.client.get('/api/documents/?query=added:"previous quarter"')

    self.assertEqual(response.status_code, status.HTTP_200_OK)
    returned_ids = {result["id"] for result in response.data["results"]}
    self.assertEqual(returned_ids, {1, 2})
@mock.patch("documents.search._backend.TantivyBackend.autocomplete")
def test_search_autocomplete_limits(self, m) -> None:
"""