Fix (dev): retain backwards compatibility with natural-date keywords in tantivy (#12602)

This commit is contained in:
shamoon
2026-04-20 08:26:33 -07:00
committed by GitHub
parent 750a2723a2
commit 20aa0937e8
4 changed files with 187 additions and 70 deletions

View File

@@ -855,13 +855,14 @@ Matching natural date keywords:
```
added:today
modified:yesterday
created:this_week
added:last_month
modified:this_year
created:"previous week"
added:"previous month"
modified:"this year"
```
Supported date keywords: `today`, `yesterday`, `this_week`, `last_week`,
`this_month`, `last_month`, `this_year`, `last_year`.
Supported date keywords: `today`, `yesterday`, `previous week`,
`this month`, `previous month`, `this year`, `previous year`,
`previous quarter`.
#### Searching custom fields

View File

@@ -25,21 +25,39 @@ _REGEX_TIMEOUT: Final[float] = 1.0
_DATE_ONLY_FIELDS = frozenset({"created"})
_TODAY: Final[str] = "today"
_YESTERDAY: Final[str] = "yesterday"
_PREVIOUS_WEEK: Final[str] = "previous week"
_THIS_MONTH: Final[str] = "this month"
_PREVIOUS_MONTH: Final[str] = "previous month"
_THIS_YEAR: Final[str] = "this year"
_PREVIOUS_YEAR: Final[str] = "previous year"
_PREVIOUS_QUARTER: Final[str] = "previous quarter"
_DATE_KEYWORDS = frozenset(
{
"today",
"yesterday",
"this_week",
"last_week",
"this_month",
"last_month",
"this_year",
"last_year",
_TODAY,
_YESTERDAY,
_PREVIOUS_WEEK,
_THIS_MONTH,
_PREVIOUS_MONTH,
_THIS_YEAR,
_PREVIOUS_YEAR,
_PREVIOUS_QUARTER,
},
)
_DATE_KEYWORD_PATTERN = "|".join(
sorted((regex.escape(k) for k in _DATE_KEYWORDS), key=len, reverse=True),
)
_FIELD_DATE_RE = regex.compile(
r"(\w+):(" + "|".join(_DATE_KEYWORDS) + r")\b",
rf"""(?P<field>\w+)\s*:\s*(?:
(?P<quote>["'])(?P<quoted>{_DATE_KEYWORD_PATTERN})(?P=quote)
|
(?P<bare>{_DATE_KEYWORD_PATTERN})(?![\w-])
)""",
regex.IGNORECASE | regex.VERBOSE,
)
_COMPACT_DATE_RE = regex.compile(r"\b(\d{14})\b")
_RELATIVE_RANGE_RE = regex.compile(
@@ -74,44 +92,59 @@ def _date_only_range(keyword: str, tz: tzinfo) -> str:
today = datetime.now(tz).date()
if keyword == "today":
def _quarter_start(d: date) -> date:
return date(d.year, ((d.month - 1) // 3) * 3 + 1, 1)
if keyword == _TODAY:
lo = datetime(today.year, today.month, today.day, tzinfo=UTC)
return _iso_range(lo, lo + timedelta(days=1))
if keyword == "yesterday":
if keyword == _YESTERDAY:
y = today - timedelta(days=1)
lo = datetime(y.year, y.month, y.day, tzinfo=UTC)
hi = datetime(today.year, today.month, today.day, tzinfo=UTC)
return _iso_range(lo, hi)
if keyword == "this_week":
mon = today - timedelta(days=today.weekday())
lo = datetime(mon.year, mon.month, mon.day, tzinfo=UTC)
return _iso_range(lo, lo + timedelta(weeks=1))
if keyword == "last_week":
if keyword == _PREVIOUS_WEEK:
this_mon = today - timedelta(days=today.weekday())
last_mon = this_mon - timedelta(weeks=1)
lo = datetime(last_mon.year, last_mon.month, last_mon.day, tzinfo=UTC)
hi = datetime(this_mon.year, this_mon.month, this_mon.day, tzinfo=UTC)
return _iso_range(lo, hi)
if keyword == "this_month":
if keyword == _THIS_MONTH:
lo = datetime(today.year, today.month, 1, tzinfo=UTC)
if today.month == 12:
hi = datetime(today.year + 1, 1, 1, tzinfo=UTC)
else:
hi = datetime(today.year, today.month + 1, 1, tzinfo=UTC)
return _iso_range(lo, hi)
if keyword == "last_month":
if keyword == _PREVIOUS_MONTH:
if today.month == 1:
lo = datetime(today.year - 1, 12, 1, tzinfo=UTC)
else:
lo = datetime(today.year, today.month - 1, 1, tzinfo=UTC)
hi = datetime(today.year, today.month, 1, tzinfo=UTC)
return _iso_range(lo, hi)
if keyword == "this_year":
if keyword == _THIS_YEAR:
lo = datetime(today.year, 1, 1, tzinfo=UTC)
return _iso_range(lo, datetime(today.year + 1, 1, 1, tzinfo=UTC))
if keyword == "last_year":
if keyword == _PREVIOUS_YEAR:
lo = datetime(today.year - 1, 1, 1, tzinfo=UTC)
return _iso_range(lo, datetime(today.year, 1, 1, tzinfo=UTC))
if keyword == _PREVIOUS_QUARTER:
this_quarter = _quarter_start(today)
last_quarter = this_quarter - relativedelta(months=3)
lo = datetime(
last_quarter.year,
last_quarter.month,
last_quarter.day,
tzinfo=UTC,
)
hi = datetime(
this_quarter.year,
this_quarter.month,
this_quarter.day,
tzinfo=UTC,
)
return _iso_range(lo, hi)
raise ValueError(f"Unknown keyword: {keyword}")
@@ -127,42 +160,46 @@ def _datetime_range(keyword: str, tz: tzinfo) -> str:
def _midnight(d: date) -> datetime:
return datetime(d.year, d.month, d.day, tzinfo=tz).astimezone(UTC)
if keyword == "today":
def _quarter_start(d: date) -> date:
return date(d.year, ((d.month - 1) // 3) * 3 + 1, 1)
if keyword == _TODAY:
return _iso_range(_midnight(today), _midnight(today + timedelta(days=1)))
if keyword == "yesterday":
if keyword == _YESTERDAY:
y = today - timedelta(days=1)
return _iso_range(_midnight(y), _midnight(today))
if keyword == "this_week":
mon = today - timedelta(days=today.weekday())
return _iso_range(_midnight(mon), _midnight(mon + timedelta(weeks=1)))
if keyword == "last_week":
if keyword == _PREVIOUS_WEEK:
this_mon = today - timedelta(days=today.weekday())
last_mon = this_mon - timedelta(weeks=1)
return _iso_range(_midnight(last_mon), _midnight(this_mon))
if keyword == "this_month":
if keyword == _THIS_MONTH:
first = today.replace(day=1)
if today.month == 12:
next_first = date(today.year + 1, 1, 1)
else:
next_first = date(today.year, today.month + 1, 1)
return _iso_range(_midnight(first), _midnight(next_first))
if keyword == "last_month":
if keyword == _PREVIOUS_MONTH:
this_first = today.replace(day=1)
if today.month == 1:
last_first = date(today.year - 1, 12, 1)
else:
last_first = date(today.year, today.month - 1, 1)
return _iso_range(_midnight(last_first), _midnight(this_first))
if keyword == "this_year":
if keyword == _THIS_YEAR:
return _iso_range(
_midnight(date(today.year, 1, 1)),
_midnight(date(today.year + 1, 1, 1)),
)
if keyword == "last_year":
if keyword == _PREVIOUS_YEAR:
return _iso_range(
_midnight(date(today.year - 1, 1, 1)),
_midnight(date(today.year, 1, 1)),
)
if keyword == _PREVIOUS_QUARTER:
this_quarter = _quarter_start(today)
last_quarter = this_quarter - relativedelta(months=3)
return _iso_range(_midnight(last_quarter), _midnight(this_quarter))
raise ValueError(f"Unknown keyword: {keyword}")
@@ -308,7 +345,7 @@ def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
- Compact 14-digit dates (YYYYMMDDHHmmss)
- Whoosh relative ranges ([-7 days to now], [now-1h TO now+2h])
- 8-digit dates with field awareness (created:20240115)
- Natural keywords (field:today, field:last_week, etc.)
- Natural keywords (field:today, field:"previous quarter", etc.)
Args:
query: Raw user query string
@@ -326,7 +363,8 @@ def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
query = _rewrite_relative_range(query)
def _replace(m: regex.Match[str]) -> str:
field, keyword = m.group(1), m.group(2)
field = m.group("field")
keyword = (m.group("quoted") or m.group("bare")).lower()
if field in _DATE_ONLY_FIELDS:
return f"{field}:{_date_only_range(keyword, tz)}"
return f"{field}:{_datetime_range(keyword, tz)}"

View File

@@ -81,45 +81,38 @@ class TestCreatedDateField:
),
pytest.param(
"created",
"this_week",
"2026-03-23T00:00:00Z",
"2026-03-30T00:00:00Z",
id="this_week_mon_sun",
),
pytest.param(
"created",
"last_week",
"previous week",
"2026-03-16T00:00:00Z",
"2026-03-23T00:00:00Z",
id="last_week",
id="previous_week",
),
pytest.param(
"created",
"this_month",
"this month",
"2026-03-01T00:00:00Z",
"2026-04-01T00:00:00Z",
id="this_month",
),
pytest.param(
"created",
"last_month",
"previous month",
"2026-02-01T00:00:00Z",
"2026-03-01T00:00:00Z",
id="last_month",
id="previous_month",
),
pytest.param(
"created",
"this_year",
"this year",
"2026-01-01T00:00:00Z",
"2027-01-01T00:00:00Z",
id="this_year",
),
pytest.param(
"created",
"last_year",
"previous year",
"2025-01-01T00:00:00Z",
"2026-01-01T00:00:00Z",
id="last_year",
id="previous_year",
),
],
)
@@ -141,7 +134,7 @@ class TestCreatedDateField:
def test_this_month_december_wraps_to_next_year(self) -> None:
# December: next month must roll over to January 1 of next year
lo, hi = _range(
rewrite_natural_date_keywords("created:this_month", UTC),
rewrite_natural_date_keywords("created:this month", UTC),
"created",
)
assert lo == "2026-12-01T00:00:00Z"
@@ -151,12 +144,21 @@ class TestCreatedDateField:
def test_last_month_january_wraps_to_previous_year(self) -> None:
# January: last month must roll back to December 1 of previous year
lo, hi = _range(
rewrite_natural_date_keywords("created:last_month", UTC),
rewrite_natural_date_keywords("created:previous month", UTC),
"created",
)
assert lo == "2025-12-01T00:00:00Z"
assert hi == "2026-01-01T00:00:00Z"
@time_machine.travel(datetime(2026, 7, 15, 12, 0, tzinfo=UTC), tick=False)
def test_previous_quarter(self) -> None:
    """Quoted ``previous quarter`` on ``created`` resolves to the prior quarter."""
    # The clock is frozen at 2026-07-15, so the expected bounds are
    # April 1 and July 1 of 2026.
    rewritten = rewrite_natural_date_keywords('created:"previous quarter"', UTC)
    start, end = _range(rewritten, "created")
    assert start == "2026-04-01T00:00:00Z"
    assert end == "2026-07-01T00:00:00Z"
def test_unknown_keyword_raises(self) -> None:
with pytest.raises(ValueError, match="Unknown keyword"):
_date_only_range("bogus_keyword", UTC)
@@ -202,40 +204,34 @@ class TestDateTimeFields:
id="yesterday",
),
pytest.param(
"this_week",
"2026-03-23T00:00:00Z",
"2026-03-30T00:00:00Z",
id="this_week",
),
pytest.param(
"last_week",
"previous week",
"2026-03-16T00:00:00Z",
"2026-03-23T00:00:00Z",
id="last_week",
id="previous_week",
),
pytest.param(
"this_month",
"this month",
"2026-03-01T00:00:00Z",
"2026-04-01T00:00:00Z",
id="this_month",
),
pytest.param(
"last_month",
"previous month",
"2026-02-01T00:00:00Z",
"2026-03-01T00:00:00Z",
id="last_month",
id="previous_month",
),
pytest.param(
"this_year",
"this year",
"2026-01-01T00:00:00Z",
"2027-01-01T00:00:00Z",
id="this_year",
),
pytest.param(
"last_year",
"previous year",
"2025-01-01T00:00:00Z",
"2026-01-01T00:00:00Z",
id="last_year",
id="previous_year",
),
],
)
@@ -254,17 +250,54 @@ class TestDateTimeFields:
@time_machine.travel(datetime(2026, 12, 15, 12, 0, tzinfo=UTC), tick=False)
def test_this_month_december_wraps_to_next_year(self) -> None:
# December: next month wraps to January of next year
lo, hi = _range(rewrite_natural_date_keywords("added:this_month", UTC), "added")
lo, hi = _range(rewrite_natural_date_keywords("added:this month", UTC), "added")
assert lo == "2026-12-01T00:00:00Z"
assert hi == "2027-01-01T00:00:00Z"
@time_machine.travel(datetime(2026, 1, 15, 12, 0, tzinfo=UTC), tick=False)
def test_last_month_january_wraps_to_previous_year(self) -> None:
# January: last month wraps back to December of previous year
lo, hi = _range(rewrite_natural_date_keywords("added:last_month", UTC), "added")
lo, hi = _range(
rewrite_natural_date_keywords("added:previous month", UTC),
"added",
)
assert lo == "2025-12-01T00:00:00Z"
assert hi == "2026-01-01T00:00:00Z"
@pytest.mark.parametrize(
    ("query", "expected_lo", "expected_hi"),
    [
        ('added:"previous quarter"', "2026-04-01T00:00:00Z", "2026-07-01T00:00:00Z"),
        ("added:previous month", "2026-06-01T00:00:00Z", "2026-07-01T00:00:00Z"),
        ("added:this month", "2026-07-01T00:00:00Z", "2026-08-01T00:00:00Z"),
    ],
    ids=["quoted_previous_quarter", "bare_previous_month", "bare_this_month"],
)
@time_machine.travel(datetime(2026, 7, 15, 12, 0, tzinfo=UTC), tick=False)
def test_legacy_natural_language_aliases(
    self,
    query: str,
    expected_lo: str,
    expected_hi: str,
) -> None:
    """Space-separated keyword phrases, quoted or bare, rewrite to a range.

    The clock is frozen at 2026-07-15, which fixes the expected bounds for
    each parametrized query on the ``added`` field.
    """
    rewritten = rewrite_natural_date_keywords(query, UTC)
    start, end = _range(rewritten, "added")
    assert start == expected_lo
    assert end == expected_hi
def test_unknown_keyword_raises(self) -> None:
with pytest.raises(ValueError, match="Unknown keyword"):
_datetime_range("bogus_keyword", UTC)

View File

@@ -3,6 +3,7 @@ from datetime import timedelta
from unittest import mock
import pytest
import time_machine
from dateutil.relativedelta import relativedelta
from django.contrib.auth.models import Group
from django.contrib.auth.models import Permission
@@ -26,6 +27,7 @@ from documents.models import Tag
from documents.models import Workflow
from documents.search import get_backend
from documents.search import reset_backend
from documents.tests.factories import DocumentFactory
from documents.tests.utils import DirectoriesMixin
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
@@ -741,6 +743,49 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
# Tantivy rejects unparsable field queries with a 400
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
@override_settings(
    TIME_ZONE="UTC",
)
@time_machine.travel(
    datetime.datetime(2026, 7, 15, 12, 0, tzinfo=datetime.UTC),
    tick=False,
)
def test_search_added_previous_quarter(self) -> None:
    """
    GIVEN:
        - Documents inside and outside the previous quarter
    WHEN:
        - Query with the legacy natural-language phrase used by the UI
    THEN:
        - Previous-quarter documents are returned
    """
    # The clock is frozen at 2026-07-15: the April and June documents are
    # expected in the results, the July document is not.
    april_doc = DocumentFactory.create(
        title="quarterly statement april",
        content="bank statement",
        added=datetime.datetime(2026, 4, 10, 12, 0, tzinfo=datetime.UTC),
    )
    june_doc = DocumentFactory.create(
        title="quarterly statement june",
        content="bank statement",
        added=datetime.datetime(2026, 6, 20, 12, 0, tzinfo=datetime.UTC),
    )
    july_doc = DocumentFactory.create(
        title="quarterly statement july",
        content="bank statement",
        added=datetime.datetime(2026, 7, 10, 12, 0, tzinfo=datetime.UTC),
    )

    backend = get_backend()
    for doc in (april_doc, june_doc, july_doc):
        backend.add_or_update(doc)

    response = self.client.get('/api/documents/?query=added:"previous quarter"')

    self.assertEqual(response.status_code, status.HTTP_200_OK)
    # Compare against the primary keys of the objects created above rather
    # than the literals 1 and 2: literal IDs only hold when these are the
    # first rows ever inserted, which depends on the database backend and
    # on what earlier tests created.
    returned_ids = {r["id"] for r in response.data["results"]}
    self.assertEqual(returned_ids, {april_doc.id, june_doc.id})
@mock.patch("documents.search._backend.TantivyBackend.autocomplete")
def test_search_autocomplete_limits(self, m) -> None:
"""