mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-04-21 07:19:26 +00:00
Fix (dev): retain backwards compatibility with natural-date keywords in tantivy (#12602)
This commit is contained in:
@@ -855,13 +855,14 @@ Matching natural date keywords:
|
||||
```
|
||||
added:today
|
||||
modified:yesterday
|
||||
created:this_week
|
||||
added:last_month
|
||||
modified:this_year
|
||||
created:"previous week"
|
||||
added:"previous month"
|
||||
modified:"this year"
|
||||
```
|
||||
|
||||
Supported date keywords: `today`, `yesterday`, `this_week`, `last_week`,
|
||||
`this_month`, `last_month`, `this_year`, `last_year`.
|
||||
Supported date keywords: `today`, `yesterday`, `previous week`,
|
||||
`this month`, `previous month`, `this year`, `previous year`,
|
||||
`previous quarter`.
|
||||
|
||||
#### Searching custom fields
|
||||
|
||||
|
||||
@@ -25,21 +25,39 @@ _REGEX_TIMEOUT: Final[float] = 1.0
|
||||
|
||||
# Fields whose keyword is expanded by `_date_only_range`; every other field
# is expanded by `_datetime_range` (see `_replace`).
_DATE_ONLY_FIELDS = frozenset({"created"})
|
||||
|
||||
# Lower-case spellings of the natural-language date keywords. The range
# helpers compare the already lower-cased keyword against these by equality.
_TODAY: Final[str] = "today"
|
||||
_YESTERDAY: Final[str] = "yesterday"
|
||||
_PREVIOUS_WEEK: Final[str] = "previous week"
|
||||
_THIS_MONTH: Final[str] = "this month"
|
||||
_PREVIOUS_MONTH: Final[str] = "previous month"
|
||||
_THIS_YEAR: Final[str] = "this year"
|
||||
_PREVIOUS_YEAR: Final[str] = "previous year"
|
||||
_PREVIOUS_QUARTER: Final[str] = "previous quarter"
|
||||
|
||||
_DATE_KEYWORDS = frozenset(
|
||||
{
|
||||
"today",
|
||||
"yesterday",
|
||||
"this_week",
|
||||
"last_week",
|
||||
"this_month",
|
||||
"last_month",
|
||||
"this_year",
|
||||
"last_year",
|
||||
_TODAY,
|
||||
_YESTERDAY,
|
||||
_PREVIOUS_WEEK,
|
||||
_THIS_MONTH,
|
||||
_PREVIOUS_MONTH,
|
||||
_THIS_YEAR,
|
||||
_PREVIOUS_YEAR,
|
||||
_PREVIOUS_QUARTER,
|
||||
},
|
||||
)
|
||||
|
||||
# Regex alternation of every supported keyword, escaped for literal matching
# and ordered longest-first so a longer keyword is tried before a shorter one.
_DATE_KEYWORD_PATTERN = "|".join(
    sorted(map(regex.escape, _DATE_KEYWORDS), key=len, reverse=True),
)
|
||||
|
||||
_FIELD_DATE_RE = regex.compile(
|
||||
r"(\w+):(" + "|".join(_DATE_KEYWORDS) + r")\b",
|
||||
rf"""(?P<field>\w+)\s*:\s*(?:
|
||||
(?P<quote>["'])(?P<quoted>{_DATE_KEYWORD_PATTERN})(?P=quote)
|
||||
|
|
||||
(?P<bare>{_DATE_KEYWORD_PATTERN})(?![\w-])
|
||||
)""",
|
||||
regex.IGNORECASE | regex.VERBOSE,
|
||||
)
|
||||
_COMPACT_DATE_RE = regex.compile(r"\b(\d{14})\b")
|
||||
_RELATIVE_RANGE_RE = regex.compile(
|
||||
@@ -74,44 +92,59 @@ def _date_only_range(keyword: str, tz: tzinfo) -> str:
|
||||
|
||||
today = datetime.now(tz).date()
|
||||
|
||||
if keyword == "today":
|
||||
def _quarter_start(d: date) -> date:
|
||||
return date(d.year, ((d.month - 1) // 3) * 3 + 1, 1)
|
||||
|
||||
if keyword == _TODAY:
|
||||
lo = datetime(today.year, today.month, today.day, tzinfo=UTC)
|
||||
return _iso_range(lo, lo + timedelta(days=1))
|
||||
if keyword == "yesterday":
|
||||
if keyword == _YESTERDAY:
|
||||
y = today - timedelta(days=1)
|
||||
lo = datetime(y.year, y.month, y.day, tzinfo=UTC)
|
||||
hi = datetime(today.year, today.month, today.day, tzinfo=UTC)
|
||||
return _iso_range(lo, hi)
|
||||
if keyword == "this_week":
|
||||
mon = today - timedelta(days=today.weekday())
|
||||
lo = datetime(mon.year, mon.month, mon.day, tzinfo=UTC)
|
||||
return _iso_range(lo, lo + timedelta(weeks=1))
|
||||
if keyword == "last_week":
|
||||
if keyword == _PREVIOUS_WEEK:
|
||||
this_mon = today - timedelta(days=today.weekday())
|
||||
last_mon = this_mon - timedelta(weeks=1)
|
||||
lo = datetime(last_mon.year, last_mon.month, last_mon.day, tzinfo=UTC)
|
||||
hi = datetime(this_mon.year, this_mon.month, this_mon.day, tzinfo=UTC)
|
||||
return _iso_range(lo, hi)
|
||||
if keyword == "this_month":
|
||||
if keyword == _THIS_MONTH:
|
||||
lo = datetime(today.year, today.month, 1, tzinfo=UTC)
|
||||
if today.month == 12:
|
||||
hi = datetime(today.year + 1, 1, 1, tzinfo=UTC)
|
||||
else:
|
||||
hi = datetime(today.year, today.month + 1, 1, tzinfo=UTC)
|
||||
return _iso_range(lo, hi)
|
||||
if keyword == "last_month":
|
||||
if keyword == _PREVIOUS_MONTH:
|
||||
if today.month == 1:
|
||||
lo = datetime(today.year - 1, 12, 1, tzinfo=UTC)
|
||||
else:
|
||||
lo = datetime(today.year, today.month - 1, 1, tzinfo=UTC)
|
||||
hi = datetime(today.year, today.month, 1, tzinfo=UTC)
|
||||
return _iso_range(lo, hi)
|
||||
if keyword == "this_year":
|
||||
if keyword == _THIS_YEAR:
|
||||
lo = datetime(today.year, 1, 1, tzinfo=UTC)
|
||||
return _iso_range(lo, datetime(today.year + 1, 1, 1, tzinfo=UTC))
|
||||
if keyword == "last_year":
|
||||
if keyword == _PREVIOUS_YEAR:
|
||||
lo = datetime(today.year - 1, 1, 1, tzinfo=UTC)
|
||||
return _iso_range(lo, datetime(today.year, 1, 1, tzinfo=UTC))
|
||||
if keyword == _PREVIOUS_QUARTER:
|
||||
this_quarter = _quarter_start(today)
|
||||
last_quarter = this_quarter - relativedelta(months=3)
|
||||
lo = datetime(
|
||||
last_quarter.year,
|
||||
last_quarter.month,
|
||||
last_quarter.day,
|
||||
tzinfo=UTC,
|
||||
)
|
||||
hi = datetime(
|
||||
this_quarter.year,
|
||||
this_quarter.month,
|
||||
this_quarter.day,
|
||||
tzinfo=UTC,
|
||||
)
|
||||
return _iso_range(lo, hi)
|
||||
raise ValueError(f"Unknown keyword: {keyword}")
|
||||
|
||||
|
||||
@@ -127,42 +160,46 @@ def _datetime_range(keyword: str, tz: tzinfo) -> str:
|
||||
def _midnight(d: date) -> datetime:
|
||||
return datetime(d.year, d.month, d.day, tzinfo=tz).astimezone(UTC)
|
||||
|
||||
if keyword == "today":
|
||||
def _quarter_start(d: date) -> date:
|
||||
return date(d.year, ((d.month - 1) // 3) * 3 + 1, 1)
|
||||
|
||||
if keyword == _TODAY:
|
||||
return _iso_range(_midnight(today), _midnight(today + timedelta(days=1)))
|
||||
if keyword == "yesterday":
|
||||
if keyword == _YESTERDAY:
|
||||
y = today - timedelta(days=1)
|
||||
return _iso_range(_midnight(y), _midnight(today))
|
||||
if keyword == "this_week":
|
||||
mon = today - timedelta(days=today.weekday())
|
||||
return _iso_range(_midnight(mon), _midnight(mon + timedelta(weeks=1)))
|
||||
if keyword == "last_week":
|
||||
if keyword == _PREVIOUS_WEEK:
|
||||
this_mon = today - timedelta(days=today.weekday())
|
||||
last_mon = this_mon - timedelta(weeks=1)
|
||||
return _iso_range(_midnight(last_mon), _midnight(this_mon))
|
||||
if keyword == "this_month":
|
||||
if keyword == _THIS_MONTH:
|
||||
first = today.replace(day=1)
|
||||
if today.month == 12:
|
||||
next_first = date(today.year + 1, 1, 1)
|
||||
else:
|
||||
next_first = date(today.year, today.month + 1, 1)
|
||||
return _iso_range(_midnight(first), _midnight(next_first))
|
||||
if keyword == "last_month":
|
||||
if keyword == _PREVIOUS_MONTH:
|
||||
this_first = today.replace(day=1)
|
||||
if today.month == 1:
|
||||
last_first = date(today.year - 1, 12, 1)
|
||||
else:
|
||||
last_first = date(today.year, today.month - 1, 1)
|
||||
return _iso_range(_midnight(last_first), _midnight(this_first))
|
||||
if keyword == "this_year":
|
||||
if keyword == _THIS_YEAR:
|
||||
return _iso_range(
|
||||
_midnight(date(today.year, 1, 1)),
|
||||
_midnight(date(today.year + 1, 1, 1)),
|
||||
)
|
||||
if keyword == "last_year":
|
||||
if keyword == _PREVIOUS_YEAR:
|
||||
return _iso_range(
|
||||
_midnight(date(today.year - 1, 1, 1)),
|
||||
_midnight(date(today.year, 1, 1)),
|
||||
)
|
||||
if keyword == _PREVIOUS_QUARTER:
|
||||
this_quarter = _quarter_start(today)
|
||||
last_quarter = this_quarter - relativedelta(months=3)
|
||||
return _iso_range(_midnight(last_quarter), _midnight(this_quarter))
|
||||
raise ValueError(f"Unknown keyword: {keyword}")
|
||||
|
||||
|
||||
@@ -308,7 +345,7 @@ def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
|
||||
- Compact 14-digit dates (YYYYMMDDHHmmss)
|
||||
- Whoosh relative ranges ([-7 days to now], [now-1h TO now+2h])
|
||||
- 8-digit dates with field awareness (created:20240115)
|
||||
- Natural keywords (field:today, field:last_week, etc.)
|
||||
- Natural keywords (field:today, field:"previous quarter", etc.)
|
||||
|
||||
Args:
|
||||
query: Raw user query string
|
||||
@@ -326,7 +363,8 @@ def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
|
||||
query = _rewrite_relative_range(query)
|
||||
|
||||
def _replace(m: regex.Match[str]) -> str:
|
||||
field, keyword = m.group(1), m.group(2)
|
||||
field = m.group("field")
|
||||
keyword = (m.group("quoted") or m.group("bare")).lower()
|
||||
if field in _DATE_ONLY_FIELDS:
|
||||
return f"{field}:{_date_only_range(keyword, tz)}"
|
||||
return f"{field}:{_datetime_range(keyword, tz)}"
|
||||
|
||||
@@ -81,45 +81,38 @@ class TestCreatedDateField:
|
||||
),
|
||||
pytest.param(
|
||||
"created",
|
||||
"this_week",
|
||||
"2026-03-23T00:00:00Z",
|
||||
"2026-03-30T00:00:00Z",
|
||||
id="this_week_mon_sun",
|
||||
),
|
||||
pytest.param(
|
||||
"created",
|
||||
"last_week",
|
||||
"previous week",
|
||||
"2026-03-16T00:00:00Z",
|
||||
"2026-03-23T00:00:00Z",
|
||||
id="last_week",
|
||||
id="previous_week",
|
||||
),
|
||||
pytest.param(
|
||||
"created",
|
||||
"this_month",
|
||||
"this month",
|
||||
"2026-03-01T00:00:00Z",
|
||||
"2026-04-01T00:00:00Z",
|
||||
id="this_month",
|
||||
),
|
||||
pytest.param(
|
||||
"created",
|
||||
"last_month",
|
||||
"previous month",
|
||||
"2026-02-01T00:00:00Z",
|
||||
"2026-03-01T00:00:00Z",
|
||||
id="last_month",
|
||||
id="previous_month",
|
||||
),
|
||||
pytest.param(
|
||||
"created",
|
||||
"this_year",
|
||||
"this year",
|
||||
"2026-01-01T00:00:00Z",
|
||||
"2027-01-01T00:00:00Z",
|
||||
id="this_year",
|
||||
),
|
||||
pytest.param(
|
||||
"created",
|
||||
"last_year",
|
||||
"previous year",
|
||||
"2025-01-01T00:00:00Z",
|
||||
"2026-01-01T00:00:00Z",
|
||||
id="last_year",
|
||||
id="previous_year",
|
||||
),
|
||||
],
|
||||
)
|
||||
@@ -141,7 +134,7 @@ class TestCreatedDateField:
|
||||
def test_this_month_december_wraps_to_next_year(self) -> None:
|
||||
# December: next month must roll over to January 1 of next year
|
||||
lo, hi = _range(
|
||||
rewrite_natural_date_keywords("created:this_month", UTC),
|
||||
rewrite_natural_date_keywords("created:this month", UTC),
|
||||
"created",
|
||||
)
|
||||
assert lo == "2026-12-01T00:00:00Z"
|
||||
@@ -151,12 +144,21 @@ class TestCreatedDateField:
|
||||
def test_last_month_january_wraps_to_previous_year(self) -> None:
|
||||
# January: last month must roll back to December 1 of previous year
|
||||
lo, hi = _range(
|
||||
rewrite_natural_date_keywords("created:last_month", UTC),
|
||||
rewrite_natural_date_keywords("created:previous month", UTC),
|
||||
"created",
|
||||
)
|
||||
assert lo == "2025-12-01T00:00:00Z"
|
||||
assert hi == "2026-01-01T00:00:00Z"
|
||||
|
||||
@time_machine.travel(datetime(2026, 7, 15, 12, 0, tzinfo=UTC), tick=False)
def test_previous_quarter(self) -> None:
    """With the clock frozen on 2026-07-15, "previous quarter" spans April-June."""
    rewritten = rewrite_natural_date_keywords('created:"previous quarter"', UTC)
    start, end = _range(rewritten, "created")

    # Half-open range: first day of Q2 up to the first day of Q3.
    assert start == "2026-04-01T00:00:00Z"
    assert end == "2026-07-01T00:00:00Z"
|
||||
|
||||
def test_unknown_keyword_raises(self) -> None:
    """A keyword the date-only helper does not recognise raises ``ValueError``."""
    unsupported = "bogus_keyword"
    with pytest.raises(ValueError, match="Unknown keyword"):
        _date_only_range(unsupported, UTC)
|
||||
@@ -202,40 +204,34 @@ class TestDateTimeFields:
|
||||
id="yesterday",
|
||||
),
|
||||
pytest.param(
|
||||
"this_week",
|
||||
"2026-03-23T00:00:00Z",
|
||||
"2026-03-30T00:00:00Z",
|
||||
id="this_week",
|
||||
),
|
||||
pytest.param(
|
||||
"last_week",
|
||||
"previous week",
|
||||
"2026-03-16T00:00:00Z",
|
||||
"2026-03-23T00:00:00Z",
|
||||
id="last_week",
|
||||
id="previous_week",
|
||||
),
|
||||
pytest.param(
|
||||
"this_month",
|
||||
"this month",
|
||||
"2026-03-01T00:00:00Z",
|
||||
"2026-04-01T00:00:00Z",
|
||||
id="this_month",
|
||||
),
|
||||
pytest.param(
|
||||
"last_month",
|
||||
"previous month",
|
||||
"2026-02-01T00:00:00Z",
|
||||
"2026-03-01T00:00:00Z",
|
||||
id="last_month",
|
||||
id="previous_month",
|
||||
),
|
||||
pytest.param(
|
||||
"this_year",
|
||||
"this year",
|
||||
"2026-01-01T00:00:00Z",
|
||||
"2027-01-01T00:00:00Z",
|
||||
id="this_year",
|
||||
),
|
||||
pytest.param(
|
||||
"last_year",
|
||||
"previous year",
|
||||
"2025-01-01T00:00:00Z",
|
||||
"2026-01-01T00:00:00Z",
|
||||
id="last_year",
|
||||
id="previous_year",
|
||||
),
|
||||
],
|
||||
)
|
||||
@@ -254,17 +250,54 @@ class TestDateTimeFields:
|
||||
@time_machine.travel(datetime(2026, 12, 15, 12, 0, tzinfo=UTC), tick=False)
|
||||
def test_this_month_december_wraps_to_next_year(self) -> None:
|
||||
# December: next month wraps to January of next year
|
||||
lo, hi = _range(rewrite_natural_date_keywords("added:this_month", UTC), "added")
|
||||
lo, hi = _range(rewrite_natural_date_keywords("added:this month", UTC), "added")
|
||||
assert lo == "2026-12-01T00:00:00Z"
|
||||
assert hi == "2027-01-01T00:00:00Z"
|
||||
|
||||
@time_machine.travel(datetime(2026, 1, 15, 12, 0, tzinfo=UTC), tick=False)
|
||||
def test_last_month_january_wraps_to_previous_year(self) -> None:
|
||||
# January: last month wraps back to December of previous year
|
||||
lo, hi = _range(rewrite_natural_date_keywords("added:last_month", UTC), "added")
|
||||
lo, hi = _range(
|
||||
rewrite_natural_date_keywords("added:previous month", UTC),
|
||||
"added",
|
||||
)
|
||||
assert lo == "2025-12-01T00:00:00Z"
|
||||
assert hi == "2026-01-01T00:00:00Z"
|
||||
|
||||
@pytest.mark.parametrize(
    ("query", "expected_lo", "expected_hi"),
    [
        pytest.param(
            'added:"previous quarter"',
            "2026-04-01T00:00:00Z",
            "2026-07-01T00:00:00Z",
            id="quoted_previous_quarter",
        ),
        pytest.param(
            "added:previous month",
            "2026-06-01T00:00:00Z",
            "2026-07-01T00:00:00Z",
            id="bare_previous_month",
        ),
        pytest.param(
            "added:this month",
            "2026-07-01T00:00:00Z",
            "2026-08-01T00:00:00Z",
            id="bare_this_month",
        ),
    ],
)
@time_machine.travel(datetime(2026, 7, 15, 12, 0, tzinfo=UTC), tick=False)
def test_legacy_natural_language_aliases(
    self,
    query: str,
    expected_lo: str,
    expected_hi: str,
) -> None:
    """Space-separated keyword phrases, quoted or bare, rewrite to the expected range.

    The clock is frozen on 2026-07-15 so every expected bound is fixed.
    """
    rewritten = rewrite_natural_date_keywords(query, UTC)
    actual_lo, actual_hi = _range(rewritten, "added")

    assert actual_lo == expected_lo
    assert actual_hi == expected_hi
|
||||
|
||||
def test_unknown_keyword_raises(self) -> None:
    """A keyword the datetime helper does not recognise raises ``ValueError``."""
    unsupported = "bogus_keyword"
    with pytest.raises(ValueError, match="Unknown keyword"):
        _datetime_range(unsupported, UTC)
|
||||
|
||||
@@ -3,6 +3,7 @@ from datetime import timedelta
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
import time_machine
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import Permission
|
||||
@@ -26,6 +27,7 @@ from documents.models import Tag
|
||||
from documents.models import Workflow
|
||||
from documents.search import get_backend
|
||||
from documents.search import reset_backend
|
||||
from documents.tests.factories import DocumentFactory
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from paperless_mail.models import MailAccount
|
||||
from paperless_mail.models import MailRule
|
||||
@@ -741,6 +743,49 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# Tantivy rejects unparsable field queries with a 400
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
|
||||
@override_settings(
    TIME_ZONE="UTC",
)
@time_machine.travel(
    datetime.datetime(2026, 7, 15, 12, 0, tzinfo=datetime.UTC),
    tick=False,
)
def test_search_added_previous_quarter(self) -> None:
    """
    GIVEN:
        - Documents inside and outside the previous quarter
    WHEN:
        - Query with the legacy natural-language phrase used by the UI
    THEN:
        - Previous-quarter documents are returned
    """
    # Clock is frozen on 2026-07-15, so the previous quarter is April-June.
    d1 = DocumentFactory.create(
        title="quarterly statement april",
        content="bank statement",
        added=datetime.datetime(2026, 4, 10, 12, 0, tzinfo=datetime.UTC),
    )
    d2 = DocumentFactory.create(
        title="quarterly statement june",
        content="bank statement",
        added=datetime.datetime(2026, 6, 20, 12, 0, tzinfo=datetime.UTC),
    )
    # Added in the current quarter; must not be part of the result.
    d3 = DocumentFactory.create(
        title="quarterly statement july",
        content="bank statement",
        added=datetime.datetime(2026, 7, 10, 12, 0, tzinfo=datetime.UTC),
    )

    backend = get_backend()
    backend.add_or_update(d1)
    backend.add_or_update(d2)
    backend.add_or_update(d3)

    response = self.client.get('/api/documents/?query=added:"previous quarter"')
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    results = response.data["results"]
    # Compare against the ids actually assigned rather than literal 1 and 2:
    # primary-key sequences are not reset between tests, so literal ids make
    # the test depend on execution order and database backend.
    self.assertEqual({r["id"] for r in results}, {d1.id, d2.id})
|
||||
|
||||
@mock.patch("documents.search._backend.TantivyBackend.autocomplete")
|
||||
def test_search_autocomplete_limits(self, m) -> None:
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user