mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-04-19 06:19:26 +00:00
Fix: retain backwards compatibility with natural-date keywords in tantivy
This commit is contained in:
@@ -25,21 +25,43 @@ _REGEX_TIMEOUT: Final[float] = 1.0
|
||||
|
||||
_DATE_ONLY_FIELDS = frozenset({"created"})
|
||||
|
||||
_DATE_KEYWORDS = frozenset(
|
||||
{
|
||||
"today",
|
||||
"yesterday",
|
||||
"this_week",
|
||||
"last_week",
|
||||
"this_month",
|
||||
"last_month",
|
||||
"this_year",
|
||||
"last_year",
|
||||
},
|
||||
# Natural-date keywords accepted after ``field:``, mapped to their canonical
# (underscore) form.  Besides the canonical names, the whoosh-style spellings
# ("last week", "previous month", ...) are kept so queries written for the old
# backend keep working.
#
# Keys must be lowercase: lookups are done on the lowercased user input.
# Every multi-word alias is listed both space-separated and
# underscore-separated so the two spellings are interchangeable.
_DATE_KEYWORD_ALIASES: Final[dict[str, str]] = {
    "today": "today",
    "yesterday": "yesterday",
    # --- week ---
    "this_week": "this_week",
    "this week": "this_week",
    "last_week": "last_week",
    "last week": "last_week",
    "previous_week": "last_week",
    "previous week": "last_week",
    # --- month ---
    "this_month": "this_month",
    "this month": "this_month",
    "last_month": "last_month",
    "last month": "last_month",
    "previous_month": "last_month",
    "previous month": "last_month",
    # --- quarter ---
    "last_quarter": "last_quarter",
    "last quarter": "last_quarter",
    "previous_quarter": "last_quarter",
    "previous quarter": "last_quarter",
    # --- year ---
    "this_year": "this_year",
    "this year": "this_year",
    "last_year": "last_year",
    "last year": "last_year",
    "previous_year": "last_year",
    "previous year": "last_year",
}
|
||||
# Canonical keyword names, i.e. the distinct values of the alias table.
_DATE_KEYWORDS = frozenset(_DATE_KEYWORD_ALIASES.values())

# Regex alternation of every accepted spelling.  Each key is escaped (which
# also escapes the spaces in multi-word aliases, keeping them literal under
# VERBOSE) and the alternatives are ordered longest first.
_DATE_KEYWORD_PATTERN = "|".join(
    sorted((regex.escape(k) for k in _DATE_KEYWORD_ALIASES), key=len, reverse=True),
)

# Matches ``field:keyword`` with optional whitespace around the colon.
# The keyword may be wrapped in matching single or double quotes (group
# ``quoted``) or written bare (group ``bare``); a bare keyword must not be
# directly followed by a word character or ``-``.
# Compiled with VERBOSE, so the layout whitespace inside the pattern is ignored.
_FIELD_DATE_RE = regex.compile(
    rf"""(?P<field>\w+)\s*:\s*(?:
        (?P<quote>["'])(?P<quoted>{_DATE_KEYWORD_PATTERN})(?P=quote)
        |
        (?P<bare>{_DATE_KEYWORD_PATTERN})(?![\w-])
    )""",
    regex.IGNORECASE | regex.VERBOSE,
)
|
||||
_COMPACT_DATE_RE = regex.compile(r"\b(\d{14})\b")
|
||||
_RELATIVE_RANGE_RE = regex.compile(
|
||||
@@ -74,6 +96,9 @@ def _date_only_range(keyword: str, tz: tzinfo) -> str:
|
||||
|
||||
today = datetime.now(tz).date()
|
||||
|
||||
def _quarter_start(d: date) -> date:
    """Return the first day of the calendar quarter that contains *d*."""
    # 0 for Jan-Mar, 1 for Apr-Jun, 2 for Jul-Sep, 3 for Oct-Dec.
    quarter_index = (d.month - 1) // 3
    first_month = quarter_index * 3 + 1
    return date(d.year, first_month, 1)
|
||||
|
||||
if keyword == "today":
|
||||
lo = datetime(today.year, today.month, today.day, tzinfo=UTC)
|
||||
return _iso_range(lo, lo + timedelta(days=1))
|
||||
@@ -106,6 +131,22 @@ def _date_only_range(keyword: str, tz: tzinfo) -> str:
|
||||
lo = datetime(today.year, today.month - 1, 1, tzinfo=UTC)
|
||||
hi = datetime(today.year, today.month, 1, tzinfo=UTC)
|
||||
return _iso_range(lo, hi)
|
||||
if keyword == "last_quarter":
|
||||
this_quarter = _quarter_start(today)
|
||||
last_quarter = this_quarter - relativedelta(months=3)
|
||||
lo = datetime(
|
||||
last_quarter.year,
|
||||
last_quarter.month,
|
||||
last_quarter.day,
|
||||
tzinfo=UTC,
|
||||
)
|
||||
hi = datetime(
|
||||
this_quarter.year,
|
||||
this_quarter.month,
|
||||
this_quarter.day,
|
||||
tzinfo=UTC,
|
||||
)
|
||||
return _iso_range(lo, hi)
|
||||
if keyword == "this_year":
|
||||
lo = datetime(today.year, 1, 1, tzinfo=UTC)
|
||||
return _iso_range(lo, datetime(today.year + 1, 1, 1, tzinfo=UTC))
|
||||
@@ -127,6 +168,9 @@ def _datetime_range(keyword: str, tz: tzinfo) -> str:
|
||||
def _midnight(d: date) -> datetime:
|
||||
return datetime(d.year, d.month, d.day, tzinfo=tz).astimezone(UTC)
|
||||
|
||||
def _quarter_start(d: date) -> date:
    """Return the first day of the calendar quarter that contains *d*."""
    # 0 for Jan-Mar, 1 for Apr-Jun, 2 for Jul-Sep, 3 for Oct-Dec.
    quarter_index = (d.month - 1) // 3
    first_month = quarter_index * 3 + 1
    return date(d.year, first_month, 1)
|
||||
|
||||
if keyword == "today":
|
||||
return _iso_range(_midnight(today), _midnight(today + timedelta(days=1)))
|
||||
if keyword == "yesterday":
|
||||
@@ -153,6 +197,10 @@ def _datetime_range(keyword: str, tz: tzinfo) -> str:
|
||||
else:
|
||||
last_first = date(today.year, today.month - 1, 1)
|
||||
return _iso_range(_midnight(last_first), _midnight(this_first))
|
||||
if keyword == "last_quarter":
|
||||
this_quarter = _quarter_start(today)
|
||||
last_quarter = this_quarter - relativedelta(months=3)
|
||||
return _iso_range(_midnight(last_quarter), _midnight(this_quarter))
|
||||
if keyword == "this_year":
|
||||
return _iso_range(
|
||||
_midnight(date(today.year, 1, 1)),
|
||||
@@ -326,7 +374,9 @@ def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
|
||||
query = _rewrite_relative_range(query)
|
||||
|
||||
def _replace(m: regex.Match[str]) -> str:
    """Rewrite one ``field:keyword`` match into ``field:<range>``.

    Reads the named groups ``field``, ``quoted`` and ``bare`` from *m*,
    resolves the matched spelling to its canonical keyword through
    ``_DATE_KEYWORD_ALIASES`` and returns the field followed by the range
    produced for that keyword in the enclosing ``tz``.
    """
    field = m.group("field")
    # The keyword is captured by either the quoted or the bare alternative;
    # the group that did not participate is None.
    raw_keyword = m.group("quoted") or m.group("bare")
    # Alias keys are lowercase, so normalise the user's spelling first.
    keyword = _DATE_KEYWORD_ALIASES[raw_keyword.lower()]
    if field in _DATE_ONLY_FIELDS:
        return f"{field}:{_date_only_range(keyword, tz)}"
    return f"{field}:{_datetime_range(keyword, tz)}"
|
||||
|
||||
@@ -265,6 +265,40 @@ class TestDateTimeFields:
|
||||
assert lo == "2025-12-01T00:00:00Z"
|
||||
assert hi == "2026-01-01T00:00:00Z"
|
||||
|
||||
@pytest.mark.parametrize(
    ("query", "expected_lo", "expected_hi"),
    [
        pytest.param(
            'added:"previous quarter"',
            "2026-04-01T00:00:00Z",
            "2026-07-01T00:00:00Z",
            id="quoted_previous_quarter",
        ),
        pytest.param(
            "added:previous month",
            "2026-06-01T00:00:00Z",
            "2026-07-01T00:00:00Z",
            id="bare_previous_month",
        ),
        pytest.param(
            "added:this month",
            "2026-07-01T00:00:00Z",
            "2026-08-01T00:00:00Z",
            id="bare_this_month",
        ),
    ],
)
@time_machine.travel(datetime(2026, 7, 15, 12, 0, tzinfo=UTC), tick=False)
def test_legacy_natural_language_aliases(
    self,
    query: str,
    expected_lo: str,
    expected_hi: str,
) -> None:
    """Legacy space-separated phrases are rewritten to the expected range.

    The clock is frozen at 2026-07-15 12:00 UTC, so the expected bounds in
    the parameter table are relative to that instant.
    """
    rewritten = rewrite_natural_date_keywords(query, UTC)
    lower, upper = _range(rewritten, "added")
    assert lower == expected_lo
    assert upper == expected_hi
|
||||
|
||||
def test_unknown_keyword_raises(self) -> None:
    """A keyword the range builder does not know raises ``ValueError``."""
    unknown = "bogus_keyword"
    with pytest.raises(ValueError, match="Unknown keyword"):
        _datetime_range(unknown, UTC)
|
||||
|
||||
@@ -3,6 +3,7 @@ from datetime import timedelta
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
import time_machine
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import Permission
|
||||
@@ -741,6 +742,55 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# Tantivy rejects unparsable field queries with a 400
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
|
||||
@override_settings(
    TIME_ZONE="UTC",
)
@time_machine.travel(
    datetime.datetime(2026, 7, 15, 12, 0, tzinfo=datetime.UTC),
    tick=False,
)
def test_search_added_previous_quarter_legacy_phrase(self) -> None:
    """
    GIVEN:
        - Two documents added in the previous quarter (April, June) and
          one added in the current quarter (July), with "now" frozen at
          2026-07-15
    WHEN:
        - The API is queried with the quoted legacy phrase
          added:"previous quarter"
    THEN:
        - Only the two previous-quarter documents are returned
    """
    # (pk, title, checksum, added)
    fixtures = (
        (
            1,
            "quarterly statement april",
            "Q1",
            datetime.datetime(2026, 4, 10, 12, 0, tzinfo=datetime.UTC),
        ),
        (
            2,
            "quarterly statement june",
            "Q2",
            datetime.datetime(2026, 6, 20, 12, 0, tzinfo=datetime.UTC),
        ),
        (
            3,
            "quarterly statement july",
            "Q3",
            datetime.datetime(2026, 7, 10, 12, 0, tzinfo=datetime.UTC),
        ),
    )
    # Create every row first, then index them, in pk order.
    documents = [
        Document.objects.create(
            title=title,
            content="bank statement",
            checksum=checksum,
            pk=pk,
            added=added,
        )
        for pk, title, checksum, added in fixtures
    ]

    backend = get_backend()
    for document in documents:
        backend.add_or_update(document)

    response = self.client.get('/api/documents/?query=added:"previous quarter"')
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    returned_ids = {hit["id"] for hit in response.data["results"]}
    self.assertEqual(returned_ids, {1, 2})
|
||||
|
||||
@mock.patch("documents.search._backend.TantivyBackend.autocomplete")
|
||||
def test_search_autocomplete_limits(self, m) -> None:
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user