mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-06 13:49:44 +00:00
Restrict comma expansion in queries to multi-valued fields only, instead of any schema field
This commit is contained in:
@@ -464,8 +464,13 @@ def normalize_query(query: str) -> str:
|
||||
return " AND ".join(f"{field}:{v}" for v in values)
|
||||
|
||||
try:
|
||||
# Only true multi-value fields are comma-split, matching Whoosh's
|
||||
# KEYWORD(commas=True) fields (tag, tag_id, viewer_id). A field-agnostic
|
||||
# match would corrupt unrelated text such as URLs (http://x/a,b) or
|
||||
# numeric values (title:10,20). tag_id is ordered before tag so the
|
||||
# longer field name wins.
|
||||
query = regex.sub(
|
||||
r"(\w+):([^\s\[\]]+(?:,[^\s\[\]]+)+)",
|
||||
r"(?<!\w)(tag_id|viewer_id|tag):([^\s\[\]]+(?:,[^\s\[\]]+)+)",
|
||||
_expand,
|
||||
query,
|
||||
timeout=_REGEX_TIMEOUT,
|
||||
|
||||
@@ -607,6 +607,28 @@ class TestNormalizeQuery:
|
||||
def test_normalize_no_commas_unchanged(self) -> None:
|
||||
# With no comma in the query there is nothing to expand; the input must be returned as-is.
assert normalize_query("bank statement") == "bank statement"
|
||||
|
||||
def test_normalize_expands_multi_value_id_fields(self) -> None:
|
||||
# tag_id and viewer_id were KEYWORD(commas=True) in Whoosh too.
# Each comma-separated value must become its own "field:value" clause, joined with AND.
|
||||
assert normalize_query("tag_id:1,2") == "tag_id:1 AND tag_id:2"
|
||||
assert normalize_query("viewer_id:5,6") == "viewer_id:5 AND viewer_id:6"
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"query",
|
||||
[
|
||||
pytest.param("http://example.com/a,b", id="url_with_comma"),
|
||||
pytest.param("title:10,20", id="non_multivalue_field"),
|
||||
pytest.param("correspondent:foo,bar", id="text_field_not_comma_split"),
|
||||
pytest.param("content:a,b,c", id="content_field"),
|
||||
],
|
||||
)
|
||||
def test_normalize_does_not_expand_non_multi_value_fields(
|
||||
self,
|
||||
query: str,
|
||||
) -> None:
|
||||
# Only true multi-value fields (tag/tag_id/viewer_id) comma-split, matching
|
||||
# Whoosh's KEYWORD(commas=True) set. Everything else passes through verbatim.
# The parametrized cases cover a bare URL plus the title, correspondent and content fields.
|
||||
assert normalize_query(query) == query
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("raw", "expected"),
|
||||
[
|
||||
|
||||
Reference in New Issue
Block a user