mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-11 08:09:44 +00:00
Feature: Replace Whoosh with tantivy search backend (#12471)
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com> Co-authored-by: Antoine Mérino <3023499+Merinorus@users.noreply.github.com>
This commit is contained in:
@@ -2,6 +2,7 @@ import datetime
|
||||
from datetime import timedelta
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import Permission
|
||||
@@ -11,9 +12,7 @@ from django.utils import timezone
|
||||
from guardian.shortcuts import assign_perm
|
||||
from rest_framework import status
|
||||
from rest_framework.test import APITestCase
|
||||
from whoosh.writing import AsyncWriter
|
||||
|
||||
from documents import index
|
||||
from documents.bulk_edit import set_permissions
|
||||
from documents.models import Correspondent
|
||||
from documents.models import CustomField
|
||||
@@ -25,18 +24,27 @@ from documents.models import SavedView
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.models import Workflow
|
||||
from documents.search import get_backend
|
||||
from documents.search import reset_backend
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from paperless_mail.models import MailAccount
|
||||
from paperless_mail.models import MailRule
|
||||
|
||||
pytestmark = pytest.mark.search
|
||||
|
||||
|
||||
class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
def setUp(self) -> None:
|
||||
super().setUp()
|
||||
reset_backend()
|
||||
|
||||
self.user = User.objects.create_superuser(username="temp_admin")
|
||||
self.client.force_authenticate(user=self.user)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
reset_backend()
|
||||
super().tearDown()
|
||||
|
||||
def test_search(self) -> None:
|
||||
d1 = Document.objects.create(
|
||||
title="invoice",
|
||||
@@ -57,13 +65,11 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
checksum="C",
|
||||
original_filename="someepdf.pdf",
|
||||
)
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
# Note to future self: there is a reason we dont use a model signal handler to update the index: some operations edit many documents at once
|
||||
# (retagger, renamer) and we don't want to open a writer for each of these, but rather perform the entire operation with one writer.
|
||||
# That's why we can't open the writer in a model on_save handler or something.
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/documents/?query=bank")
|
||||
results = response.data["results"]
|
||||
self.assertEqual(response.data["count"], 3)
|
||||
@@ -98,9 +104,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
checksum="B",
|
||||
pk=2,
|
||||
)
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=bank",
|
||||
@@ -127,8 +133,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
)
|
||||
matching_doc.tags.add(tag)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, matching_doc)
|
||||
get_backend().add_or_update(matching_doc)
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=bank&include_selection_data=true",
|
||||
@@ -187,10 +192,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
value_int=20,
|
||||
)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get(
|
||||
f"/api/documents/?query=match&ordering=custom_field_{custom_field.pk}",
|
||||
@@ -211,15 +216,15 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
)
|
||||
|
||||
def test_search_multi_page(self) -> None:
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for i in range(55):
|
||||
doc = Document.objects.create(
|
||||
checksum=str(i),
|
||||
pk=i + 1,
|
||||
title=f"Document {i + 1}",
|
||||
content="content",
|
||||
)
|
||||
index.update_document(writer, doc)
|
||||
backend = get_backend()
|
||||
for i in range(55):
|
||||
doc = Document.objects.create(
|
||||
checksum=str(i),
|
||||
pk=i + 1,
|
||||
title=f"Document {i + 1}",
|
||||
content="content",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
# This is here so that we test that no document gets returned twice (might happen if the paging is not working)
|
||||
seen_ids = []
|
||||
@@ -246,15 +251,15 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
seen_ids.append(result["id"])
|
||||
|
||||
def test_search_invalid_page(self) -> None:
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for i in range(15):
|
||||
doc = Document.objects.create(
|
||||
checksum=str(i),
|
||||
pk=i + 1,
|
||||
title=f"Document {i + 1}",
|
||||
content="content",
|
||||
)
|
||||
index.update_document(writer, doc)
|
||||
backend = get_backend()
|
||||
for i in range(15):
|
||||
doc = Document.objects.create(
|
||||
checksum=str(i),
|
||||
pk=i + 1,
|
||||
title=f"Document {i + 1}",
|
||||
content="content",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
response = self.client.get("/api/documents/?query=content&page=0&page_size=10")
|
||||
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
|
||||
@@ -292,26 +297,25 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
pk=3,
|
||||
checksum="C",
|
||||
)
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/documents/?query=added:[-1 week to now]")
|
||||
results = response.data["results"]
|
||||
# Expect 3 documents returned
|
||||
self.assertEqual(len(results), 3)
|
||||
|
||||
for idx, subset in enumerate(
|
||||
[
|
||||
{"id": 1, "title": "invoice"},
|
||||
{"id": 2, "title": "bank statement 1"},
|
||||
{"id": 3, "title": "bank statement 3"},
|
||||
],
|
||||
):
|
||||
result = results[idx]
|
||||
# Assert subset in results
|
||||
self.assertDictEqual(result, {**result, **subset})
|
||||
result_map = {r["id"]: r for r in results}
|
||||
self.assertEqual(set(result_map.keys()), {1, 2, 3})
|
||||
for subset in [
|
||||
{"id": 1, "title": "invoice"},
|
||||
{"id": 2, "title": "bank statement 1"},
|
||||
{"id": 3, "title": "bank statement 3"},
|
||||
]:
|
||||
r = result_map[subset["id"]]
|
||||
self.assertDictEqual(r, {**r, **subset})
|
||||
|
||||
@override_settings(
|
||||
TIME_ZONE="America/Chicago",
|
||||
@@ -347,10 +351,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# 7 days, 1 hour and 1 minute ago
|
||||
added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
|
||||
)
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/documents/?query=added:[-1 week to now]")
|
||||
results = response.data["results"]
|
||||
@@ -358,12 +362,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# Expect 2 documents returned
|
||||
self.assertEqual(len(results), 2)
|
||||
|
||||
for idx, subset in enumerate(
|
||||
[{"id": 1, "title": "invoice"}, {"id": 2, "title": "bank statement 1"}],
|
||||
):
|
||||
result = results[idx]
|
||||
# Assert subset in results
|
||||
self.assertDictEqual(result, {**result, **subset})
|
||||
result_map = {r["id"]: r for r in results}
|
||||
self.assertEqual(set(result_map.keys()), {1, 2})
|
||||
for subset in [
|
||||
{"id": 1, "title": "invoice"},
|
||||
{"id": 2, "title": "bank statement 1"},
|
||||
]:
|
||||
r = result_map[subset["id"]]
|
||||
self.assertDictEqual(r, {**r, **subset})
|
||||
|
||||
@override_settings(
|
||||
TIME_ZONE="Europe/Sofia",
|
||||
@@ -399,10 +405,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# 7 days, 1 hour and 1 minute ago
|
||||
added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
|
||||
)
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/documents/?query=added:[-1 week to now]")
|
||||
results = response.data["results"]
|
||||
@@ -410,12 +416,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# Expect 2 documents returned
|
||||
self.assertEqual(len(results), 2)
|
||||
|
||||
for idx, subset in enumerate(
|
||||
[{"id": 1, "title": "invoice"}, {"id": 2, "title": "bank statement 1"}],
|
||||
):
|
||||
result = results[idx]
|
||||
# Assert subset in results
|
||||
self.assertDictEqual(result, {**result, **subset})
|
||||
result_map = {r["id"]: r for r in results}
|
||||
self.assertEqual(set(result_map.keys()), {1, 2})
|
||||
for subset in [
|
||||
{"id": 1, "title": "invoice"},
|
||||
{"id": 2, "title": "bank statement 1"},
|
||||
]:
|
||||
r = result_map[subset["id"]]
|
||||
self.assertDictEqual(r, {**r, **subset})
|
||||
|
||||
def test_search_added_in_last_month(self) -> None:
|
||||
"""
|
||||
@@ -451,10 +459,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
|
||||
)
|
||||
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/documents/?query=added:[-1 month to now]")
|
||||
results = response.data["results"]
|
||||
@@ -462,12 +470,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# Expect 2 documents returned
|
||||
self.assertEqual(len(results), 2)
|
||||
|
||||
for idx, subset in enumerate(
|
||||
[{"id": 1, "title": "invoice"}, {"id": 3, "title": "bank statement 3"}],
|
||||
):
|
||||
result = results[idx]
|
||||
# Assert subset in results
|
||||
self.assertDictEqual(result, {**result, **subset})
|
||||
result_map = {r["id"]: r for r in results}
|
||||
self.assertEqual(set(result_map.keys()), {1, 3})
|
||||
for subset in [
|
||||
{"id": 1, "title": "invoice"},
|
||||
{"id": 3, "title": "bank statement 3"},
|
||||
]:
|
||||
r = result_map[subset["id"]]
|
||||
self.assertDictEqual(r, {**r, **subset})
|
||||
|
||||
@override_settings(
|
||||
TIME_ZONE="America/Denver",
|
||||
@@ -507,10 +517,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
|
||||
)
|
||||
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/documents/?query=added:[-1 month to now]")
|
||||
results = response.data["results"]
|
||||
@@ -518,12 +528,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# Expect 2 documents returned
|
||||
self.assertEqual(len(results), 2)
|
||||
|
||||
for idx, subset in enumerate(
|
||||
[{"id": 1, "title": "invoice"}, {"id": 3, "title": "bank statement 3"}],
|
||||
):
|
||||
result = results[idx]
|
||||
# Assert subset in results
|
||||
self.assertDictEqual(result, {**result, **subset})
|
||||
result_map = {r["id"]: r for r in results}
|
||||
self.assertEqual(set(result_map.keys()), {1, 3})
|
||||
for subset in [
|
||||
{"id": 1, "title": "invoice"},
|
||||
{"id": 3, "title": "bank statement 3"},
|
||||
]:
|
||||
r = result_map[subset["id"]]
|
||||
self.assertDictEqual(r, {**r, **subset})
|
||||
|
||||
@override_settings(
|
||||
TIME_ZONE="Europe/Sofia",
|
||||
@@ -563,10 +575,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# Django converts dates to UTC
|
||||
d3.refresh_from_db()
|
||||
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/documents/?query=added:20231201")
|
||||
results = response.data["results"]
|
||||
@@ -574,12 +586,8 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# Expect 1 document returned
|
||||
self.assertEqual(len(results), 1)
|
||||
|
||||
for idx, subset in enumerate(
|
||||
[{"id": 3, "title": "bank statement 3"}],
|
||||
):
|
||||
result = results[idx]
|
||||
# Assert subset in results
|
||||
self.assertDictEqual(result, {**result, **subset})
|
||||
self.assertEqual(results[0]["id"], 3)
|
||||
self.assertEqual(results[0]["title"], "bank statement 3")
|
||||
|
||||
def test_search_added_invalid_date(self) -> None:
|
||||
"""
|
||||
@@ -588,7 +596,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
WHEN:
|
||||
- Query with invalid added date
|
||||
THEN:
|
||||
- No documents returned
|
||||
- 400 Bad Request returned (Tantivy rejects invalid date field syntax)
|
||||
"""
|
||||
d1 = Document.objects.create(
|
||||
title="invoice",
|
||||
@@ -597,16 +605,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
pk=1,
|
||||
)
|
||||
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, d1)
|
||||
get_backend().add_or_update(d1)
|
||||
|
||||
response = self.client.get("/api/documents/?query=added:invalid-date")
|
||||
results = response.data["results"]
|
||||
|
||||
# Expect 0 document returned
|
||||
self.assertEqual(len(results), 0)
|
||||
# Tantivy rejects unparsable field queries with a 400
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
|
||||
@mock.patch("documents.index.autocomplete")
|
||||
@mock.patch("documents.search._backend.TantivyBackend.autocomplete")
|
||||
def test_search_autocomplete_limits(self, m) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -618,7 +624,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
- Limit requests are obeyed
|
||||
"""
|
||||
|
||||
m.side_effect = lambda ix, term, limit, user: [term for _ in range(limit)]
|
||||
m.side_effect = lambda term, limit, user=None: [term for _ in range(limit)]
|
||||
|
||||
response = self.client.get("/api/search/autocomplete/?term=test")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
@@ -671,32 +677,29 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
owner=u1,
|
||||
)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/search/autocomplete/?term=app")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data, [b"apples", b"applebaum", b"appletini"])
|
||||
self.assertEqual(response.data, ["applebaum", "apples", "appletini"])
|
||||
|
||||
d3.owner = u2
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d3)
|
||||
d3.save()
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/search/autocomplete/?term=app")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data, [b"apples", b"applebaum"])
|
||||
self.assertEqual(response.data, ["applebaum", "apples"])
|
||||
|
||||
assign_perm("view_document", u1, d3)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d3)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/search/autocomplete/?term=app")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data, [b"apples", b"applebaum", b"appletini"])
|
||||
self.assertEqual(response.data, ["applebaum", "apples", "appletini"])
|
||||
|
||||
def test_search_autocomplete_field_name_match(self) -> None:
|
||||
"""
|
||||
@@ -714,8 +717,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
checksum="1",
|
||||
)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d1)
|
||||
get_backend().add_or_update(d1)
|
||||
|
||||
response = self.client.get("/api/search/autocomplete/?term=created:2023")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
@@ -736,33 +738,36 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
checksum="1",
|
||||
)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d1)
|
||||
get_backend().add_or_update(d1)
|
||||
|
||||
response = self.client.get("/api/search/autocomplete/?term=auto")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data[0], b"auto")
|
||||
self.assertEqual(response.data[0], "auto")
|
||||
|
||||
def test_search_spelling_suggestion(self) -> None:
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for i in range(55):
|
||||
doc = Document.objects.create(
|
||||
checksum=str(i),
|
||||
pk=i + 1,
|
||||
title=f"Document {i + 1}",
|
||||
content=f"Things document {i + 1}",
|
||||
)
|
||||
index.update_document(writer, doc)
|
||||
def test_search_no_spelling_suggestion(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Documents exist with various terms
|
||||
WHEN:
|
||||
- Query for documents with any term
|
||||
THEN:
|
||||
- corrected_query is always None (Tantivy has no spell correction)
|
||||
"""
|
||||
backend = get_backend()
|
||||
for i in range(5):
|
||||
doc = Document.objects.create(
|
||||
checksum=str(i),
|
||||
pk=i + 1,
|
||||
title=f"Document {i + 1}",
|
||||
content=f"Things document {i + 1}",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
response = self.client.get("/api/documents/?query=thing")
|
||||
correction = response.data["corrected_query"]
|
||||
|
||||
self.assertEqual(correction, "things")
|
||||
self.assertIsNone(response.data["corrected_query"])
|
||||
|
||||
response = self.client.get("/api/documents/?query=things")
|
||||
correction = response.data["corrected_query"]
|
||||
|
||||
self.assertEqual(correction, None)
|
||||
self.assertIsNone(response.data["corrected_query"])
|
||||
|
||||
def test_search_spelling_suggestion_suppressed_for_private_terms(self):
|
||||
owner = User.objects.create_user("owner")
|
||||
@@ -771,24 +776,24 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
Permission.objects.get(codename="view_document"),
|
||||
)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for i in range(55):
|
||||
private_doc = Document.objects.create(
|
||||
checksum=f"p{i}",
|
||||
pk=100 + i,
|
||||
title=f"Private Document {i + 1}",
|
||||
content=f"treasury document {i + 1}",
|
||||
owner=owner,
|
||||
)
|
||||
visible_doc = Document.objects.create(
|
||||
checksum=f"v{i}",
|
||||
pk=200 + i,
|
||||
title=f"Visible Document {i + 1}",
|
||||
content=f"public ledger {i + 1}",
|
||||
owner=attacker,
|
||||
)
|
||||
index.update_document(writer, private_doc)
|
||||
index.update_document(writer, visible_doc)
|
||||
backend = get_backend()
|
||||
for i in range(5):
|
||||
private_doc = Document.objects.create(
|
||||
checksum=f"p{i}",
|
||||
pk=100 + i,
|
||||
title=f"Private Document {i + 1}",
|
||||
content=f"treasury document {i + 1}",
|
||||
owner=owner,
|
||||
)
|
||||
visible_doc = Document.objects.create(
|
||||
checksum=f"v{i}",
|
||||
pk=200 + i,
|
||||
title=f"Visible Document {i + 1}",
|
||||
content=f"public ledger {i + 1}",
|
||||
owner=attacker,
|
||||
)
|
||||
backend.add_or_update(private_doc)
|
||||
backend.add_or_update(visible_doc)
|
||||
|
||||
self.client.force_authenticate(user=attacker)
|
||||
|
||||
@@ -798,26 +803,6 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.data["count"], 0)
|
||||
self.assertIsNone(response.data["corrected_query"])
|
||||
|
||||
@mock.patch(
|
||||
"whoosh.searching.Searcher.correct_query",
|
||||
side_effect=Exception("Test error"),
|
||||
)
|
||||
def test_corrected_query_error(self, mock_correct_query) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A query that raises an error on correction
|
||||
WHEN:
|
||||
- API request for search with that query
|
||||
THEN:
|
||||
- The error is logged and the search proceeds
|
||||
"""
|
||||
with self.assertLogs("paperless.index", level="INFO") as cm:
|
||||
response = self.client.get("/api/documents/?query=2025-06-04")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
error_str = cm.output[0]
|
||||
expected_str = "Error while correcting query '2025-06-04': Test error"
|
||||
self.assertIn(expected_str, error_str)
|
||||
|
||||
def test_search_more_like(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -847,16 +832,16 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
checksum="C",
|
||||
)
|
||||
d4 = Document.objects.create(
|
||||
title="Monty Python & the Holy Grail",
|
||||
content="And now for something completely different",
|
||||
title="Quarterly Report",
|
||||
content="quarterly revenue profit margin earnings growth",
|
||||
pk=4,
|
||||
checksum="ABC",
|
||||
)
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
index.update_document(writer, d4)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
backend.add_or_update(d4)
|
||||
|
||||
response = self.client.get(f"/api/documents/?more_like_id={d2.id}")
|
||||
|
||||
@@ -864,9 +849,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
results = response.data["results"]
|
||||
|
||||
self.assertEqual(len(results), 2)
|
||||
self.assertEqual(results[0]["id"], d3.id)
|
||||
self.assertEqual(results[1]["id"], d1.id)
|
||||
self.assertGreaterEqual(len(results), 1)
|
||||
result_ids = [r["id"] for r in results]
|
||||
self.assertIn(d3.id, result_ids)
|
||||
self.assertNotIn(d4.id, result_ids)
|
||||
|
||||
def test_search_more_like_requires_view_permission_on_seed_document(
|
||||
self,
|
||||
@@ -908,10 +894,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
pk=12,
|
||||
)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, private_seed)
|
||||
index.update_document(writer, visible_doc)
|
||||
index.update_document(writer, other_doc)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(private_seed)
|
||||
backend.add_or_update(visible_doc)
|
||||
backend.add_or_update(other_doc)
|
||||
|
||||
self.client.force_authenticate(user=attacker)
|
||||
|
||||
@@ -985,9 +971,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
value_text="foobard4",
|
||||
)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for doc in Document.objects.all():
|
||||
index.update_document(writer, doc)
|
||||
backend = get_backend()
|
||||
for doc in Document.objects.all():
|
||||
backend.add_or_update(doc)
|
||||
|
||||
def search_query(q):
|
||||
r = self.client.get("/api/documents/?query=test" + q)
|
||||
@@ -1203,9 +1189,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
Document.objects.create(checksum="3", content="test 3", owner=u2)
|
||||
Document.objects.create(checksum="4", content="test 4")
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for doc in Document.objects.all():
|
||||
index.update_document(writer, doc)
|
||||
backend = get_backend()
|
||||
for doc in Document.objects.all():
|
||||
backend.add_or_update(doc)
|
||||
|
||||
self.client.force_authenticate(user=u1)
|
||||
r = self.client.get("/api/documents/?query=test")
|
||||
@@ -1256,9 +1242,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
d3 = Document.objects.create(checksum="3", content="test 3", owner=u2)
|
||||
Document.objects.create(checksum="4", content="test 4")
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for doc in Document.objects.all():
|
||||
index.update_document(writer, doc)
|
||||
backend = get_backend()
|
||||
for doc in Document.objects.all():
|
||||
backend.add_or_update(doc)
|
||||
|
||||
self.client.force_authenticate(user=u1)
|
||||
r = self.client.get("/api/documents/?query=test")
|
||||
@@ -1278,9 +1264,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
assign_perm("view_document", u1, d3)
|
||||
assign_perm("view_document", u2, d1)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for doc in [d1, d2, d3]:
|
||||
index.update_document(writer, doc)
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
self.client.force_authenticate(user=u1)
|
||||
r = self.client.get("/api/documents/?query=test")
|
||||
@@ -1343,9 +1329,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
user=u1,
|
||||
)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for doc in Document.objects.all():
|
||||
index.update_document(writer, doc)
|
||||
backend = get_backend()
|
||||
for doc in Document.objects.all():
|
||||
backend.add_or_update(doc)
|
||||
|
||||
def search_query(q):
|
||||
r = self.client.get("/api/documents/?query=test" + q)
|
||||
@@ -1378,13 +1364,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
search_query("&ordering=-num_notes"),
|
||||
[d1.id, d3.id, d2.id],
|
||||
)
|
||||
# owner sort: ORM orders by owner_id (integer); NULLs first in SQLite ASC
|
||||
self.assertListEqual(
|
||||
search_query("&ordering=owner"),
|
||||
[d1.id, d2.id, d3.id],
|
||||
[d3.id, d1.id, d2.id],
|
||||
)
|
||||
self.assertListEqual(
|
||||
search_query("&ordering=-owner"),
|
||||
[d3.id, d2.id, d1.id],
|
||||
[d2.id, d1.id, d3.id],
|
||||
)
|
||||
|
||||
@mock.patch("documents.bulk_edit.bulk_update_documents")
|
||||
@@ -1441,12 +1428,12 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
)
|
||||
set_permissions([4, 5], set_permissions={}, owner=user2, merge=False)
|
||||
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
index.update_document(writer, d4)
|
||||
index.update_document(writer, d5)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
backend.add_or_update(d4)
|
||||
backend.add_or_update(d5)
|
||||
|
||||
correspondent1 = Correspondent.objects.create(name="bank correspondent 1")
|
||||
Correspondent.objects.create(name="correspondent 2")
|
||||
|
||||
Reference in New Issue
Block a user