mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-10 07:39:50 +00:00
Enhancement: unify text search to use tantivy (#12485)
This commit is contained in:
@@ -91,6 +91,135 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.data["count"], 0)
|
||||
self.assertEqual(len(results), 0)
|
||||
|
||||
def test_simple_text_search(self) -> None:
    """
    Exercise the ``text`` parameter of the documents list endpoint.

    Two documents carry the tag "invoice", but only one of them has the
    word "Monthly" in its content.  The test asserts that:

    * ``text=monthly`` returns exactly the document whose content
      contains that word, and
    * ``text=tag:invoice`` returns no results, even though both
      documents are tagged "invoice".
    """
    invoice_tag = Tag.objects.create(name="invoice")

    content_hit = Document.objects.create(
        pk=11,
        checksum="T1",
        title="Quarterly summary",
        content="Monthly bank report",
    )
    content_hit.tags.add(invoice_tag)

    tag_only = Document.objects.create(
        pk=12,
        checksum="T2",
        title="Completely unrelated",
        content="No matching terms here",
    )
    tag_only.tags.add(invoice_tag)

    # Index both documents, in creation order, before querying.
    index = get_backend()
    for doc in (content_hit, tag_only):
        index.add_or_update(doc)

    monthly = self.client.get("/api/documents/?text=monthly")
    self.assertEqual(monthly.status_code, status.HTTP_200_OK)
    self.assertEqual(monthly.data["count"], 1)
    self.assertEqual(monthly.data["results"][0]["id"], content_hit.id)

    tag_syntax = self.client.get("/api/documents/?text=tag:invoice")
    self.assertEqual(tag_syntax.status_code, status.HTTP_200_OK)
    self.assertEqual(tag_syntax.data["count"], 0)
|
||||
|
||||
def test_simple_text_search_matches_substrings(self) -> None:
    """
    Check that ``text`` queries match on fragments of content words.

    A single document with the content "Password reset instructions" is
    indexed.  A word prefix ("pass"), a mid-word fragment ("sswo") and a
    two-fragment query ("sswo re") must each return that one document.
    """
    target = Document.objects.create(
        pk=15,
        checksum="T5",
        title="Quarterly summary",
        content="Password reset instructions",
    )

    get_backend().add_or_update(target)

    request_urls = (
        "/api/documents/?text=pass",
        "/api/documents/?text=sswo",
        "/api/documents/?text=sswo re",
    )
    # Each request is checked the same way; the first failing assertion
    # aborts the test, just as with straight-line assertions.
    for url in request_urls:
        response = self.client.get(url)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertEqual(response.data["count"], 1)
        self.assertEqual(response.data["results"][0]["id"], target.id)
|
||||
|
||||
def test_simple_text_search_does_not_match_on_partial_term_overlap(self) -> None:
    """
    Check that a ``text`` query does not hit on loosely similar text.

    The only indexed document has the title "Adobe Acrobat PDF Files"
    and lorem-ipsum content.  Searching for "raptor" must succeed with a
    200 response and report zero results.
    """
    unrelated = Document.objects.create(
        pk=17,
        checksum="T7",
        title="Adobe Acrobat PDF Files",
        content="Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
    )

    get_backend().add_or_update(unrelated)

    result = self.client.get("/api/documents/?text=raptor")

    self.assertEqual(result.status_code, status.HTTP_200_OK)
    self.assertEqual(result.data["count"], 0)
|
||||
|
||||
def test_simple_title_search(self) -> None:
    """
    Check that ``title_search`` only considers document titles.

    One document has "Quarterly summary" as its title; another has the
    same words only in its content.  ``title_search=quarterly`` must
    return exactly the first document.
    """
    in_title = Document.objects.create(
        pk=13,
        checksum="T3",
        title="Quarterly summary",
        content="No matching content here",
    )
    in_content = Document.objects.create(
        pk=14,
        checksum="T4",
        title="Completely unrelated",
        content="Quarterly summary appears only in content",
    )

    # Index both documents, in creation order, before querying.
    index = get_backend()
    for doc in (in_title, in_content):
        index.add_or_update(doc)

    result = self.client.get("/api/documents/?title_search=quarterly")

    self.assertEqual(result.status_code, status.HTTP_200_OK)
    self.assertEqual(result.data["count"], 1)
    self.assertEqual(result.data["results"][0]["id"], in_title.id)
|
||||
|
||||
def test_simple_title_search_matches_substrings(self) -> None:
    """
    Check that ``title_search`` queries match on fragments of title words.

    A single document titled "Password handbook" is indexed.  A word
    prefix ("pass"), a mid-word fragment ("sswo") and a two-fragment
    query ("sswo hand") must each return that one document.
    """
    target = Document.objects.create(
        pk=16,
        checksum="T6",
        title="Password handbook",
        content="No matching content here",
    )

    get_backend().add_or_update(target)

    request_urls = (
        "/api/documents/?title_search=pass",
        "/api/documents/?title_search=sswo",
        "/api/documents/?title_search=sswo hand",
    )
    # Each request is checked the same way; the first failing assertion
    # aborts the test, just as with straight-line assertions.
    for url in request_urls:
        response = self.client.get(url)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertEqual(response.data["count"], 1)
        self.assertEqual(response.data["results"][0]["id"], target.id)
|
||||
|
||||
def test_search_rejects_multiple_search_modes(self) -> None:
    """
    Check that combining search parameters is rejected.

    A request that supplies both ``text`` and ``query`` must produce a
    400 response whose ``detail`` is the expected message.
    """
    expected_detail = (
        "Specify only one of text, title_search, query, or more_like_id."
    )

    result = self.client.get("/api/documents/?text=bank&query=bank")

    self.assertEqual(result.status_code, status.HTTP_400_BAD_REQUEST)
    self.assertEqual(result.data["detail"], expected_detail)
|
||||
|
||||
def test_search_returns_all_for_api_version_9(self) -> None:
|
||||
d1 = Document.objects.create(
|
||||
title="invoice",
|
||||
@@ -1493,6 +1622,31 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(results["custom_fields"][0]["id"], custom_field1.id)
|
||||
self.assertEqual(results["workflows"][0]["id"], workflow1.id)
|
||||
|
||||
def test_global_search_db_only_limits_documents_to_title_matches(self) -> None:
    """
    Check the ``db_only`` mode of the global search endpoint.

    One document has "bank" in its title; another has it only in its
    content.  With ``db_only=true`` the ``documents`` list of the
    response must contain exactly the document matched by title.
    """
    in_title = Document.objects.create(
        pk=21,
        checksum="GS1",
        title="bank statement",
        content="no additional terms",
    )
    in_content = Document.objects.create(
        pk=22,
        checksum="GS2",
        title="not a title match",
        content="bank appears only in content",
    )

    # Index both documents, in creation order, before querying.
    index = get_backend()
    for doc in (in_title, in_content):
        index.add_or_update(doc)

    self.client.force_authenticate(self.user)

    result = self.client.get("/api/search/?query=bank&db_only=true")
    self.assertEqual(result.status_code, status.HTTP_200_OK)

    # Read the payload only after the status check, as the original did.
    found_documents = result.data["documents"]
    self.assertEqual(len(found_documents), 1)
    self.assertEqual(found_documents[0]["id"], in_title.id)
|
||||
|
||||
def test_global_search_filters_owned_mail_objects(self) -> None:
|
||||
user1 = User.objects.create_user("mail-search-user")
|
||||
user2 = User.objects.create_user("other-mail-search-user")
|
||||
|
||||
Reference in New Issue
Block a user