mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-05-05 14:15:24 +00:00
Updates tantvity and use the new contributed method for autocomplete
This commit is contained in:
@@ -3,7 +3,6 @@ from __future__ import annotations
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
from collections import Counter
|
||||
from datetime import UTC
|
||||
from datetime import datetime
|
||||
from enum import StrEnum
|
||||
@@ -36,8 +35,10 @@ from documents.utils import identity
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
from django.contrib.auth.base_user import AbstractBaseUser
|
||||
from django.contrib.auth.base_user import AbstractUser
|
||||
from django.db.models import QuerySet
|
||||
from tantivy import Index
|
||||
from tantivy import Schema
|
||||
|
||||
from documents.models import Document
|
||||
|
||||
@@ -324,7 +325,7 @@ class TantivyBackend:
|
||||
def _apply_permission_filter(
|
||||
self,
|
||||
query: tantivy.Query,
|
||||
user: AbstractBaseUser | None,
|
||||
user: AbstractUser | None,
|
||||
) -> tantivy.Query:
|
||||
"""Wrap a query with a permission filter if the user is not a superuser."""
|
||||
if user is not None:
|
||||
@@ -633,7 +634,7 @@ class TantivyBackend:
|
||||
def search_ids(
|
||||
self,
|
||||
query: str,
|
||||
user: AbstractBaseUser | None,
|
||||
user: AbstractUser | None,
|
||||
*,
|
||||
sort_field: str | None = None,
|
||||
sort_reverse: bool = False,
|
||||
@@ -693,7 +694,7 @@ class TantivyBackend:
|
||||
self,
|
||||
term: str,
|
||||
limit: int,
|
||||
user: AbstractBaseUser | None = None,
|
||||
user: AbstractUser | None = None,
|
||||
) -> list[str]:
|
||||
"""
|
||||
Get autocomplete suggestions for search queries.
|
||||
@@ -719,71 +720,32 @@ class TantivyBackend:
|
||||
if not normalized_term:
|
||||
return []
|
||||
|
||||
if TYPE_CHECKING:
|
||||
assert self._index is not None
|
||||
assert isinstance(self._index, Index)
|
||||
assert isinstance(self._schema, Schema)
|
||||
|
||||
searcher = self._index.searcher()
|
||||
|
||||
# Build a prefix query on autocomplete_word so we only scan docs
|
||||
# containing words that start with the prefix, not the entire index.
|
||||
# tantivy regex is implicitly anchored; .+ avoids the empty-match
|
||||
# error that .* triggers. We OR with term_query to also match the
|
||||
# exact prefix as a complete word.
|
||||
escaped = re.escape(normalized_term)
|
||||
prefix_query = tantivy.Query.boolean_query(
|
||||
[
|
||||
(
|
||||
tantivy.Occur.Should,
|
||||
tantivy.Query.term_query(
|
||||
self._schema,
|
||||
"autocomplete_word",
|
||||
normalized_term,
|
||||
),
|
||||
),
|
||||
(
|
||||
tantivy.Occur.Should,
|
||||
tantivy.Query.regex_query(
|
||||
self._schema,
|
||||
"autocomplete_word",
|
||||
f"{escaped}.+",
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
permission_query = None
|
||||
# Intersect with permission filter so autocomplete words from
|
||||
# invisible documents don't leak to other users.
|
||||
if user is not None and not user.is_superuser:
|
||||
final_query = tantivy.Query.boolean_query(
|
||||
[
|
||||
(tantivy.Occur.Must, prefix_query),
|
||||
(tantivy.Occur.Must, build_permission_filter(self._schema, user)),
|
||||
],
|
||||
)
|
||||
else:
|
||||
final_query = prefix_query
|
||||
permission_query = build_permission_filter(self._schema, user)
|
||||
|
||||
results = searcher.search(final_query, limit=searcher.num_docs)
|
||||
|
||||
# Count how many visible documents each matching word appears in.
|
||||
word_counts: Counter[str] = Counter()
|
||||
for _score, doc_address in results.hits:
|
||||
stored_doc = searcher.doc(doc_address)
|
||||
doc_dict = stored_doc.to_dict()
|
||||
if "autocomplete_word" in doc_dict:
|
||||
for word in doc_dict["autocomplete_word"]:
|
||||
if word.startswith(normalized_term):
|
||||
word_counts[word] += 1
|
||||
|
||||
# Sort by document frequency descending; break ties alphabetically.
|
||||
matches = sorted(
|
||||
word_counts,
|
||||
key=lambda w: (-word_counts[w], w),
|
||||
matches = searcher.terms_with_prefix(
|
||||
"autocomplete_word",
|
||||
normalized_term,
|
||||
permission_query,
|
||||
limit,
|
||||
)
|
||||
|
||||
return matches[:limit]
|
||||
return [x[0] for x in matches]
|
||||
|
||||
def more_like_this_ids(
|
||||
self,
|
||||
doc_id: int,
|
||||
user: AbstractBaseUser | None,
|
||||
user: AbstractUser | None,
|
||||
*,
|
||||
limit: int | None = None,
|
||||
) -> list[int]:
|
||||
|
||||
@@ -4661,7 +4661,7 @@ wheels = [
|
||||
[[package]]
|
||||
name = "tantivy"
|
||||
version = "0.26.0"
|
||||
source = { git = "https://github.com/quickwit-oss/tantivy-py.git#fa1a1985b96001929fc1cafcdd9dc94e56658b2a" }
|
||||
source = { git = "https://github.com/quickwit-oss/tantivy-py.git#bf3a0c0d6e18591728121b3b5fa735ab61862110" }
|
||||
|
||||
[[package]]
|
||||
name = "tenacity"
|
||||
@@ -4896,12 +4896,12 @@ dependencies = [
|
||||
{ name = "typing-extensions", marker = "sys_platform == 'darwin'" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d75eadcd97fe0dc7cd0eedc4d72152484c19cb2cfe46ce55766c8e129116425f" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:43b35116802c85fb88d99f4a396b8bd4472bfca1dd82e69499e5a4f9b8b4e252" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:442ec9dc78592564fdad69cf0beaa9da2f82ab810ccb4f13903869a90bf3f15d" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cc3a195701bba2239c313ee311487f80f8aaebe9e89b9073dddbcf2f93b5a0ba" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:072a0d6e4865e8b0dc0dbfe6ebed68fae235124222835ef03e5814d414d8c012" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:23ec7789017da9d95b6d543d790814785e6f30905c5443efa8257d1490d73f79" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d75eadcd97fe0dc7cd0eedc4d72152484c19cb2cfe46ce55766c8e129116425f", upload-time = "2026-03-23T15:16:54Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:43b35116802c85fb88d99f4a396b8bd4472bfca1dd82e69499e5a4f9b8b4e252", upload-time = "2026-03-23T15:16:58Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:442ec9dc78592564fdad69cf0beaa9da2f82ab810ccb4f13903869a90bf3f15d", upload-time = "2026-03-23T15:17:02Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cc3a195701bba2239c313ee311487f80f8aaebe9e89b9073dddbcf2f93b5a0ba", upload-time = "2026-03-23T15:17:06Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:072a0d6e4865e8b0dc0dbfe6ebed68fae235124222835ef03e5814d414d8c012", upload-time = "2026-03-23T15:17:10Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:23ec7789017da9d95b6d543d790814785e6f30905c5443efa8257d1490d73f79", upload-time = "2026-03-23T15:17:14Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4924,24 +4924,24 @@ dependencies = [
|
||||
{ name = "typing-extensions", marker = "sys_platform == 'linux'" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp311-cp311-linux_s390x.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp311-cp311-manylinux_2_28_aarch64.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp312-cp312-linux_s390x.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313-linux_s390x.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313t-linux_s390x.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314-linux_s390x.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314-manylinux_2_28_aarch64.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314-manylinux_2_28_x86_64.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314t-linux_s390x.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314t-manylinux_2_28_aarch64.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314t-manylinux_2_28_x86_64.whl" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp311-cp311-linux_s390x.whl", upload-time = "2026-03-23T14:58:58Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp311-cp311-manylinux_2_28_aarch64.whl", upload-time = "2026-03-23T14:59:00Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl", upload-time = "2026-03-23T14:59:00Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp312-cp312-linux_s390x.whl", upload-time = "2026-03-23T14:59:01Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", upload-time = "2026-03-23T14:59:02Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", upload-time = "2026-03-23T14:59:03Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313-linux_s390x.whl", upload-time = "2026-03-23T14:59:04Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", upload-time = "2026-03-23T14:59:04Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", upload-time = "2026-03-23T14:59:05Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313t-linux_s390x.whl", upload-time = "2026-03-23T14:59:07Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", upload-time = "2026-03-23T14:59:07Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", upload-time = "2026-03-23T14:59:07Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314-linux_s390x.whl", upload-time = "2026-03-23T14:59:09Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314-manylinux_2_28_aarch64.whl", upload-time = "2026-03-23T14:59:10Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314-manylinux_2_28_x86_64.whl", upload-time = "2026-03-23T14:59:11Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314t-linux_s390x.whl", upload-time = "2026-03-23T14:59:12Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314t-manylinux_2_28_aarch64.whl", upload-time = "2026-03-23T14:59:12Z" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314t-manylinux_2_28_x86_64.whl", upload-time = "2026-03-23T14:59:13Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
Reference in New Issue
Block a user