mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-05-05 06:05:24 +00:00
Improvements for the typing
This commit is contained in:
+2
-1
@@ -143,7 +143,8 @@ typing = [
|
||||
"types-python-dateutil",
|
||||
"types-pytz",
|
||||
"types-redis",
|
||||
"types-setuptools",
|
||||
"types-regex",
|
||||
"types-setuptools"
|
||||
]
|
||||
|
||||
[tool.uv]
|
||||
|
||||
@@ -10,6 +10,7 @@ from typing import TYPE_CHECKING
|
||||
from typing import Self
|
||||
from typing import TypedDict
|
||||
from typing import TypeVar
|
||||
from typing import cast
|
||||
|
||||
import filelock
|
||||
import regex
|
||||
@@ -35,10 +36,8 @@ from documents.utils import identity
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
from django.contrib.auth.base_user import AbstractUser
|
||||
from django.contrib.auth.models import AbstractUser
|
||||
from django.db.models import QuerySet
|
||||
from tantivy import Index
|
||||
from tantivy import Schema
|
||||
|
||||
from documents.models import Document
|
||||
|
||||
@@ -170,9 +169,16 @@ class WriteBatch:
|
||||
def __init__(self, backend: TantivyBackend, lock_timeout: float):
|
||||
self._backend = backend
|
||||
self._lock_timeout = lock_timeout
|
||||
self._writer = None
|
||||
self._raw_writer: tantivy.IndexWriter | None = None
|
||||
self._lock = None
|
||||
|
||||
@property
|
||||
def _writer(self) -> tantivy.IndexWriter:
|
||||
assert self._raw_writer is not None, (
|
||||
"WriteBatch not entered; use as context manager"
|
||||
)
|
||||
return self._raw_writer
|
||||
|
||||
def __enter__(self) -> Self:
|
||||
if self._backend._path is not None:
|
||||
lock_path = self._backend._path / ".tantivy.lock"
|
||||
@@ -184,7 +190,7 @@ class WriteBatch:
|
||||
f"Could not acquire index lock within {self._lock_timeout}s",
|
||||
) from e
|
||||
|
||||
self._writer = self._backend._index.writer()
|
||||
self._raw_writer = self._backend._index.writer()
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
@@ -194,9 +200,9 @@ class WriteBatch:
|
||||
self._backend._index.reload()
|
||||
# Explicitly delete writer to release tantivy's internal lock.
|
||||
# On exception the uncommitted writer is simply discarded.
|
||||
if self._writer is not None:
|
||||
del self._writer
|
||||
self._writer = None
|
||||
if self._raw_writer is not None:
|
||||
del self._raw_writer
|
||||
self._raw_writer = None
|
||||
finally:
|
||||
if self._lock is not None:
|
||||
self._lock.release()
|
||||
@@ -274,8 +280,18 @@ class TantivyBackend:
|
||||
# path=None → in-memory index (for tests)
|
||||
# path=some_dir → on-disk index (for production)
|
||||
self._path = path
|
||||
self._index = None
|
||||
self._schema = None
|
||||
self._raw_index: tantivy.Index | None = None
|
||||
self._raw_schema: tantivy.Schema | None = None
|
||||
|
||||
@property
|
||||
def _index(self) -> tantivy.Index:
|
||||
assert self._raw_index is not None, "Index not open; call open() first"
|
||||
return self._raw_index
|
||||
|
||||
@property
|
||||
def _schema(self) -> tantivy.Schema:
|
||||
assert self._raw_schema is not None, "Schema not open; call open() first"
|
||||
return self._raw_schema
|
||||
|
||||
def open(self) -> None:
|
||||
"""
|
||||
@@ -285,14 +301,14 @@ class TantivyBackend:
|
||||
version or language changes. Registers custom tokenizers after opening.
|
||||
Safe to call multiple times - subsequent calls are no-ops.
|
||||
"""
|
||||
if self._index is not None:
|
||||
if self._raw_index is not None:
|
||||
return # pragma: no cover
|
||||
if self._path is not None:
|
||||
self._index = open_or_rebuild_index(self._path)
|
||||
self._raw_index = open_or_rebuild_index(self._path)
|
||||
else:
|
||||
self._index = tantivy.Index(build_schema())
|
||||
register_tokenizers(self._index, settings.SEARCH_LANGUAGE)
|
||||
self._schema = self._index.schema
|
||||
self._raw_index = tantivy.Index(build_schema())
|
||||
register_tokenizers(self._raw_index, settings.SEARCH_LANGUAGE)
|
||||
self._raw_schema = self._raw_index.schema
|
||||
|
||||
def close(self) -> None:
|
||||
"""
|
||||
@@ -300,12 +316,12 @@ class TantivyBackend:
|
||||
|
||||
Safe to call multiple times - subsequent calls are no-ops.
|
||||
"""
|
||||
self._index = None
|
||||
self._schema = None
|
||||
self._raw_index = None
|
||||
self._raw_schema = None
|
||||
|
||||
def _ensure_open(self) -> None:
|
||||
"""Ensure the index is open before operations."""
|
||||
if self._index is None:
|
||||
if self._raw_index is None:
|
||||
self.open() # pragma: no cover
|
||||
|
||||
def _parse_query(
|
||||
@@ -569,7 +585,7 @@ class TantivyBackend:
|
||||
batch_results = searcher.search(batch_query, limit=len(doc_ids))
|
||||
|
||||
result_addrs = [addr for _score, addr in batch_results.hits]
|
||||
result_ids = searcher.fast_field_values("id", result_addrs)
|
||||
result_ids = cast("list[int]", searcher.fast_field_values("id", result_addrs))
|
||||
addr_by_id: dict[int, tuple[float, tantivy.DocAddress]] = {
|
||||
doc_id: (score, addr)
|
||||
for (score, addr), doc_id in zip(batch_results.hits, result_ids)
|
||||
@@ -688,7 +704,10 @@ class TantivyBackend:
|
||||
if threshold is not None:
|
||||
all_hits = [hit for hit in all_hits if hit[1] >= threshold]
|
||||
|
||||
return searcher.fast_field_values("id", [doc_addr for doc_addr, *_ in all_hits])
|
||||
return cast(
|
||||
"list[int]",
|
||||
searcher.fast_field_values("id", [doc_addr for doc_addr, *_ in all_hits]),
|
||||
)
|
||||
|
||||
def autocomplete(
|
||||
self,
|
||||
@@ -720,11 +739,6 @@ class TantivyBackend:
|
||||
if not normalized_term:
|
||||
return []
|
||||
|
||||
if TYPE_CHECKING:
|
||||
assert self._index is not None
|
||||
assert isinstance(self._index, Index)
|
||||
assert isinstance(self._schema, Schema)
|
||||
|
||||
searcher = self._index.searcher()
|
||||
|
||||
permission_query = None
|
||||
@@ -791,7 +805,7 @@ class TantivyBackend:
|
||||
results = searcher.search(final_query, limit=effective_limit + 1)
|
||||
|
||||
addrs = [addr for _score, addr in results.hits]
|
||||
all_ids = searcher.fast_field_values("id", addrs)
|
||||
all_ids = cast("list[int]", searcher.fast_field_values("id", addrs))
|
||||
ids = [rid for rid in all_ids if rid != doc_id]
|
||||
return ids[:limit] if limit is not None else ids
|
||||
|
||||
@@ -840,9 +854,9 @@ class TantivyBackend:
|
||||
register_tokenizers(new_index, settings.SEARCH_LANGUAGE)
|
||||
|
||||
# Point instance at the new index so _build_tantivy_doc uses it
|
||||
old_index, old_schema = self._index, self._schema
|
||||
self._index = new_index
|
||||
self._schema = new_index.schema
|
||||
old_index, old_schema = self._raw_index, self._raw_schema
|
||||
self._raw_index = new_index
|
||||
self._raw_schema = new_index.schema
|
||||
|
||||
try:
|
||||
writer = new_index.writer()
|
||||
@@ -856,8 +870,8 @@ class TantivyBackend:
|
||||
new_index.reload()
|
||||
except BaseException: # pragma: no cover
|
||||
# Restore old index on failure so the backend remains usable
|
||||
self._index = old_index
|
||||
self._schema = old_schema
|
||||
self._raw_index = old_index
|
||||
self._raw_schema = old_schema
|
||||
raise
|
||||
|
||||
|
||||
|
||||
@@ -2987,6 +2987,7 @@ typing = [
|
||||
{ name = "types-python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "types-pytz", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "types-redis", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "types-regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "types-setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
]
|
||||
|
||||
@@ -3125,6 +3126,7 @@ typing = [
|
||||
{ name = "types-python-dateutil" },
|
||||
{ name = "types-pytz" },
|
||||
{ name = "types-redis" },
|
||||
{ name = "types-regex" },
|
||||
{ name = "types-setuptools" },
|
||||
]
|
||||
|
||||
@@ -5185,6 +5187,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/55/82/7d25dce10aad92d2226b269bce2f85cfd843b4477cd50245d7d40ecf8f89/types_redis-4.6.0.20241004-py3-none-any.whl", hash = "sha256:ef5da68cb827e5f606c8f9c0b49eeee4c2669d6d97122f301d3a55dc6a63f6ed", size = 58737, upload-time = "2024-10-04T02:43:57.968Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "types-regex"
|
||||
version = "2026.4.4.20260408"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/92/42/d7c691fc5a8a8ecfba3f23c1c4c087a089af0767610d88c29201193d8f60/types_regex-2026.4.4.20260408.tar.gz", hash = "sha256:86b2975ff11b06e7f538839821510daea2566d9cb18bb8acde47834315409cf9", size = 13182, upload-time = "2026-04-08T04:31:11.887Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e1/92/e109654a804d11d9b60d67c7b29d64b2beac6b2e3209ea075e268e5a1021/types_regex-2026.4.4.20260408-py3-none-any.whl", hash = "sha256:d436bcc409abf9b06747b7e038014afc6d40ef7b72329655c353a1955534068f", size = 11116, upload-time = "2026-04-08T04:31:11.01Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "types-setuptools"
|
||||
version = "80.10.0.20260124"
|
||||
|
||||
Reference in New Issue
Block a user