Improvements to the typing

This commit is contained in:
Trenton Holmes
2026-04-26 14:38:38 -07:00
parent 4c0d60dd47
commit bd47d27c63
3 changed files with 58 additions and 32 deletions
+2 -1
View File
@@ -143,7 +143,8 @@ typing = [
"types-python-dateutil",
"types-pytz",
"types-redis",
"types-setuptools",
"types-regex",
"types-setuptools"
]
[tool.uv]
+45 -31
View File
@@ -10,6 +10,7 @@ from typing import TYPE_CHECKING
from typing import Self
from typing import TypedDict
from typing import TypeVar
from typing import cast
import filelock
import regex
@@ -35,10 +36,8 @@ from documents.utils import identity
if TYPE_CHECKING:
from pathlib import Path
from django.contrib.auth.base_user import AbstractUser
from django.contrib.auth.models import AbstractUser
from django.db.models import QuerySet
from tantivy import Index
from tantivy import Schema
from documents.models import Document
@@ -170,9 +169,16 @@ class WriteBatch:
def __init__(self, backend: TantivyBackend, lock_timeout: float):
self._backend = backend
self._lock_timeout = lock_timeout
self._writer = None
self._raw_writer: tantivy.IndexWriter | None = None
self._lock = None
@property
def _writer(self) -> tantivy.IndexWriter:
assert self._raw_writer is not None, (
"WriteBatch not entered; use as context manager"
)
return self._raw_writer
def __enter__(self) -> Self:
if self._backend._path is not None:
lock_path = self._backend._path / ".tantivy.lock"
@@ -184,7 +190,7 @@ class WriteBatch:
f"Could not acquire index lock within {self._lock_timeout}s",
) from e
self._writer = self._backend._index.writer()
self._raw_writer = self._backend._index.writer()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
@@ -194,9 +200,9 @@ class WriteBatch:
self._backend._index.reload()
# Explicitly delete writer to release tantivy's internal lock.
# On exception the uncommitted writer is simply discarded.
if self._writer is not None:
del self._writer
self._writer = None
if self._raw_writer is not None:
del self._raw_writer
self._raw_writer = None
finally:
if self._lock is not None:
self._lock.release()
@@ -274,8 +280,18 @@ class TantivyBackend:
# path=None → in-memory index (for tests)
# path=some_dir → on-disk index (for production)
self._path = path
self._index = None
self._schema = None
self._raw_index: tantivy.Index | None = None
self._raw_schema: tantivy.Schema | None = None
@property
def _index(self) -> tantivy.Index:
assert self._raw_index is not None, "Index not open; call open() first"
return self._raw_index
@property
def _schema(self) -> tantivy.Schema:
assert self._raw_schema is not None, "Schema not open; call open() first"
return self._raw_schema
def open(self) -> None:
"""
@@ -285,14 +301,14 @@ class TantivyBackend:
version or language changes. Registers custom tokenizers after opening.
Safe to call multiple times - subsequent calls are no-ops.
"""
if self._index is not None:
if self._raw_index is not None:
return # pragma: no cover
if self._path is not None:
self._index = open_or_rebuild_index(self._path)
self._raw_index = open_or_rebuild_index(self._path)
else:
self._index = tantivy.Index(build_schema())
register_tokenizers(self._index, settings.SEARCH_LANGUAGE)
self._schema = self._index.schema
self._raw_index = tantivy.Index(build_schema())
register_tokenizers(self._raw_index, settings.SEARCH_LANGUAGE)
self._raw_schema = self._raw_index.schema
def close(self) -> None:
"""
@@ -300,12 +316,12 @@ class TantivyBackend:
Safe to call multiple times - subsequent calls are no-ops.
"""
self._index = None
self._schema = None
self._raw_index = None
self._raw_schema = None
def _ensure_open(self) -> None:
"""Ensure the index is open before operations."""
if self._index is None:
if self._raw_index is None:
self.open() # pragma: no cover
def _parse_query(
@@ -569,7 +585,7 @@ class TantivyBackend:
batch_results = searcher.search(batch_query, limit=len(doc_ids))
result_addrs = [addr for _score, addr in batch_results.hits]
result_ids = searcher.fast_field_values("id", result_addrs)
result_ids = cast("list[int]", searcher.fast_field_values("id", result_addrs))
addr_by_id: dict[int, tuple[float, tantivy.DocAddress]] = {
doc_id: (score, addr)
for (score, addr), doc_id in zip(batch_results.hits, result_ids)
@@ -688,7 +704,10 @@ class TantivyBackend:
if threshold is not None:
all_hits = [hit for hit in all_hits if hit[1] >= threshold]
return searcher.fast_field_values("id", [doc_addr for doc_addr, *_ in all_hits])
return cast(
"list[int]",
searcher.fast_field_values("id", [doc_addr for doc_addr, *_ in all_hits]),
)
def autocomplete(
self,
@@ -720,11 +739,6 @@ class TantivyBackend:
if not normalized_term:
return []
if TYPE_CHECKING:
assert self._index is not None
assert isinstance(self._index, Index)
assert isinstance(self._schema, Schema)
searcher = self._index.searcher()
permission_query = None
@@ -791,7 +805,7 @@ class TantivyBackend:
results = searcher.search(final_query, limit=effective_limit + 1)
addrs = [addr for _score, addr in results.hits]
all_ids = searcher.fast_field_values("id", addrs)
all_ids = cast("list[int]", searcher.fast_field_values("id", addrs))
ids = [rid for rid in all_ids if rid != doc_id]
return ids[:limit] if limit is not None else ids
@@ -840,9 +854,9 @@ class TantivyBackend:
register_tokenizers(new_index, settings.SEARCH_LANGUAGE)
# Point instance at the new index so _build_tantivy_doc uses it
old_index, old_schema = self._index, self._schema
self._index = new_index
self._schema = new_index.schema
old_index, old_schema = self._raw_index, self._raw_schema
self._raw_index = new_index
self._raw_schema = new_index.schema
try:
writer = new_index.writer()
@@ -856,8 +870,8 @@ class TantivyBackend:
new_index.reload()
except BaseException: # pragma: no cover
# Restore old index on failure so the backend remains usable
self._index = old_index
self._schema = old_schema
self._raw_index = old_index
self._raw_schema = old_schema
raise
Generated
+11
View File
@@ -2987,6 +2987,7 @@ typing = [
{ name = "types-python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "types-pytz", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "types-redis", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "types-regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "types-setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
@@ -3125,6 +3126,7 @@ typing = [
{ name = "types-python-dateutil" },
{ name = "types-pytz" },
{ name = "types-redis" },
{ name = "types-regex" },
{ name = "types-setuptools" },
]
@@ -5185,6 +5187,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/55/82/7d25dce10aad92d2226b269bce2f85cfd843b4477cd50245d7d40ecf8f89/types_redis-4.6.0.20241004-py3-none-any.whl", hash = "sha256:ef5da68cb827e5f606c8f9c0b49eeee4c2669d6d97122f301d3a55dc6a63f6ed", size = 58737, upload-time = "2024-10-04T02:43:57.968Z" },
]
[[package]]
name = "types-regex"
version = "2026.4.4.20260408"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/92/42/d7c691fc5a8a8ecfba3f23c1c4c087a089af0767610d88c29201193d8f60/types_regex-2026.4.4.20260408.tar.gz", hash = "sha256:86b2975ff11b06e7f538839821510daea2566d9cb18bb8acde47834315409cf9", size = 13182, upload-time = "2026-04-08T04:31:11.887Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e1/92/e109654a804d11d9b60d67c7b29d64b2beac6b2e3209ea075e268e5a1021/types_regex-2026.4.4.20260408-py3-none-any.whl", hash = "sha256:d436bcc409abf9b06747b7e038014afc6d40ef7b72329655c353a1955534068f", size = 11116, upload-time = "2026-04-08T04:31:11.01Z" },
]
[[package]]
name = "types-setuptools"
version = "80.10.0.20260124"