Compare commits

..

1 Commits

Author SHA1 Message Date
Crowdin Bot edb3ab785a New Crowdin translations by GitHub Action 2026-06-30 01:12:50 +00:00
95 changed files with 42269 additions and 22226 deletions
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+4 -7
View File
@@ -48,9 +48,6 @@ _LANGUAGE_MAP: dict[str, str] = {
}
SUPPORTED_LANGUAGES: frozenset[str] = frozenset(_LANGUAGE_MAP)
# Document.title is max_length=128, so use 129 as the limit for
# Tantivy's remove_long filter
_TOKEN_REMOVE_LONG_LIMIT: Final[int] = 129
def register_tokenizers(index: tantivy.Index, language: str | None) -> None:
@@ -80,10 +77,10 @@ def register_tokenizers(index: tantivy.Index, language: str | None) -> None:
def _paperless_text(language: str | None) -> tantivy.TextAnalyzer:
"""Main full-text tokenizer for content, title, etc: simple -> remove_long(129) -> lowercase -> ascii_fold [-> stemmer]"""
"""Main full-text tokenizer for content, title, etc: simple -> remove_long(65) -> lowercase -> ascii_fold [-> stemmer]"""
builder = (
tantivy.TextAnalyzerBuilder(tantivy.Tokenizer.simple())
.filter(tantivy.Filter.remove_long(_TOKEN_REMOVE_LONG_LIMIT))
.filter(tantivy.Filter.remove_long(65))
.filter(tantivy.Filter.lowercase())
.filter(tantivy.Filter.ascii_fold())
)
@@ -122,12 +119,12 @@ def _bigram_analyzer() -> tantivy.TextAnalyzer:
def _simple_search_analyzer() -> tantivy.TextAnalyzer:
"""Tokenizer for simple substring search fields: non-whitespace chunks -> remove_long(129) -> lowercase -> ascii_fold."""
"""Tokenizer for simple substring search fields: non-whitespace chunks -> remove_long(65) -> lowercase -> ascii_fold."""
return (
tantivy.TextAnalyzerBuilder(
tantivy.Tokenizer.regex(r"\S+"),
)
.filter(tantivy.Filter.remove_long(_TOKEN_REMOVE_LONG_LIMIT))
.filter(tantivy.Filter.remove_long(65))
.filter(tantivy.Filter.lowercase())
.filter(tantivy.Filter.ascii_fold())
.build()
@@ -261,36 +261,6 @@ class TestSearch:
== 1
)
@pytest.mark.parametrize(
("search_mode", "query"),
[
pytest.param(SearchMode.TITLE, "12345", id="title_search"),
pytest.param(SearchMode.TEXT, "12345", id="text_search"),
pytest.param(SearchMode.QUERY, None, id="query_title_exact"),
],
)
def test_search_modes_match_model_limit_title_tokens(
self,
backend: TantivyBackend,
search_mode: SearchMode,
query: str | None,
) -> None:
"""Search must keep filename-like title tokens up to the model limit."""
long_title = "1234567890" * 12 + "12345678"
doc = Document.objects.create(
title=long_title,
content="ordinary content",
checksum="TXT12",
pk=18,
)
backend.add_or_update(doc)
assert backend.search_ids(
query or f"title:{long_title}",
user=None,
search_mode=search_mode,
) == [doc.pk]
@pytest.mark.parametrize(
("mode", "title", "content", "hits", "misses"),
[
@@ -99,25 +99,6 @@ class TestTokenizers:
)
assert simple_search_index.searcher().search(q, limit=5).count == 1
def test_simple_search_analyzer_supports_model_limit_token_substrings(
self,
simple_search_index: tantivy.Index,
) -> None:
"""Simple substring search keeps tokens up to Document.title's model limit."""
long_token = "abcdefghij" * 12 + "abcdefgh"
writer = simple_search_index.writer()
doc = tantivy.Document()
doc.add_text("simple_content", long_token)
writer.add_document(doc)
writer.commit()
simple_search_index.reload()
q = tantivy.Query.regex_query(
simple_search_index.schema,
"simple_content",
".*cdefg.*",
)
assert simple_search_index.searcher().search(q, limit=5).count == 1
def test_unsupported_language_logs_warning(self, caplog: LogCaptureFixture) -> None:
"""Unsupported language codes should log a warning and disable stemming gracefully."""
sb = tantivy.SchemaBuilder()
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff