From 9ab2b16d9d53c873cd2bb2126f77209b81b8cf8b Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Tue, 21 Apr 2026 07:43:47 -0700 Subject: [PATCH] Replaces two sentinel files with .index_settings.json which can properly store multiple values and handle None --- src/documents/search/_schema.py | 39 +++++++++++--------- src/documents/tests/search/test_schema.py | 45 +++++++++++++++-------- 2 files changed, 51 insertions(+), 33 deletions(-) diff --git a/src/documents/search/_schema.py b/src/documents/search/_schema.py index 479c60bc5..bc0832d37 100644 --- a/src/documents/search/_schema.py +++ b/src/documents/search/_schema.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json import logging import shutil from typing import TYPE_CHECKING @@ -100,9 +101,9 @@ def needs_rebuild(index_dir: Path) -> bool: """ Check if the search index needs rebuilding. - Compares the current schema version and search language configuration - against sentinel files to determine if the index is compatible with - the current paperless-ngx version and settings. + Reads .index_settings.json to compare the stored schema version and + search language against the current configuration. Returns True if the + file is missing, unparsable, or either value mismatches. Args: index_dir: Path to the search index directory @@ -110,24 +111,19 @@ def needs_rebuild(index_dir: Path) -> bool: Returns: True if the index needs rebuilding, False if it's up to date """ - version_file = index_dir / ".schema_version" - if not version_file.exists(): + settings_file = index_dir / ".index_settings.json" + if not settings_file.exists(): return True try: - if int(version_file.read_text().strip()) != SCHEMA_VERSION: + data = json.loads(settings_file.read_text()) + if data.get("schema_version") != SCHEMA_VERSION: logger.info("Search index schema version mismatch - rebuilding.") return True + if "language" not in data or data["language"] != settings.SEARCH_LANGUAGE: + logger.info("Search index language changed - rebuilding.") + return True except ValueError: return True - - language_file = index_dir / ".schema_language" - if not language_file.exists(): - logger.info("Search index language sentinel missing - rebuilding.") - return True - if language_file.read_text().strip() != (settings.SEARCH_LANGUAGE or ""): - logger.info("Search index language changed - rebuilding.") - return True - return False @@ -149,9 +145,16 @@ def wipe_index(index_dir: Path) -> None: def _write_sentinels(index_dir: Path) -> None: - """Write schema version and language sentinel files so the next index open can skip rebuilding.""" - (index_dir / ".schema_version").write_text(str(SCHEMA_VERSION)) - (index_dir / ".schema_language").write_text(settings.SEARCH_LANGUAGE or "") + """Write .index_settings.json so the next index open can skip rebuilding.""" + settings_file = index_dir / ".index_settings.json" + settings_file.write_text( + json.dumps( + { + "schema_version": SCHEMA_VERSION, + "language": settings.SEARCH_LANGUAGE, + }, + ), + ) def open_or_rebuild_index(index_dir: Path | None = None) -> tantivy.Index: diff --git a/src/documents/tests/search/test_schema.py b/src/documents/tests/search/test_schema.py index 1ff9bee32..7219df580 100644 --- a/src/documents/tests/search/test_schema.py +++ b/src/documents/tests/search/test_schema.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json from typing import TYPE_CHECKING import pytest @@ -18,7 +19,7 @@ pytestmark = pytest.mark.search class TestNeedsRebuild: """needs_rebuild covers all sentinel-file states that require a full reindex.""" - def test_returns_true_when_version_file_missing(self, index_dir: Path) -> None: + def test_returns_true_when_settings_file_missing(self, index_dir: Path) -> None: assert needs_rebuild(index_dir) is True def test_returns_false_when_version_and_language_match( @@ -27,37 +28,51 @@ class TestNeedsRebuild: settings: SettingsWrapper, ) -> None: settings.SEARCH_LANGUAGE = "en" - (index_dir / ".schema_version").write_text(str(SCHEMA_VERSION)) - (index_dir / ".schema_language").write_text("en") + (index_dir / ".index_settings.json").write_text( + json.dumps({"schema_version": SCHEMA_VERSION, "language": "en"}), + ) assert needs_rebuild(index_dir) is False - def test_returns_true_on_schema_version_mismatch(self, index_dir: Path) -> None: - (index_dir / ".schema_version").write_text(str(SCHEMA_VERSION - 1)) - assert needs_rebuild(index_dir) is True - - def test_returns_true_when_version_file_not_an_integer( + def test_returns_true_on_schema_version_mismatch( self, index_dir: Path, + settings: SettingsWrapper, ) -> None: - (index_dir / ".schema_version").write_text("not-a-number") + settings.SEARCH_LANGUAGE = None + (index_dir / ".index_settings.json").write_text( + json.dumps({"schema_version": SCHEMA_VERSION - 1, "language": None}), + ) assert needs_rebuild(index_dir) is True - def test_returns_true_when_language_sentinel_missing( + def test_returns_true_when_version_is_not_an_integer( + self, + index_dir: Path, + settings: SettingsWrapper, + ) -> None: + settings.SEARCH_LANGUAGE = None + (index_dir / ".index_settings.json").write_text( + json.dumps({"schema_version": "not-a-number", "language": None}), + ) + assert needs_rebuild(index_dir) is True + + def test_returns_true_when_language_key_missing( self, index_dir: Path, settings: SettingsWrapper, ) -> None: settings.SEARCH_LANGUAGE = "en" - (index_dir / ".schema_version").write_text(str(SCHEMA_VERSION)) - # .schema_language intentionally absent + (index_dir / ".index_settings.json").write_text( + json.dumps({"schema_version": SCHEMA_VERSION}), + ) assert needs_rebuild(index_dir) is True - def test_returns_true_when_language_sentinel_content_differs( + def test_returns_true_when_language_differs( self, index_dir: Path, settings: SettingsWrapper, ) -> None: settings.SEARCH_LANGUAGE = "de" - (index_dir / ".schema_version").write_text(str(SCHEMA_VERSION)) - (index_dir / ".schema_language").write_text("en") + (index_dir / ".index_settings.json").write_text( + json.dumps({"schema_version": SCHEMA_VERSION, "language": "en"}), + ) assert needs_rebuild(index_dir) is True