diff --git a/docker/rootfs/etc/s6-overlay/s6-rc.d/init-search-index/run b/docker/rootfs/etc/s6-overlay/s6-rc.d/init-search-index/run index 465c91f0b..8f6feeb7f 100755 --- a/docker/rootfs/etc/s6-overlay/s6-rc.d/init-search-index/run +++ b/docker/rootfs/etc/s6-overlay/s6-rc.d/init-search-index/run @@ -3,39 +3,10 @@ declare -r log_prefix="[init-index]" -# Version 1: Tantivy backend (replaces Whoosh; resets versioning from scratch) -declare -r index_version=1 -declare -r data_dir="${PAPERLESS_DATA_DIR:-/usr/src/paperless/data}" -declare -r index_version_file="${data_dir}/.index_version" -declare -r index_language_file="${data_dir}/.index_language" -# Track the raw env var (not the resolved language) so inference changes -# don't cause spurious reindexes — only explicit setting changes trigger one. -declare -r search_language="${PAPERLESS_SEARCH_LANGUAGE:-}" - -update_index () { - echo "${log_prefix} Search index out of date. Updating..." - cd "${PAPERLESS_SRC_DIR}" - if [[ -n "${USER_IS_NON_ROOT}" ]]; then - python3 manage.py document_index reindex --no-progress-bar - echo ${index_version} | tee "${index_version_file}" > /dev/null - echo "${search_language}" | tee "${index_language_file}" > /dev/null - else - s6-setuidgid paperless python3 manage.py document_index reindex --no-progress-bar - echo ${index_version} | s6-setuidgid paperless tee "${index_version_file}" > /dev/null - echo "${search_language}" | s6-setuidgid paperless tee "${index_language_file}" > /dev/null - fi -} - -if [[ ! -f "${index_version_file}" ]]; then - echo "${log_prefix} No index version file found" - update_index -elif [[ $(<"${index_version_file}") != "${index_version}" ]]; then - echo "${log_prefix} Index version updated" - update_index -elif [[ ! 
-f "${index_language_file}" ]]; then - echo "${log_prefix} No language file found" - update_index -elif [[ $(<"${index_language_file}") != "${search_language}" ]]; then - echo "${log_prefix} Search language changed" - update_index +echo "${log_prefix} Checking search index..." +cd "${PAPERLESS_SRC_DIR}" +if [[ -n "${USER_IS_NON_ROOT}" ]]; then + python3 manage.py document_index reindex --if-needed --no-progress-bar +else + s6-setuidgid paperless python3 manage.py document_index reindex --if-needed --no-progress-bar fi diff --git a/docs/administration.md b/docs/administration.md index 8993e1031..4a4a2f925 100644 --- a/docs/administration.md +++ b/docs/administration.md @@ -180,16 +180,15 @@ following: This might not actually do anything. Not every new paperless version comes with new database migrations. -4. Rebuild the search index. +4. Rebuild the search index if needed. ```shell-session cd src - python3 manage.py document_index reindex + python3 manage.py document_index reindex --if-needed ``` - This is required when the search backend has changed (e.g. the upgrade - to Tantivy). It is safe to run on every upgrade — if the index is already - current it completes quickly. + This is a no-op if the index is already up to date, so it is safe to + run on every upgrade. ### Database Upgrades @@ -480,21 +479,20 @@ task scheduler. !!! note - **Docker users:** On first startup after upgrading, the container automatically - detects the index format change and runs a full reindex before starting the - webserver. No manual step is required. + **Docker users:** On every startup, the container runs + `document_index reindex --if-needed` automatically. Schema changes, language + changes, and missing indexes are all detected, and the index is rebuilt + before the webserver starts. No manual step is required. 
- **Bare metal users:** After upgrading, run the following command once to rebuild - the search index in the new format: + **Bare metal users:** Run the following command after each upgrade (and after + changing `PAPERLESS_SEARCH_LANGUAGE`). It is a no-op if the index is already + up to date: ```shell-session cd src - python3 manage.py document_index reindex + python3 manage.py document_index reindex --if-needed ``` - Changing `PAPERLESS_SEARCH_LANGUAGE` also requires a manual reindex on bare - metal (Docker handles this automatically). - ### Clearing the database read cache If the database read cache is enabled, **you must run this command** after making any changes to the database outside the application context. diff --git a/src/documents/management/commands/document_index.py b/src/documents/management/commands/document_index.py index 4c70ec268..6b85e61b1 100644 --- a/src/documents/management/commands/document_index.py +++ b/src/documents/management/commands/document_index.py @@ -21,10 +21,26 @@ class Command(PaperlessCommand): default=False, help="Wipe and recreate the index from scratch (only used with reindex).", ) + parser.add_argument( + "--if-needed", + action="store_true", + default=False, + help=( + "Skip reindex if the index is already up to date. " + "Checks schema version and search language sentinels. " + "Safe to run on every startup or upgrade." + ), + ) def handle(self, *args, **options): with transaction.atomic(): if options["command"] == "reindex": + if options.get("if_needed"): + from documents.search._schema import _needs_rebuild + + if not _needs_rebuild(settings.INDEX_DIR): + self.stdout.write("Search index is up to date.") + return if options.get("recreate"): from documents.search import wipe_index