Patch summary. This free text precedes the first "diff --git" header; patch
tools skip leading text that is not part of a patch.

docker/rootfs/etc/s6-overlay/s6-rc.d/init-search-index/run:
  index_version changes from 9 to 1 for the Tantivy backend. After each
  reindex the raw PAPERLESS_SEARCH_LANGUAGE value is written to
  ${data_dir}/.index_language, and startup triggers a reindex when that file
  is missing or its content differs from the current value.

docs/administration.md:
  Adds a numbered step 4, "Rebuild the search index", and replaces the
  automatic-reindex note with separate Docker and bare-metal guidance.

diff --git a/docker/rootfs/etc/s6-overlay/s6-rc.d/init-search-index/run b/docker/rootfs/etc/s6-overlay/s6-rc.d/init-search-index/run
index 2208faf67..465c91f0b 100755
--- a/docker/rootfs/etc/s6-overlay/s6-rc.d/init-search-index/run
+++ b/docker/rootfs/etc/s6-overlay/s6-rc.d/init-search-index/run
@@ -3,9 +3,14 @@
 
 declare -r log_prefix="[init-index]"
 
-declare -r index_version=9
+# Version 1: Tantivy backend (replaces Whoosh; resets versioning from scratch)
+declare -r index_version=1
 declare -r data_dir="${PAPERLESS_DATA_DIR:-/usr/src/paperless/data}"
 declare -r index_version_file="${data_dir}/.index_version"
+declare -r index_language_file="${data_dir}/.index_language"
+# Track the raw env var (not the resolved language) so inference changes
+# don't cause spurious reindexes — only explicit setting changes trigger one.
+declare -r search_language="${PAPERLESS_SEARCH_LANGUAGE:-}"
 
 update_index () {
 	echo "${log_prefix} Search index out of date. Updating..."
@@ -13,16 +18,24 @@ update_index () {
 	if [[ -n "${USER_IS_NON_ROOT}" ]]; then
 		python3 manage.py document_index reindex --no-progress-bar
 		echo ${index_version} | tee "${index_version_file}" > /dev/null
+		echo "${search_language}" | tee "${index_language_file}" > /dev/null
 	else
 		s6-setuidgid paperless python3 manage.py document_index reindex --no-progress-bar
 		echo ${index_version} | s6-setuidgid paperless tee "${index_version_file}" > /dev/null
+		echo "${search_language}" | s6-setuidgid paperless tee "${index_language_file}" > /dev/null
 	fi
 }
 
-if [[ (! -f "${index_version_file}") ]]; then
+if [[ ! -f "${index_version_file}" ]]; then
 	echo "${log_prefix} No index version file found"
 	update_index
-elif [[ $(<"${index_version_file}") != "$index_version" ]]; then
-	echo "${log_prefix} index version updated"
+elif [[ $(<"${index_version_file}") != "${index_version}" ]]; then
+	echo "${log_prefix} Index version updated"
+	update_index
+elif [[ ! -f "${index_language_file}" ]]; then
+	echo "${log_prefix} No language file found"
+	update_index
+elif [[ $(<"${index_language_file}") != "${search_language}" ]]; then
+	echo "${log_prefix} Search language changed"
 	update_index
 fi
diff --git a/docs/administration.md b/docs/administration.md
index 9d123cd38..8993e1031 100644
--- a/docs/administration.md
+++ b/docs/administration.md
@@ -180,6 +180,17 @@ following:
     This might not actually do anything. Not every new paperless version
     comes with new database migrations.
 
+4. Rebuild the search index.
+
+    ```shell-session
+    cd src
+    python3 manage.py document_index reindex
+    ```
+
+    This is required when the search backend has changed (e.g. the upgrade
+    to Tantivy). It is safe to run on every upgrade — if the index is already
+    current it completes quickly.
+
 ### Database Upgrades
 
 Paperless-ngx is compatible with Django-supported versions of PostgreSQL and MariaDB and it is generally
@@ -469,9 +480,20 @@ task scheduler.
 
 !!! note
 
-    On first startup after upgrading from a previous version, paperless detects
-    that the index format has changed and automatically performs a one-time full
-    reindex. No manual migration step is required.
+    **Docker users:** On first startup after upgrading, the container automatically
+    detects the index format change and runs a full reindex before starting the
+    webserver. No manual step is required.
+
+    **Bare metal users:** After upgrading, run the following command once to rebuild
+    the search index in the new format:
+
+    ```shell-session
+    cd src
+    python3 manage.py document_index reindex
+    ```
+
+    Changing `PAPERLESS_SEARCH_LANGUAGE` also requires a manual reindex on bare
+    metal (Docker handles this automatically).
 
 ### Clearing the database read cache
 