From aed9abe48c02b1b7df07e429db3f2ed8c3a58ce8 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Thu, 2 Apr 2026 12:38:22 -0700 Subject: [PATCH] Feature: Replace Whoosh with tantivy search backend (#12471) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Claude Sonnet 4.6 Co-authored-by: Antoine Mérino <3023499+Merinorus@users.noreply.github.com> --- .../s6-overlay/s6-rc.d/init-search-index/run | 28 +- docs/administration.md | 47 +- docs/api.md | 5 +- docs/configuration.md | 26 + docs/migration-v3.md | 31 + docs/usage.md | 84 +- pyproject.toml | 4 +- src/documents/admin.py | 15 +- src/documents/bulk_edit.py | 6 +- src/documents/index.py | 675 -------------- .../management/commands/document_index.py | 57 +- ...7_migrate_fulltext_query_field_prefixes.py | 39 + src/documents/models.py | 33 +- src/documents/sanity_checker.py | 17 +- src/documents/search/__init__.py | 21 + src/documents/search/_backend.py | 858 ++++++++++++++++++ src/documents/search/_query.py | 497 ++++++++++ src/documents/search/_schema.py | 165 ++++ src/documents/search/_tokenizer.py | 116 +++ src/documents/serialisers.py | 18 +- src/documents/signals/handlers.py | 13 +- src/documents/tasks.py | 45 +- src/documents/tests/conftest.py | 21 + src/documents/tests/search/__init__.py | 0 src/documents/tests/search/conftest.py | 33 + src/documents/tests/search/test_backend.py | 502 ++++++++++ ...migration_fulltext_query_field_prefixes.py | 138 +++ src/documents/tests/search/test_query.py | 530 +++++++++++ src/documents/tests/search/test_schema.py | 63 ++ src/documents/tests/search/test_tokenizer.py | 78 ++ src/documents/tests/test_admin.py | 33 +- .../tests/test_api_document_versions.py | 37 +- src/documents/tests/test_api_search.py | 421 +++++---- src/documents/tests/test_api_status.py | 30 +- src/documents/tests/test_delayedquery.py | 58 -- src/documents/tests/test_index.py | 371 -------- 
src/documents/tests/test_management.py | 73 +- src/documents/tests/test_matchables.py | 6 + src/documents/tests/test_tag_hierarchy.py | 4 +- src/documents/tests/test_task_signals.py | 30 +- src/documents/tests/test_tasks.py | 25 +- src/documents/tests/test_workflows.py | 1 + src/documents/tests/utils.py | 6 + src/documents/utils.py | 13 + src/documents/views.py | 236 ++--- src/paperless/settings/__init__.py | 51 ++ src/paperless/settings/parsers.py | 2 +- .../parsers/test_tesseract_custom_settings.py | 5 + src/paperless/tests/settings/test_settings.py | 45 + src/paperless/views.py | 19 +- src/paperless_ai/indexing.py | 14 +- uv.lock | 113 ++- 52 files changed, 4050 insertions(+), 1708 deletions(-) delete mode 100644 src/documents/index.py create mode 100644 src/documents/migrations/0017_migrate_fulltext_query_field_prefixes.py create mode 100644 src/documents/search/__init__.py create mode 100644 src/documents/search/_backend.py create mode 100644 src/documents/search/_query.py create mode 100644 src/documents/search/_schema.py create mode 100644 src/documents/search/_tokenizer.py create mode 100644 src/documents/tests/search/__init__.py create mode 100644 src/documents/tests/search/conftest.py create mode 100644 src/documents/tests/search/test_backend.py create mode 100644 src/documents/tests/search/test_migration_fulltext_query_field_prefixes.py create mode 100644 src/documents/tests/search/test_query.py create mode 100644 src/documents/tests/search/test_schema.py create mode 100644 src/documents/tests/search/test_tokenizer.py delete mode 100644 src/documents/tests/test_delayedquery.py delete mode 100644 src/documents/tests/test_index.py diff --git a/docker/rootfs/etc/s6-overlay/s6-rc.d/init-search-index/run b/docker/rootfs/etc/s6-overlay/s6-rc.d/init-search-index/run index 2208faf67..8f6feeb7f 100755 --- a/docker/rootfs/etc/s6-overlay/s6-rc.d/init-search-index/run +++ b/docker/rootfs/etc/s6-overlay/s6-rc.d/init-search-index/run @@ -3,26 +3,10 @@ declare -r 
log_prefix="[init-index]" -declare -r index_version=9 -declare -r data_dir="${PAPERLESS_DATA_DIR:-/usr/src/paperless/data}" -declare -r index_version_file="${data_dir}/.index_version" - -update_index () { - echo "${log_prefix} Search index out of date. Updating..." - cd "${PAPERLESS_SRC_DIR}" - if [[ -n "${USER_IS_NON_ROOT}" ]]; then - python3 manage.py document_index reindex --no-progress-bar - echo ${index_version} | tee "${index_version_file}" > /dev/null - else - s6-setuidgid paperless python3 manage.py document_index reindex --no-progress-bar - echo ${index_version} | s6-setuidgid paperless tee "${index_version_file}" > /dev/null - fi -} - -if [[ (! -f "${index_version_file}") ]]; then - echo "${log_prefix} No index version file found" - update_index -elif [[ $(<"${index_version_file}") != "$index_version" ]]; then - echo "${log_prefix} index version updated" - update_index +echo "${log_prefix} Checking search index..." +cd "${PAPERLESS_SRC_DIR}" +if [[ -n "${USER_IS_NON_ROOT}" ]]; then + python3 manage.py document_index reindex --if-needed --no-progress-bar +else + s6-setuidgid paperless python3 manage.py document_index reindex --if-needed --no-progress-bar fi diff --git a/docs/administration.md b/docs/administration.md index e55b899f5..013ac9fdd 100644 --- a/docs/administration.md +++ b/docs/administration.md @@ -180,6 +180,16 @@ following: This might not actually do anything. Not every new paperless version comes with new database migrations. +4. Rebuild the search index if needed. + + ```shell-session + cd src + python3 manage.py document_index reindex --if-needed + ``` + + This is a no-op if the index is already up to date, so it is safe to + run on every upgrade. + ### Database Upgrades Paperless-ngx is compatible with Django-supported versions of PostgreSQL and MariaDB and it is generally @@ -453,17 +463,42 @@ the search yields non-existing documents or won't find anything, you may need to recreate the index manually. 
``` -document_index {reindex,optimize} +document_index {reindex,optimize} [--recreate] [--if-needed] ``` -Specify `reindex` to have the index created from scratch. This may take -some time. +Specify `reindex` to rebuild the index from all documents in the database. This +may take some time. -Specify `optimize` to optimize the index. This updates certain aspects -of the index and usually makes queries faster and also ensures that the -autocompletion works properly. This command is regularly invoked by the +Pass `--recreate` to wipe the existing index before rebuilding. Use this when the +index is corrupted or you want a fully clean rebuild. + +Pass `--if-needed` to skip the rebuild if the index is already up to date (schema +version and search language match). Safe to run on every startup or upgrade. + +Specify `optimize` to optimize the index. This command is regularly invoked by the task scheduler. +!!! note + + The `optimize` subcommand is deprecated and is now a no-op. Tantivy manages + segment merging automatically; no manual optimization step is needed. + +!!! note + + **Docker users:** On every startup, the container runs + `document_index reindex --if-needed` automatically. Schema changes, language + changes, and missing indexes are all detected and rebuilt before the webserver + starts. No manual step is required. + + **Bare metal users:** Run the following command after each upgrade (and after + changing `PAPERLESS_SEARCH_LANGUAGE`). It is a no-op if the index is already + up to date: + + ```shell-session + cd src + python3 manage.py document_index reindex --if-needed + ``` + ### Clearing the database read cache If the database read cache is enabled, **you must run this command** after making any changes to the database outside the application context. diff --git a/docs/api.md b/docs/api.md index 21c6b140f..2284d9d29 100644 --- a/docs/api.md +++ b/docs/api.md @@ -167,9 +167,8 @@ Query parameters: - `term`: The incomplete term. - `limit`: Amount of results. 
Defaults to 10. -Results returned by the endpoint are ordered by importance of the term -in the document index. The first result is the term that has the highest -[Tf/Idf](https://en.wikipedia.org/wiki/Tf%E2%80%93idf) score in the index. +Results are ordered by how many of the user's visible documents contain +each matching word. The first result is the word that appears in the most documents. ```json ["term1", "term3", "term6", "term4"] diff --git a/docs/configuration.md b/docs/configuration.md index 4ce2d9dc6..a22171ce9 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1103,6 +1103,32 @@ should be a valid crontab(5) expression describing when to run. Defaults to `0 0 * * *` or daily at midnight. +#### [`PAPERLESS_SEARCH_LANGUAGE=`](#PAPERLESS_SEARCH_LANGUAGE) {#PAPERLESS_SEARCH_LANGUAGE} + +: Sets the stemmer language for the full-text search index. +Stemming improves recall by matching word variants (e.g. "running" matches "run"). +Changing this setting causes the index to be rebuilt automatically on next startup. +An invalid value raises an error at startup. + +: Use the ISO 639-1 two-letter code (e.g. `en`, `de`, `fr`). Lowercase full names +(e.g. `english`, `german`, `french`) are also accepted. The capitalized names shown +in the [Tantivy Language enum](https://docs.rs/tantivy/latest/tantivy/tokenizer/enum.Language.html) +documentation are **not** valid — use the lowercase equivalent. + +: If not set, paperless infers the language from +[`PAPERLESS_OCR_LANGUAGE`](#PAPERLESS_OCR_LANGUAGE). If the OCR language has no +Tantivy stemmer equivalent, stemming is disabled. + + Defaults to unset (inferred from `PAPERLESS_OCR_LANGUAGE`). + +#### [`PAPERLESS_ADVANCED_FUZZY_SEARCH_THRESHOLD=`](#PAPERLESS_ADVANCED_FUZZY_SEARCH_THRESHOLD) {#PAPERLESS_ADVANCED_FUZZY_SEARCH_THRESHOLD} + +: When set to a float value, approximate/fuzzy matching is applied alongside exact +matching. Fuzzy results rank below exact matches. 
A value of `0.5` is a reasonable +starting point. Leave unset to disable fuzzy matching entirely. + + Defaults to unset (disabled). + #### [`PAPERLESS_SANITY_TASK_CRON=`](#PAPERLESS_SANITY_TASK_CRON) {#PAPERLESS_SANITY_TASK_CRON} : Configures the scheduled sanity checker frequency. The value should be a diff --git a/docs/migration-v3.md b/docs/migration-v3.md index 4c728a6a4..1cfb212ff 100644 --- a/docs/migration-v3.md +++ b/docs/migration-v3.md @@ -104,6 +104,37 @@ Multiple options are combined in a single value: PAPERLESS_DB_OPTIONS="sslmode=require;sslrootcert=/certs/ca.pem;pool.max_size=10" ``` +## Search Index (Whoosh -> Tantivy) + +The full-text search backend has been replaced with [Tantivy](https://github.com/quickwit-oss/tantivy). +The index format is incompatible with Whoosh, so **the search index is automatically rebuilt from +scratch on first startup after upgrading**. No manual action is required for the rebuild itself. + +### Note and custom field search syntax + +The old Whoosh index exposed `note` and `custom_field` as flat text fields that were included in +unqualified searches (e.g. just typing `invoice` would match note content). With Tantivy these are +now structured JSON fields accessed via dotted paths: + +| Old syntax | New syntax | +| -------------------- | --------------------------- | +| `note:query` | `notes.note:query` | +| `custom_field:query` | `custom_fields.value:query` | + +**Saved views are migrated automatically.** Any saved view filter rule that used an explicit +`note:` or `custom_field:` field prefix in a fulltext query is rewritten to the new syntax by a +data migration that runs on upgrade. + +**Unqualified queries are not migrated.** If you had a saved view with a plain search term (e.g. +`invoice`) that happened to match note content or custom field values, it will no longer return +those matches. 
Update those queries to use the explicit prefix, for example: + +``` +invoice OR notes.note:invoice OR custom_fields.value:invoice +``` + +Custom field names can also be searched with `custom_fields.name:fieldname`. + ## OpenID Connect Token Endpoint Authentication Some existing OpenID Connect setups may require an explicit token endpoint authentication method after upgrading to v3. diff --git a/docs/usage.md b/docs/usage.md index 6da6c4d77..4e2def93b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -804,13 +804,20 @@ contract you signed 8 years ago). When you search paperless for a document, it tries to match this query against your documents. Paperless will look for matching documents by -inspecting their content, title, correspondent, type and tags. Paperless -returns a scored list of results, so that documents matching your query -better will appear further up in the search results. +inspecting their content, title, correspondent, type, tags, notes, and +custom field values. Paperless returns a scored list of results, so that +documents matching your query better will appear further up in the search +results. By default, paperless returns only documents which contain all words -typed in the search bar. However, paperless also offers advanced search -syntax if you want to drill down the results further. +typed in the search bar. A few things to know about how matching works: + +- **Word-order-independent**: "invoice unpaid" and "unpaid invoice" return the same results. +- **Accent-insensitive**: searching `resume` also finds `résumé`, `cafe` finds `café`. +- **Separator-agnostic**: punctuation and separators are stripped during indexing, so + searching a partial number like `1312` finds documents containing `A-1312/B`. + +Paperless also offers advanced search syntax if you want to drill down further. 
Matching documents with logical expressions: @@ -839,18 +846,69 @@ Matching inexact words: produ*name ``` +Matching natural date keywords: + +``` +added:today +modified:yesterday +created:this_week +added:last_month +modified:this_year +``` + +Supported date keywords: `today`, `yesterday`, `this_week`, `last_week`, +`this_month`, `last_month`, `this_year`, `last_year`. + +#### Searching custom fields + +Custom field values are included in the full-text index, so a plain search +already matches documents whose custom field values contain your search terms. +To narrow by field name or value specifically: + +``` +custom_fields.value:policy +custom_fields.name:"Contract Number" +custom_fields.name:Insurance custom_fields.value:policy +``` + +- `custom_fields.value` matches against the value of any custom field. +- `custom_fields.name` matches the name of the field (use quotes for multi-word names). +- Combine both to find documents where a specific named field contains a specific value. + +Because separators are stripped during indexing, individual parts of formatted +codes are searchable on their own. A value stored as `A-1312/99.50` produces the +tokens `a`, `1312`, `99`, `50` — each searchable independently: + +``` +custom_fields.value:1312 +custom_fields.name:"Contract Number" custom_fields.value:1312 +``` + !!! note - Inexact terms are hard for search indexes. These queries might take a - while to execute. That's why paperless offers auto complete and query - correction. + Custom date fields do not support relative date syntax (e.g. `[now to 2 weeks]`). + For date ranges on custom date fields, use the document list filters in the web UI. + +#### Searching notes + +Notes content is included in full-text search automatically. To search +by note author or content specifically: + +``` +notes.user:alice +notes.note:reminder +notes.user:alice notes.note:insurance +``` All of these constructs can be combined as you see fit. 
If you want to -learn more about the query language used by paperless, paperless uses -Whoosh's default query language. Head over to [Whoosh query -language](https://whoosh.readthedocs.io/en/latest/querylang.html). For -details on what date parsing utilities are available, see [Date -parsing](https://whoosh.readthedocs.io/en/latest/dates.html#parsing-date-queries). +learn more about the query language used by paperless, see the +[Tantivy query language documentation](https://docs.rs/tantivy/latest/tantivy/query/struct.QueryParser.html). + +!!! note + + Fuzzy (approximate) matching can be enabled by setting + [`PAPERLESS_ADVANCED_FUZZY_SEARCH_THRESHOLD`](configuration.md#PAPERLESS_ADVANCED_FUZZY_SEARCH_THRESHOLD). + When enabled, paperless will include near-miss results ranked below exact matches. ## Keyboard shortcuts / hotkeys diff --git a/pyproject.toml b/pyproject.toml index e37a7958f..5af886f0c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,11 +74,11 @@ dependencies = [ "scikit-learn~=1.8.0", "sentence-transformers>=4.1", "setproctitle~=1.3.4", + "tantivy>=0.25.1", "tika-client~=0.10.0", "torch~=2.10.0", "watchfiles>=1.1.1", "whitenoise~=6.11", - "whoosh-reloaded>=2.7.5", "zxing-cpp~=3.0.0", ] [project.optional-dependencies] @@ -123,6 +123,7 @@ testing = [ "pytest-rerunfailures~=16.1", "pytest-sugar", "pytest-xdist~=3.8.0", + "time-machine>=2.13", ] typing = [ "celery-types", @@ -310,6 +311,7 @@ markers = [ "greenmail: Tests requiring Greenmail service", "date_parsing: Tests which cover date parsing from content or filename", "management: Tests which cover management commands/functionality", + "search: Tests for the Tantivy search backend", ] [tool.pytest_env] diff --git a/src/documents/admin.py b/src/documents/admin.py index 6c7a6f304..f0e5ccd25 100644 --- a/src/documents/admin.py +++ b/src/documents/admin.py @@ -100,24 +100,23 @@ class DocumentAdmin(GuardedModelAdmin): return Document.global_objects.all() def delete_queryset(self, request, queryset): 
- from documents import index + from documents.search import get_backend - with index.open_index_writer() as writer: + with get_backend().batch_update() as batch: for o in queryset: - index.remove_document(writer, o) - + batch.remove(o.pk) super().delete_queryset(request, queryset) def delete_model(self, request, obj): - from documents import index + from documents.search import get_backend - index.remove_document_from_index(obj) + get_backend().remove(obj.pk) super().delete_model(request, obj) def save_model(self, request, obj, form, change): - from documents import index + from documents.search import get_backend - index.add_or_update_document(obj) + get_backend().add_or_update(obj) super().save_model(request, obj, form, change) diff --git a/src/documents/bulk_edit.py b/src/documents/bulk_edit.py index 8dbcdb8a4..3f80b699d 100644 --- a/src/documents/bulk_edit.py +++ b/src/documents/bulk_edit.py @@ -349,11 +349,11 @@ def delete(doc_ids: list[int]) -> Literal["OK"]: Document.objects.filter(id__in=delete_ids).delete() - from documents import index + from documents.search import get_backend - with index.open_index_writer() as writer: + with get_backend().batch_update() as batch: for id in delete_ids: - index.remove_document_by_id(writer, id) + batch.remove(id) status_mgr = DocumentsStatusManager() status_mgr.send_documents_deleted(delete_ids) diff --git a/src/documents/index.py b/src/documents/index.py deleted file mode 100644 index 24d74d8c1..000000000 --- a/src/documents/index.py +++ /dev/null @@ -1,675 +0,0 @@ -from __future__ import annotations - -import logging -import math -import re -from collections import Counter -from contextlib import contextmanager -from datetime import UTC -from datetime import datetime -from datetime import time -from datetime import timedelta -from shutil import rmtree -from time import sleep -from typing import TYPE_CHECKING -from typing import Literal - -from dateutil.relativedelta import relativedelta -from django.conf import 
settings -from django.utils import timezone as django_timezone -from django.utils.timezone import get_current_timezone -from django.utils.timezone import now -from guardian.shortcuts import get_users_with_perms -from whoosh import classify -from whoosh import highlight -from whoosh import query -from whoosh.fields import BOOLEAN -from whoosh.fields import DATETIME -from whoosh.fields import KEYWORD -from whoosh.fields import NUMERIC -from whoosh.fields import TEXT -from whoosh.fields import Schema -from whoosh.highlight import HtmlFormatter -from whoosh.idsets import BitSet -from whoosh.idsets import DocIdSet -from whoosh.index import FileIndex -from whoosh.index import LockError -from whoosh.index import create_in -from whoosh.index import exists_in -from whoosh.index import open_dir -from whoosh.qparser import MultifieldParser -from whoosh.qparser import QueryParser -from whoosh.qparser.dateparse import DateParserPlugin -from whoosh.qparser.dateparse import English -from whoosh.qparser.plugins import FieldsPlugin -from whoosh.scoring import TF_IDF -from whoosh.util.times import timespan -from whoosh.writing import AsyncWriter - -from documents.models import CustomFieldInstance -from documents.models import Document -from documents.models import Note -from documents.models import User - -if TYPE_CHECKING: - from django.db.models import QuerySet - from whoosh.reading import IndexReader - from whoosh.searching import ResultsPage - from whoosh.searching import Searcher - -logger = logging.getLogger("paperless.index") - - -def get_schema() -> Schema: - return Schema( - id=NUMERIC(stored=True, unique=True), - title=TEXT(sortable=True), - content=TEXT(), - asn=NUMERIC(sortable=True, signed=False), - correspondent=TEXT(sortable=True), - correspondent_id=NUMERIC(), - has_correspondent=BOOLEAN(), - tag=KEYWORD(commas=True, scorable=True, lowercase=True), - tag_id=KEYWORD(commas=True, scorable=True), - has_tag=BOOLEAN(), - type=TEXT(sortable=True), - type_id=NUMERIC(), - 
has_type=BOOLEAN(), - created=DATETIME(sortable=True), - modified=DATETIME(sortable=True), - added=DATETIME(sortable=True), - path=TEXT(sortable=True), - path_id=NUMERIC(), - has_path=BOOLEAN(), - notes=TEXT(), - num_notes=NUMERIC(sortable=True, signed=False), - custom_fields=TEXT(), - custom_field_count=NUMERIC(sortable=True, signed=False), - has_custom_fields=BOOLEAN(), - custom_fields_id=KEYWORD(commas=True), - owner=TEXT(), - owner_id=NUMERIC(), - has_owner=BOOLEAN(), - viewer_id=KEYWORD(commas=True), - checksum=TEXT(), - page_count=NUMERIC(sortable=True), - original_filename=TEXT(sortable=True), - is_shared=BOOLEAN(), - ) - - -def open_index(*, recreate=False) -> FileIndex: - transient_exceptions = (FileNotFoundError, LockError) - max_retries = 3 - retry_delay = 0.1 - - for attempt in range(max_retries + 1): - try: - if exists_in(settings.INDEX_DIR) and not recreate: - return open_dir(settings.INDEX_DIR, schema=get_schema()) - break - except transient_exceptions as exc: - is_last_attempt = attempt == max_retries or recreate - if is_last_attempt: - logger.exception( - "Error while opening the index after retries, recreating.", - ) - break - - logger.warning( - "Transient error while opening the index (attempt %s/%s): %s. 
Retrying.", - attempt + 1, - max_retries + 1, - exc, - ) - sleep(retry_delay) - except Exception: - logger.exception("Error while opening the index, recreating.") - break - - # create_in doesn't handle corrupted indexes very well, remove the directory entirely first - if settings.INDEX_DIR.is_dir(): - rmtree(settings.INDEX_DIR) - settings.INDEX_DIR.mkdir(parents=True, exist_ok=True) - - return create_in(settings.INDEX_DIR, get_schema()) - - -@contextmanager -def open_index_writer(*, optimize=False) -> AsyncWriter: - writer = AsyncWriter(open_index()) - - try: - yield writer - except Exception as e: - logger.exception(str(e)) - writer.cancel() - finally: - writer.commit(optimize=optimize) - - -@contextmanager -def open_index_searcher() -> Searcher: - searcher = open_index().searcher() - - try: - yield searcher - finally: - searcher.close() - - -def update_document( - writer: AsyncWriter, - doc: Document, - effective_content: str | None = None, -) -> None: - tags = ",".join([t.name for t in doc.tags.all()]) - tags_ids = ",".join([str(t.id) for t in doc.tags.all()]) - notes = ",".join([str(c.note) for c in Note.objects.filter(document=doc)]) - custom_fields = ",".join( - [str(c) for c in CustomFieldInstance.objects.filter(document=doc)], - ) - custom_fields_ids = ",".join( - [str(f.field.id) for f in CustomFieldInstance.objects.filter(document=doc)], - ) - asn: int | None = doc.archive_serial_number - if asn is not None and ( - asn < Document.ARCHIVE_SERIAL_NUMBER_MIN - or asn > Document.ARCHIVE_SERIAL_NUMBER_MAX - ): - logger.error( - f"Not indexing Archive Serial Number {asn} of document {doc.pk}. 
" - f"ASN is out of range " - f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, " - f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}.", - ) - asn = 0 - users_with_perms = get_users_with_perms( - doc, - only_with_perms_in=["view_document"], - ) - viewer_ids: str = ",".join([str(u.id) for u in users_with_perms]) - writer.update_document( - id=doc.pk, - title=doc.title, - content=effective_content or doc.content, - correspondent=doc.correspondent.name if doc.correspondent else None, - correspondent_id=doc.correspondent.id if doc.correspondent else None, - has_correspondent=doc.correspondent is not None, - tag=tags if tags else None, - tag_id=tags_ids if tags_ids else None, - has_tag=len(tags) > 0, - type=doc.document_type.name if doc.document_type else None, - type_id=doc.document_type.id if doc.document_type else None, - has_type=doc.document_type is not None, - created=datetime.combine(doc.created, time.min), - added=doc.added, - asn=asn, - modified=doc.modified, - path=doc.storage_path.name if doc.storage_path else None, - path_id=doc.storage_path.id if doc.storage_path else None, - has_path=doc.storage_path is not None, - notes=notes, - num_notes=len(notes), - custom_fields=custom_fields, - custom_field_count=len(doc.custom_fields.all()), - has_custom_fields=len(custom_fields) > 0, - custom_fields_id=custom_fields_ids if custom_fields_ids else None, - owner=doc.owner.username if doc.owner else None, - owner_id=doc.owner.id if doc.owner else None, - has_owner=doc.owner is not None, - viewer_id=viewer_ids if viewer_ids else None, - checksum=doc.checksum, - page_count=doc.page_count, - original_filename=doc.original_filename, - is_shared=len(viewer_ids) > 0, - ) - logger.debug(f"Index updated for document {doc.pk}.") - - -def remove_document(writer: AsyncWriter, doc: Document) -> None: - remove_document_by_id(writer, doc.pk) - - -def remove_document_by_id(writer: AsyncWriter, doc_id) -> None: - writer.delete_by_term("id", doc_id) - - -def add_or_update_document( - document: 
Document, - effective_content: str | None = None, -) -> None: - with open_index_writer() as writer: - update_document(writer, document, effective_content=effective_content) - - -def remove_document_from_index(document: Document) -> None: - with open_index_writer() as writer: - remove_document(writer, document) - - -class MappedDocIdSet(DocIdSet): - """ - A DocIdSet backed by a set of `Document` IDs. - Supports efficiently looking up if a whoosh docnum is in the provided `filter_queryset`. - """ - - def __init__(self, filter_queryset: QuerySet, ixreader: IndexReader) -> None: - super().__init__() - document_ids = filter_queryset.order_by("id").values_list("id", flat=True) - max_id = document_ids.last() or 0 - self.document_ids = BitSet(document_ids, size=max_id) - self.ixreader = ixreader - - def __contains__(self, docnum) -> bool: - document_id = self.ixreader.stored_fields(docnum)["id"] - return document_id in self.document_ids - - def __bool__(self) -> Literal[True]: - # searcher.search ignores a filter if it's "falsy". - # We use this hack so this DocIdSet, when used as a filter, is never ignored. 
- return True - - -class DelayedQuery: - def _get_query(self): - raise NotImplementedError # pragma: no cover - - def _get_query_sortedby(self) -> tuple[None, Literal[False]] | tuple[str, bool]: - if "ordering" not in self.query_params: - return None, False - - field: str = self.query_params["ordering"] - - sort_fields_map: dict[str, str] = { - "created": "created", - "modified": "modified", - "added": "added", - "title": "title", - "correspondent__name": "correspondent", - "document_type__name": "type", - "archive_serial_number": "asn", - "num_notes": "num_notes", - "owner": "owner", - "page_count": "page_count", - } - - if field.startswith("-"): - field = field[1:] - reverse = True - else: - reverse = False - - if field not in sort_fields_map: - return None, False - else: - return sort_fields_map[field], reverse - - def __init__( - self, - searcher: Searcher, - query_params, - page_size, - filter_queryset: QuerySet, - ) -> None: - self.searcher = searcher - self.query_params = query_params - self.page_size = page_size - self.saved_results = dict() - self.first_score = None - self.filter_queryset = filter_queryset - self.suggested_correction = None - self._manual_hits_cache: list | None = None - - def __len__(self) -> int: - if self._manual_sort_requested(): - manual_hits = self._manual_hits() - return len(manual_hits) - - page = self[0:1] - return len(page) - - def _manual_sort_requested(self): - ordering = self.query_params.get("ordering", "") - return ordering.lstrip("-").startswith("custom_field_") - - def _manual_hits(self): - if self._manual_hits_cache is None: - q, mask, suggested_correction = self._get_query() - self.suggested_correction = suggested_correction - - results = self.searcher.search( - q, - mask=mask, - filter=MappedDocIdSet(self.filter_queryset, self.searcher.ixreader), - limit=None, - ) - results.fragmenter = highlight.ContextFragmenter(surround=50) - results.formatter = HtmlFormatter(tagname="span", between=" ... 
") - - if not self.first_score and len(results) > 0: - self.first_score = results[0].score - - if self.first_score: - results.top_n = [ - ( - (hit[0] / self.first_score) if self.first_score else None, - hit[1], - ) - for hit in results.top_n - ] - - hits_by_id = {hit["id"]: hit for hit in results} - matching_ids = list(hits_by_id.keys()) - - ordered_ids = list( - self.filter_queryset.filter(id__in=matching_ids).values_list( - "id", - flat=True, - ), - ) - ordered_ids = list(dict.fromkeys(ordered_ids)) - - self._manual_hits_cache = [ - hits_by_id[_id] for _id in ordered_ids if _id in hits_by_id - ] - return self._manual_hits_cache - - def get_result_ids(self) -> list[int]: - """ - Return all matching document IDs for the current query and ordering. - """ - if self._manual_sort_requested(): - return [hit["id"] for hit in self._manual_hits()] - - q, mask, suggested_correction = self._get_query() - self.suggested_correction = suggested_correction - sortedby, reverse = self._get_query_sortedby() - results = self.searcher.search( - q, - mask=mask, - filter=MappedDocIdSet(self.filter_queryset, self.searcher.ixreader), - limit=None, - sortedby=sortedby, - reverse=reverse, - ) - return [hit["id"] for hit in results] - - def __getitem__(self, item): - if item.start in self.saved_results: - return self.saved_results[item.start] - - if self._manual_sort_requested(): - manual_hits = self._manual_hits() - start = 0 if item.start is None else item.start - stop = item.stop - hits = manual_hits[start:stop] if stop is not None else manual_hits[start:] - page = ManualResultsPage(hits) - self.saved_results[start] = page - return page - - q, mask, suggested_correction = self._get_query() - self.suggested_correction = suggested_correction - sortedby, reverse = self._get_query_sortedby() - - page: ResultsPage = self.searcher.search_page( - q, - mask=mask, - filter=MappedDocIdSet(self.filter_queryset, self.searcher.ixreader), - pagenum=math.floor(item.start / self.page_size) + 1, - 
pagelen=self.page_size, - sortedby=sortedby, - reverse=reverse, - ) - page.results.fragmenter = highlight.ContextFragmenter(surround=50) - page.results.formatter = HtmlFormatter(tagname="span", between=" ... ") - - if not self.first_score and len(page.results) > 0 and sortedby is None: - self.first_score = page.results[0].score - - page.results.top_n = [ - ( - (hit[0] / self.first_score) if self.first_score else None, - hit[1], - ) - for hit in page.results.top_n - ] - - self.saved_results[item.start] = page - - return page - - -class ManualResultsPage(list): - def __init__(self, hits) -> None: - super().__init__(hits) - self.results = ManualResults(hits) - - -class ManualResults: - def __init__(self, hits) -> None: - self._docnums = [hit.docnum for hit in hits] - - def docs(self): - return self._docnums - - -class LocalDateParser(English): - def reverse_timezone_offset(self, d): - return (d.replace(tzinfo=django_timezone.get_current_timezone())).astimezone( - UTC, - ) - - def date_from(self, *args, **kwargs): - d = super().date_from(*args, **kwargs) - if isinstance(d, timespan): - d.start = self.reverse_timezone_offset(d.start) - d.end = self.reverse_timezone_offset(d.end) - elif isinstance(d, datetime): - d = self.reverse_timezone_offset(d) - return d - - -class DelayedFullTextQuery(DelayedQuery): - def _get_query(self) -> tuple: - q_str = self.query_params["query"] - q_str = rewrite_natural_date_keywords(q_str) - qp = MultifieldParser( - [ - "content", - "title", - "correspondent", - "tag", - "type", - "notes", - "custom_fields", - ], - self.searcher.ixreader.schema, - ) - qp.add_plugin( - DateParserPlugin( - basedate=django_timezone.now(), - dateparser=LocalDateParser(), - ), - ) - q = qp.parse(q_str) - suggested_correction = None - try: - corrected = self.searcher.correct_query(q, q_str) - if corrected.string != q_str: - corrected_results = self.searcher.search( - corrected.query, - limit=1, - filter=MappedDocIdSet(self.filter_queryset, 
self.searcher.ixreader), - scored=False, - ) - if len(corrected_results) > 0: - suggested_correction = corrected.string - except Exception as e: - logger.info( - "Error while correcting query %s: %s", - f"{q_str!r}", - e, - ) - - return q, None, suggested_correction - - -class DelayedMoreLikeThisQuery(DelayedQuery): - def _get_query(self) -> tuple: - more_like_doc_id = int(self.query_params["more_like_id"]) - content = Document.objects.get(id=more_like_doc_id).content - - docnum = self.searcher.document_number(id=more_like_doc_id) - kts = self.searcher.key_terms_from_text( - "content", - content, - numterms=20, - model=classify.Bo1Model, - normalize=False, - ) - q = query.Or( - [query.Term("content", word, boost=weight) for word, weight in kts], - ) - mask: set = {docnum} - - return q, mask, None - - -def autocomplete( - ix: FileIndex, - term: str, - limit: int = 10, - user: User | None = None, -) -> list: - """ - Mimics whoosh.reading.IndexReader.most_distinctive_terms with permissions - and without scoring - """ - terms = [] - - with ix.searcher(weighting=TF_IDF()) as s: - qp = QueryParser("content", schema=ix.schema) - # Don't let searches with a query that happen to match a field override the - # content field query instead and return bogus, not text data - qp.remove_plugin_class(FieldsPlugin) - q = qp.parse(f"{term.lower()}*") - user_criterias: list = get_permissions_criterias(user) - - results = s.search( - q, - terms=True, - filter=query.Or(user_criterias) if user_criterias is not None else None, - ) - - termCounts = Counter() - if results.has_matched_terms(): - for hit in results: - for _, match in hit.matched_terms(): - termCounts[match] += 1 - terms = [t for t, _ in termCounts.most_common(limit)] - - term_encoded: bytes = term.encode("UTF-8") - if term_encoded in terms: - terms.insert(0, terms.pop(terms.index(term_encoded))) - - return terms - - -def get_permissions_criterias(user: User | None = None) -> list: - user_criterias = [query.Term("has_owner", 
text=False)] - if user is not None: - if user.is_superuser: # superusers see all docs - user_criterias = [] - else: - user_criterias.append(query.Term("owner_id", user.id)) - user_criterias.append( - query.Term("viewer_id", str(user.id)), - ) - return user_criterias - - -def rewrite_natural_date_keywords(query_string: str) -> str: - """ - Rewrites natural date keywords (e.g. added:today or added:"yesterday") to UTC range syntax for Whoosh. - This resolves timezone issues with date parsing in Whoosh as well as adding support for more - natural date keywords. - """ - - tz = get_current_timezone() - local_now = now().astimezone(tz) - today = local_now.date() - - # all supported Keywords - pattern = r"(\b(?:added|created|modified))\s*:\s*[\"']?(today|yesterday|this month|previous month|previous week|previous quarter|this year|previous year)[\"']?" - - def repl(m): - field = m.group(1) - keyword = m.group(2).lower() - - match keyword: - case "today": - start = datetime.combine(today, time.min, tzinfo=tz) - end = datetime.combine(today, time.max, tzinfo=tz) - - case "yesterday": - yesterday = today - timedelta(days=1) - start = datetime.combine(yesterday, time.min, tzinfo=tz) - end = datetime.combine(yesterday, time.max, tzinfo=tz) - - case "this month": - start = datetime(local_now.year, local_now.month, 1, 0, 0, 0, tzinfo=tz) - end = start + relativedelta(months=1) - timedelta(seconds=1) - - case "previous month": - this_month_start = datetime( - local_now.year, - local_now.month, - 1, - 0, - 0, - 0, - tzinfo=tz, - ) - start = this_month_start - relativedelta(months=1) - end = this_month_start - timedelta(seconds=1) - - case "this year": - start = datetime(local_now.year, 1, 1, 0, 0, 0, tzinfo=tz) - end = datetime(local_now.year, 12, 31, 23, 59, 59, tzinfo=tz) - - case "previous week": - days_since_monday = local_now.weekday() - this_week_start = datetime.combine( - today - timedelta(days=days_since_monday), - time.min, - tzinfo=tz, - ) - start = this_week_start - 
timedelta(days=7) - end = this_week_start - timedelta(seconds=1) - - case "previous quarter": - current_quarter = (local_now.month - 1) // 3 + 1 - this_quarter_start_month = (current_quarter - 1) * 3 + 1 - this_quarter_start = datetime( - local_now.year, - this_quarter_start_month, - 1, - 0, - 0, - 0, - tzinfo=tz, - ) - start = this_quarter_start - relativedelta(months=3) - end = this_quarter_start - timedelta(seconds=1) - - case "previous year": - start = datetime(local_now.year - 1, 1, 1, 0, 0, 0, tzinfo=tz) - end = datetime(local_now.year - 1, 12, 31, 23, 59, 59, tzinfo=tz) - - # Convert to UTC and format - start_str = start.astimezone(UTC).strftime("%Y%m%d%H%M%S") - end_str = end.astimezone(UTC).strftime("%Y%m%d%H%M%S") - return f"{field}:[{start_str} TO {end_str}]" - - return re.sub(pattern, repl, query_string, flags=re.IGNORECASE) diff --git a/src/documents/management/commands/document_index.py b/src/documents/management/commands/document_index.py index 742922010..c4f72dd3a 100644 --- a/src/documents/management/commands/document_index.py +++ b/src/documents/management/commands/document_index.py @@ -1,11 +1,26 @@ +import logging + +from django.conf import settings from django.db import transaction from documents.management.commands.base import PaperlessCommand -from documents.tasks import index_optimize -from documents.tasks import index_reindex +from documents.models import Document +from documents.search import get_backend +from documents.search import needs_rebuild +from documents.search import reset_backend +from documents.search import wipe_index + +logger = logging.getLogger("paperless.management.document_index") class Command(PaperlessCommand): + """ + Django management command for search index operations. + + Provides subcommands for reindexing documents and optimizing the search index. + Supports conditional reindexing based on schema version and language changes. + """ + help = "Manages the document index." 
supports_progress_bar = True @@ -14,15 +29,49 @@ class Command(PaperlessCommand): def add_arguments(self, parser): super().add_arguments(parser) parser.add_argument("command", choices=["reindex", "optimize"]) + parser.add_argument( + "--recreate", + action="store_true", + default=False, + help="Wipe and recreate the index from scratch (only used with reindex).", + ) + parser.add_argument( + "--if-needed", + action="store_true", + default=False, + help=( + "Skip reindex if the index is already up to date. " + "Checks schema version and search language sentinels. " + "Safe to run on every startup or upgrade." + ), + ) def handle(self, *args, **options): with transaction.atomic(): if options["command"] == "reindex": - index_reindex( + if options.get("if_needed") and not needs_rebuild(settings.INDEX_DIR): + self.stdout.write("Search index is up to date.") + return + if options.get("recreate"): + wipe_index(settings.INDEX_DIR) + + documents = Document.objects.select_related( + "correspondent", + "document_type", + "storage_path", + "owner", + ).prefetch_related("tags", "notes", "custom_fields", "versions") + get_backend().rebuild( + documents, iter_wrapper=lambda docs: self.track( docs, description="Indexing documents...", ), ) + reset_backend() + elif options["command"] == "optimize": - index_optimize() + logger.info( + "document_index optimize is a no-op — Tantivy manages " + "segment merging automatically.", + ) diff --git a/src/documents/migrations/0017_migrate_fulltext_query_field_prefixes.py b/src/documents/migrations/0017_migrate_fulltext_query_field_prefixes.py new file mode 100644 index 000000000..040780a60 --- /dev/null +++ b/src/documents/migrations/0017_migrate_fulltext_query_field_prefixes.py @@ -0,0 +1,39 @@ +import re + +from django.db import migrations + +# Matches "note:" when NOT preceded by a word character or dot. +# This avoids false positives like "denote:" or already-migrated "notes.note:". 
+# Handles start-of-string, whitespace, parentheses, +/- operators per Whoosh syntax. +_NOTE_RE = re.compile(r"(? "custom_fields.value:" +_CUSTOM_FIELD_RE = re.compile(r"(? str: - value = ( - next( - option.get("label") - for option in self.field.extra_data["select_options"] - if option.get("id") == self.value_select - ) - if ( - self.field.data_type == CustomField.FieldDataType.SELECT - and self.value_select is not None - ) - else self.value - ) - return str(self.field.name) + f" : {value}" + return str(self.field.name) + f" : {self.value_for_search}" @classmethod def get_value_field_name(cls, data_type: CustomField.FieldDataType): @@ -1144,6 +1132,25 @@ class CustomFieldInstance(SoftDeleteModel): value_field_name = self.get_value_field_name(self.field.data_type) return getattr(self, value_field_name) + @property + def value_for_search(self) -> str | None: + """ + Return the value suitable for full-text indexing and display, or None + if the value is unset. + + For SELECT fields, resolves the human-readable label rather than the + opaque option ID stored in value_select. + """ + if self.value is None: + return None + if self.field.data_type == CustomField.FieldDataType.SELECT: + options = (self.field.extra_data or {}).get("select_options", []) + return next( + (o["label"] for o in options if o.get("id") == self.value), + None, + ) + return str(self.value) + if settings.AUDIT_LOG_ENABLED: auditlog.register( diff --git a/src/documents/sanity_checker.py b/src/documents/sanity_checker.py index b53ed8cfb..0b3dea368 100644 --- a/src/documents/sanity_checker.py +++ b/src/documents/sanity_checker.py @@ -9,19 +9,14 @@ to wrap the document queryset (e.g., with a progress bar). The default is an identity function that adds no overhead. 
""" -from __future__ import annotations - import logging import uuid from collections import defaultdict -from collections.abc import Callable -from collections.abc import Iterable from collections.abc import Iterator from pathlib import Path from typing import TYPE_CHECKING from typing import Final from typing import TypedDict -from typing import TypeVar from celery import states from django.conf import settings @@ -29,14 +24,13 @@ from django.utils import timezone from documents.models import Document from documents.models import PaperlessTask +from documents.utils import IterWrapper from documents.utils import compute_checksum +from documents.utils import identity from paperless.config import GeneralConfig logger = logging.getLogger("paperless.sanity_checker") -_T = TypeVar("_T") -IterWrapper = Callable[[Iterable[_T]], Iterable[_T]] - class MessageEntry(TypedDict): """A single sanity check message with its severity level.""" @@ -45,11 +39,6 @@ class MessageEntry(TypedDict): message: str -def _identity(iterable: Iterable[_T]) -> Iterable[_T]: - """Pass through an iterable unchanged (default iter_wrapper).""" - return iterable - - class SanityCheckMessages: """Collects sanity check messages grouped by document primary key. @@ -296,7 +285,7 @@ def _check_document( def check_sanity( *, scheduled: bool = True, - iter_wrapper: IterWrapper[Document] = _identity, + iter_wrapper: IterWrapper[Document] = identity, ) -> SanityCheckMessages: """Run a full sanity check on the document archive. 
diff --git a/src/documents/search/__init__.py b/src/documents/search/__init__.py new file mode 100644 index 000000000..b0a89f242 --- /dev/null +++ b/src/documents/search/__init__.py @@ -0,0 +1,21 @@ +from documents.search._backend import SearchIndexLockError +from documents.search._backend import SearchResults +from documents.search._backend import TantivyBackend +from documents.search._backend import TantivyRelevanceList +from documents.search._backend import WriteBatch +from documents.search._backend import get_backend +from documents.search._backend import reset_backend +from documents.search._schema import needs_rebuild +from documents.search._schema import wipe_index + +__all__ = [ + "SearchIndexLockError", + "SearchResults", + "TantivyBackend", + "TantivyRelevanceList", + "WriteBatch", + "get_backend", + "needs_rebuild", + "reset_backend", + "wipe_index", +] diff --git a/src/documents/search/_backend.py b/src/documents/search/_backend.py new file mode 100644 index 000000000..a1bff8a9f --- /dev/null +++ b/src/documents/search/_backend.py @@ -0,0 +1,858 @@ +from __future__ import annotations + +import logging +import threading +import unicodedata +from collections import Counter +from dataclasses import dataclass +from datetime import UTC +from datetime import datetime +from typing import TYPE_CHECKING +from typing import Self +from typing import TypedDict +from typing import TypeVar + +import filelock +import regex +import tantivy +from django.conf import settings +from django.utils.timezone import get_current_timezone +from guardian.shortcuts import get_users_with_perms + +from documents.search._query import build_permission_filter +from documents.search._query import parse_user_query +from documents.search._schema import _write_sentinels +from documents.search._schema import build_schema +from documents.search._schema import open_or_rebuild_index +from documents.search._schema import wipe_index +from documents.search._tokenizer import register_tokenizers 
+from documents.utils import IterWrapper +from documents.utils import identity + +if TYPE_CHECKING: + from pathlib import Path + + from django.contrib.auth.base_user import AbstractBaseUser + from django.db.models import QuerySet + + from documents.models import Document + +logger = logging.getLogger("paperless.search") + +_WORD_RE = regex.compile(r"\w+") +_AUTOCOMPLETE_REGEX_TIMEOUT = 1.0 # seconds; guards against ReDoS on untrusted content + +T = TypeVar("T") + + +def _ascii_fold(s: str) -> str: + """ + Normalize unicode to ASCII equivalent characters for search consistency. + + Converts accented characters (e.g., "café") to their ASCII base forms ("cafe") + to enable cross-language searching without requiring exact diacritic matching. + """ + return unicodedata.normalize("NFD", s).encode("ascii", "ignore").decode() + + +def _extract_autocomplete_words(text_sources: list[str]) -> set[str]: + """Extract and normalize words for autocomplete. + + Splits on non-word characters (matching Tantivy's simple tokenizer), lowercases, + and ascii-folds each token. Uses the regex library with a timeout to guard against + ReDoS on untrusted document content. + """ + words = set() + for text in text_sources: + if not text: + continue + try: + tokens = _WORD_RE.findall(text, timeout=_AUTOCOMPLETE_REGEX_TIMEOUT) + except TimeoutError: # pragma: no cover + logger.warning( + "Autocomplete word extraction timed out for a text source; skipping.", + ) + continue + for token in tokens: + normalized = _ascii_fold(token.lower()) + if normalized: + words.add(normalized) + return words + + +class SearchHit(TypedDict): + """Type definition for search result hits.""" + + id: int + score: float + rank: int + highlights: dict[str, str] + + +@dataclass(frozen=True, slots=True) +class SearchResults: + """ + Container for search results with pagination metadata. 
+ + Attributes: + hits: List of search results with scores and highlights + total: Total matching documents across all pages (for pagination) + query: Preprocessed query string after date/syntax rewriting + """ + + hits: list[SearchHit] + total: int # total matching documents (for pagination) + query: str # preprocessed query string + + +class TantivyRelevanceList: + """ + DRF-compatible list wrapper for Tantivy search hits. + + Provides paginated access to search results while storing all hits in memory + for efficient ID retrieval. Used by Django REST framework for pagination. + + Methods: + __len__: Returns total hit count for pagination calculations + __getitem__: Slices the hit list for page-specific results + + Note: Stores ALL post-filter hits so get_all_result_ids() can return + every matching document ID without requiring a second search query. + """ + + def __init__(self, hits: list[SearchHit]) -> None: + self._hits = hits + + def __len__(self) -> int: + return len(self._hits) + + def __getitem__(self, key: slice) -> list[SearchHit]: + return self._hits[key] + + +class SearchIndexLockError(Exception): + """Raised when the search index file lock cannot be acquired within the timeout.""" + + +class WriteBatch: + """ + Context manager for bulk index operations with file locking. + + Provides transactional batch updates to the search index with proper + concurrency control via file locking. All operations within the batch + are committed atomically or rolled back on exception. 
+ + Usage: + with backend.batch_update() as batch: + batch.add_or_update(document) + batch.remove(doc_id) + """ + + def __init__(self, backend: TantivyBackend, lock_timeout: float): + self._backend = backend + self._lock_timeout = lock_timeout + self._writer = None + self._lock = None + + def __enter__(self) -> Self: + if self._backend._path is not None: + lock_path = self._backend._path / ".tantivy.lock" + self._lock = filelock.FileLock(str(lock_path)) + try: + self._lock.acquire(timeout=self._lock_timeout) + except filelock.Timeout as e: # pragma: no cover + raise SearchIndexLockError( + f"Could not acquire index lock within {self._lock_timeout}s", + ) from e + + self._writer = self._backend._index.writer() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + try: + if exc_type is None: + self._writer.commit() + self._backend._index.reload() + # Explicitly delete writer to release tantivy's internal lock. + # On exception the uncommitted writer is simply discarded. + if self._writer is not None: + del self._writer + self._writer = None + finally: + if self._lock is not None: + self._lock.release() + + def add_or_update( + self, + document: Document, + effective_content: str | None = None, + ) -> None: + """ + Add or update a document in the batch. + + Implements upsert behavior by deleting any existing document with the same ID + and adding the new version. This ensures stale document data (e.g., after + permission changes) doesn't persist in the index. + + Args: + document: Django Document instance to index + effective_content: Override document.content for indexing (used when + re-indexing with newer OCR text from document versions) + """ + self.remove(document.pk) + doc = self._backend._build_tantivy_doc(document, effective_content) + self._writer.add_document(doc) + + def remove(self, doc_id: int) -> None: + """ + Remove a document from the batch by its primary key. 
+ + Uses range query instead of term query to work around unsigned integer + type detection bug in tantivy-py 0.25. + """ + # Use range query to work around u64 deletion bug + self._writer.delete_documents_by_query( + tantivy.Query.range_query( + self._backend._schema, + "id", + tantivy.FieldType.Unsigned, + doc_id, + doc_id, + ), + ) + + +class TantivyBackend: + """ + Tantivy search backend with explicit lifecycle management. + + Provides full-text search capabilities using the Tantivy search engine. + Supports in-memory indexes (for testing) and persistent on-disk indexes + (for production use). Handles document indexing, search queries, autocompletion, + and "more like this" functionality. + + The backend manages its own connection lifecycle and can be reset when + the underlying index directory changes (e.g., during test isolation). + """ + + def __init__(self, path: Path | None = None): + # path=None → in-memory index (for tests) + # path=some_dir → on-disk index (for production) + self._path = path + self._index = None + self._schema = None + + def open(self) -> None: + """ + Open or rebuild the index as needed. + + For disk-based indexes, checks if rebuilding is needed due to schema + version or language changes. Registers custom tokenizers after opening. + Safe to call multiple times - subsequent calls are no-ops. + """ + if self._index is not None: + return # pragma: no cover + if self._path is not None: + self._index = open_or_rebuild_index(self._path) + else: + self._index = tantivy.Index(build_schema()) + register_tokenizers(self._index, settings.SEARCH_LANGUAGE) + self._schema = self._index.schema + + def close(self) -> None: + """ + Close the index and release resources. + + Safe to call multiple times - subsequent calls are no-ops. 
+ """ + self._index = None + self._schema = None + + def _ensure_open(self) -> None: + """Ensure the index is open before operations.""" + if self._index is None: + self.open() # pragma: no cover + + def _build_tantivy_doc( + self, + document: Document, + effective_content: str | None = None, + ) -> tantivy.Document: + """Build a tantivy Document from a Django Document instance. + + ``effective_content`` overrides ``document.content`` for indexing — + used when re-indexing a root document with a newer version's OCR text. + """ + content = ( + effective_content if effective_content is not None else document.content + ) + + doc = tantivy.Document() + + # Basic fields + doc.add_unsigned("id", document.pk) + doc.add_text("checksum", document.checksum) + doc.add_text("title", document.title) + doc.add_text("title_sort", document.title) + doc.add_text("content", content) + doc.add_text("bigram_content", content) + + # Original filename - only add if not None/empty + if document.original_filename: + doc.add_text("original_filename", document.original_filename) + + # Correspondent + if document.correspondent: + doc.add_text("correspondent", document.correspondent.name) + doc.add_text("correspondent_sort", document.correspondent.name) + doc.add_unsigned("correspondent_id", document.correspondent_id) + + # Document type + if document.document_type: + doc.add_text("document_type", document.document_type.name) + doc.add_text("type_sort", document.document_type.name) + doc.add_unsigned("document_type_id", document.document_type_id) + + # Storage path + if document.storage_path: + doc.add_text("storage_path", document.storage_path.name) + doc.add_unsigned("storage_path_id", document.storage_path_id) + + # Tags — collect names for autocomplete in the same pass + tag_names: list[str] = [] + for tag in document.tags.all(): + doc.add_text("tag", tag.name) + doc.add_unsigned("tag_id", tag.pk) + tag_names.append(tag.name) + + # Notes — JSON for structured queries (notes.user:alice, 
notes.note:text), + # companion text field for default full-text search. + num_notes = 0 + for note in document.notes.all(): + num_notes += 1 + doc.add_json("notes", {"note": note.note, "user": note.user.username}) + + # Custom fields — JSON for structured queries (custom_fields.name:x, custom_fields.value:y), + # companion text field for default full-text search. + for cfi in document.custom_fields.all(): + search_value = cfi.value_for_search + # Skip fields where there is no value yet + if search_value is None: + continue + doc.add_json( + "custom_fields", + { + "name": cfi.field.name, + "value": search_value, + }, + ) + + # Dates + created_date = datetime( + document.created.year, + document.created.month, + document.created.day, + tzinfo=UTC, + ) + doc.add_date("created", created_date) + doc.add_date("modified", document.modified) + doc.add_date("added", document.added) + + if document.archive_serial_number is not None: + doc.add_unsigned("asn", document.archive_serial_number) + + if document.page_count is not None: + doc.add_unsigned("page_count", document.page_count) + + doc.add_unsigned("num_notes", num_notes) + + # Owner + if document.owner_id: + doc.add_unsigned("owner_id", document.owner_id) + + # Viewers with permission + users_with_perms = get_users_with_perms( + document, + only_with_perms_in=["view_document"], + ) + for user in users_with_perms: + doc.add_unsigned("viewer_id", user.pk) + + # Autocomplete words + text_sources = [document.title, content] + if document.correspondent: + text_sources.append(document.correspondent.name) + if document.document_type: + text_sources.append(document.document_type.name) + text_sources.extend(tag_names) + + for word in sorted(_extract_autocomplete_words(text_sources)): + doc.add_text("autocomplete_word", word) + + return doc + + def add_or_update( + self, + document: Document, + effective_content: str | None = None, + ) -> None: + """ + Add or update a single document with file locking. 
+ + Convenience method for single-document updates. For bulk operations, + use batch_update() context manager for better performance. + + Args: + document: Django Document instance to index + effective_content: Override document.content for indexing + """ + self._ensure_open() + with self.batch_update(lock_timeout=5.0) as batch: + batch.add_or_update(document, effective_content) + + def remove(self, doc_id: int) -> None: + """ + Remove a single document from the index with file locking. + + Convenience method for single-document removal. For bulk operations, + use batch_update() context manager for better performance. + + Args: + doc_id: Primary key of the document to remove + """ + self._ensure_open() + with self.batch_update(lock_timeout=5.0) as batch: + batch.remove(doc_id) + + def search( + self, + query: str, + user: AbstractBaseUser | None, + page: int, + page_size: int, + sort_field: str | None, + *, + sort_reverse: bool, + ) -> SearchResults: + """ + Execute a search query against the document index. + + Processes the user query through date rewriting, normalization, and + permission filtering before executing against Tantivy. Supports both + relevance-based and field-based sorting. 
+ + Args: + query: User's search query (supports natural date keywords, field filters) + user: User for permission filtering (None for superuser/no filtering) + page: Page number (1-indexed) for pagination + page_size: Number of results per page + sort_field: Field to sort by (None for relevance ranking) + sort_reverse: Whether to reverse the sort order + + Returns: + SearchResults with hits, total count, and processed query + """ + self._ensure_open() + tz = get_current_timezone() + user_query = parse_user_query(self._index, query, tz) + + # Apply permission filter if user is not None (not superuser) + if user is not None: + permission_filter = build_permission_filter(self._schema, user) + final_query = tantivy.Query.boolean_query( + [ + (tantivy.Occur.Must, user_query), + (tantivy.Occur.Must, permission_filter), + ], + ) + else: + final_query = user_query + + searcher = self._index.searcher() + offset = (page - 1) * page_size + + # Map sort fields + sort_field_map = { + "title": "title_sort", + "correspondent__name": "correspondent_sort", + "document_type__name": "type_sort", + "created": "created", + "added": "added", + "modified": "modified", + "archive_serial_number": "asn", + "page_count": "page_count", + "num_notes": "num_notes", + } + + # Perform search + if sort_field and sort_field in sort_field_map: + mapped_field = sort_field_map[sort_field] + results = searcher.search( + final_query, + limit=offset + page_size, + order_by_field=mapped_field, + order=tantivy.Order.Desc if sort_reverse else tantivy.Order.Asc, + ) + # Field sorting: hits are still (score, DocAddress) tuples; score unused + all_hits = [(hit[1], 0.0) for hit in results.hits] + else: + # Score-based search: hits are (score, DocAddress) tuples + results = searcher.search(final_query, limit=offset + page_size) + all_hits = [(hit[1], hit[0]) for hit in results.hits] + + total = results.count + + # Normalize scores for score-based searches + if not sort_field and all_hits: + max_score = 
max(hit[1] for hit in all_hits) or 1.0 + all_hits = [(hit[0], hit[1] / max_score) for hit in all_hits] + + # Apply threshold filter if configured (score-based search only) + threshold = settings.ADVANCED_FUZZY_SEARCH_THRESHOLD + if threshold is not None and not sort_field: + all_hits = [hit for hit in all_hits if hit[1] >= threshold] + + # Get the page's hits + page_hits = all_hits[offset : offset + page_size] + + # Build result hits with highlights + hits: list[SearchHit] = [] + snippet_generator = None + + for rank, (doc_address, score) in enumerate(page_hits, start=offset + 1): + # Get the actual document from the searcher using the doc address + actual_doc = searcher.doc(doc_address) + doc_dict = actual_doc.to_dict() + doc_id = doc_dict["id"][0] + + highlights: dict[str, str] = {} + + # Generate highlights if score > 0 + if score > 0: + try: + if snippet_generator is None: + snippet_generator = tantivy.SnippetGenerator.create( + searcher, + final_query, + self._schema, + "content", + ) + + content_snippet = snippet_generator.snippet_from_doc(actual_doc) + if content_snippet: + highlights["content"] = str(content_snippet) + + # Try notes highlights + if "notes" in doc_dict: + notes_generator = tantivy.SnippetGenerator.create( + searcher, + final_query, + self._schema, + "notes", + ) + notes_snippet = notes_generator.snippet_from_doc(actual_doc) + if notes_snippet: + highlights["notes"] = str(notes_snippet) + + except Exception: # pragma: no cover + logger.debug("Failed to generate highlights for doc %s", doc_id) + + hits.append( + SearchHit( + id=doc_id, + score=score, + rank=rank, + highlights=highlights, + ), + ) + + return SearchResults( + hits=hits, + total=total, + query=query, + ) + + def autocomplete( + self, + term: str, + limit: int, + user: AbstractBaseUser | None = None, + ) -> list[str]: + """ + Get autocomplete suggestions for search queries. 
    def more_like_this(
        self,
        doc_id: int,
        user: AbstractBaseUser | None,
        page: int,
        page_size: int,
    ) -> SearchResults:
        """
        Find documents similar to the given document using content analysis.

        Uses Tantivy's "more like this" query to find documents with similar
        content patterns. The original document is excluded from results.

        Args:
            doc_id: Primary key of the reference document
            user: User for permission filtering (None for no filtering)
            page: Page number (1-indexed) for pagination
            page_size: Number of results per page

        Returns:
            SearchResults with similar documents (excluding the original)

        NOTE(review): when the reference document itself lands inside the
        requested page it is skipped, so that page carries fewer than
        ``page_size`` hits and its rank numbering keeps a gap — confirm this
        is acceptable to the API consumers.
        """
        self._ensure_open()
        searcher = self._index.searcher()

        # First find the document address. range_query with lo == hi acts as
        # an exact unsigned-term match (see build_permission_filter notes).
        id_query = tantivy.Query.range_query(
            self._schema,
            "id",
            tantivy.FieldType.Unsigned,
            doc_id,
            doc_id,
        )
        results = searcher.search(id_query, limit=1)

        if not results.hits:
            # Document not found in the index — nothing to be similar to.
            return SearchResults(hits=[], total=0, query=f"more_like:{doc_id}")

        # Extract doc_address from (score, doc_address) tuple
        doc_address = results.hits[0][1]

        # Build more like this query
        mlt_query = tantivy.Query.more_like_this_query(
            doc_address,
            min_doc_frequency=1,
            max_doc_frequency=None,
            min_term_frequency=1,
            max_query_terms=12,
            min_word_length=None,
            max_word_length=None,
            boost_factor=None,
        )

        # Apply permission filter so results never include documents the
        # requesting user cannot see.
        if user is not None:
            permission_filter = build_permission_filter(self._schema, user)
            final_query = tantivy.Query.boolean_query(
                [
                    (tantivy.Occur.Must, mlt_query),
                    (tantivy.Occur.Must, permission_filter),
                ],
            )
        else:
            final_query = mlt_query

        # Search — fetch everything up to the end of the requested page.
        offset = (page - 1) * page_size
        results = searcher.search(final_query, limit=offset + page_size)

        total = results.count
        # Convert from (score, doc_address) to (doc_address, score)
        all_hits = [(hit[1], hit[0]) for hit in results.hits]

        # Normalize scores to [0, 1]; `or 1.0` guards a max score of 0.
        if all_hits:
            max_score = max(hit[1] for hit in all_hits) or 1.0
            all_hits = [(hit[0], hit[1] / max_score) for hit in all_hits]

        # Get page hits
        page_hits = all_hits[offset : offset + page_size]

        # Build results
        hits: list[SearchHit] = []
        for rank, (doc_address, score) in enumerate(page_hits, start=offset + 1):
            actual_doc = searcher.doc(doc_address)
            doc_dict = actual_doc.to_dict()
            result_doc_id = doc_dict["id"][0]

            # Skip the original document — MLT naturally matches it best.
            if result_doc_id == doc_id:
                continue

            hits.append(
                SearchHit(
                    id=result_doc_id,
                    score=score,
                    rank=rank,
                    highlights={},  # MLT doesn't generate highlights
                ),
            )

        return SearchResults(
            hits=hits,
            total=max(0, total - 1),  # Subtract 1 for the original document
            query=f"more_like:{doc_id}",
        )

    def batch_update(self, lock_timeout: float = 30.0) -> WriteBatch:
        """
        Get a batch context manager for bulk index operations.

        Use this for efficient bulk document updates/deletions. All operations
        within the batch are committed atomically at the end of the context.

        Args:
            lock_timeout: Seconds to wait for file lock acquisition

        Returns:
            WriteBatch context manager

        Raises:
            SearchIndexLockError: If lock cannot be acquired within timeout
        """
        self._ensure_open()
        return WriteBatch(self, lock_timeout)

    def rebuild(
        self,
        documents: QuerySet[Document],
        iter_wrapper: IterWrapper[Document] = identity,
    ) -> None:
        """
        Rebuild the entire search index from scratch.

        Wipes the existing index and re-indexes all provided documents.
        On failure, restores the previous index state to keep the backend usable.

        Args:
            documents: QuerySet of Document instances to index
            iter_wrapper: Optional wrapper function for progress tracking
                (e.g., progress bar). Should yield each document unchanged.
        """
        # Create new index (on-disk or in-memory)
        if self._path is not None:
            wipe_index(self._path)
            new_index = tantivy.Index(build_schema(), path=str(self._path))
            _write_sentinels(self._path)
        else:
            new_index = tantivy.Index(build_schema())
        # Tokenizers must be re-registered on every fresh Index instance.
        register_tokenizers(new_index, settings.SEARCH_LANGUAGE)

        # Point instance at the new index so _build_tantivy_doc uses it
        old_index, old_schema = self._index, self._schema
        self._index = new_index
        self._schema = new_index.schema

        try:
            writer = new_index.writer()
            for document in iter_wrapper(documents):
                doc = self._build_tantivy_doc(
                    document,
                    document.get_effective_content(),
                )
                writer.add_document(doc)
            writer.commit()
            new_index.reload()
        except BaseException:  # pragma: no cover
            # Restore old index on failure so the backend remains usable.
            # BaseException so KeyboardInterrupt during a long rebuild also
            # rolls back to the previous (already wiped on disk) state.
            self._index = old_index
            self._schema = old_schema
            raise


# Module-level singleton with proper thread safety
_backend: TantivyBackend | None = None
_backend_path: Path | None = None  # tracks which INDEX_DIR the singleton uses
_backend_lock = threading.RLock()


def get_backend() -> TantivyBackend:
    """
    Get the global backend instance with thread safety.

    Returns a singleton TantivyBackend instance, automatically reinitializing
    when settings.INDEX_DIR changes. This ensures proper test isolation when
    using pytest-xdist or @override_settings that change the index directory.

    Returns:
        Thread-safe singleton TantivyBackend instance
    """
    global _backend, _backend_path

    current_path: Path = settings.INDEX_DIR

    # Fast path: backend is initialized and path hasn't changed (no lock needed)
    if _backend is not None and _backend_path == current_path:
        return _backend

    # Slow path: first call, or INDEX_DIR changed between calls
    with _backend_lock:
        # Double-check after acquiring lock — another thread may have beaten us
        if _backend is not None and _backend_path == current_path:
            return _backend  # pragma: no cover

        if _backend is not None:
            _backend.close()

        _backend = TantivyBackend(path=current_path)
        _backend.open()
        _backend_path = current_path

        return _backend


def reset_backend() -> None:
    """
    Reset the global backend instance with thread safety.

    Forces creation of a new backend instance on the next get_backend() call.
    Used for test isolation and when switching between different index directories.
    """
    global _backend, _backend_path

    with _backend_lock:
        if _backend is not None:
            _backend.close()
        _backend = None
        _backend_path = None
_REGEX_TIMEOUT: Final[float] = 1.0

_DATE_ONLY_FIELDS = frozenset({"created"})

_DATE_KEYWORDS = frozenset(
    {
        "today",
        "yesterday",
        "this_week",
        "last_week",
        "this_month",
        "last_month",
        "this_year",
        "last_year",
    },
)

_FIELD_DATE_RE = regex.compile(
    r"(\w+):(" + "|".join(_DATE_KEYWORDS) + r")\b",
)
_COMPACT_DATE_RE = regex.compile(r"\b(\d{14})\b")
_RELATIVE_RANGE_RE = regex.compile(
    r"\[now([+-]\d+[dhm])?\s+TO\s+now([+-]\d+[dhm])?\]",
    regex.IGNORECASE,
)
# Whoosh-style relative date range: e.g. [-1 week to now], [-7 days to now]
# The groups MUST be named (?P<n>…) / (?P<unit>…) — the substitution callback
# reads m.group("n") and m.group("unit").
_WHOOSH_REL_RANGE_RE = regex.compile(
    r"\[-(?P<n>\d+)\s+(?P<unit>second|minute|hour|day|week|month|year)s?\s+to\s+now\]",
    regex.IGNORECASE,
)
# Whoosh-style 8-digit date: field:YYYYMMDD — field-aware so timezone can be
# applied correctly; named groups are read via m.group("field")/m.group("date8").
_DATE8_RE = regex.compile(r"(?P<field>\w+):(?P<date8>\d{8})\b")


def _fmt(dt: datetime) -> str:
    """Format a datetime as an ISO 8601 UTC string for use in Tantivy range queries."""
    return dt.astimezone(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")


def _iso_range(lo: datetime, hi: datetime) -> str:
    """Format a [lo TO hi] range string in ISO 8601 for Tantivy query syntax."""
    return f"[{_fmt(lo)} TO {_fmt(hi)}]"


def _date_only_range(keyword: str, tz: tzinfo) -> str:
    """
    For `created` (DateField): use the local calendar date, converted to
    midnight UTC boundaries. No offset arithmetic — date only.

    Raises:
        ValueError: If `keyword` is not one of the supported date keywords.
    """

    today = datetime.now(tz).date()

    if keyword == "today":
        lo = datetime(today.year, today.month, today.day, tzinfo=UTC)
        return _iso_range(lo, lo + timedelta(days=1))
    if keyword == "yesterday":
        y = today - timedelta(days=1)
        lo = datetime(y.year, y.month, y.day, tzinfo=UTC)
        hi = datetime(today.year, today.month, today.day, tzinfo=UTC)
        return _iso_range(lo, hi)
    if keyword == "this_week":
        # Weeks start on Monday (weekday() == 0).
        mon = today - timedelta(days=today.weekday())
        lo = datetime(mon.year, mon.month, mon.day, tzinfo=UTC)
        return _iso_range(lo, lo + timedelta(weeks=1))
    if keyword == "last_week":
        this_mon = today - timedelta(days=today.weekday())
        last_mon = this_mon - timedelta(weeks=1)
        lo = datetime(last_mon.year, last_mon.month, last_mon.day, tzinfo=UTC)
        hi = datetime(this_mon.year, this_mon.month, this_mon.day, tzinfo=UTC)
        return _iso_range(lo, hi)
    if keyword == "this_month":
        lo = datetime(today.year, today.month, 1, tzinfo=UTC)
        if today.month == 12:
            hi = datetime(today.year + 1, 1, 1, tzinfo=UTC)
        else:
            hi = datetime(today.year, today.month + 1, 1, tzinfo=UTC)
        return _iso_range(lo, hi)
    if keyword == "last_month":
        if today.month == 1:
            lo = datetime(today.year - 1, 12, 1, tzinfo=UTC)
        else:
            lo = datetime(today.year, today.month - 1, 1, tzinfo=UTC)
        hi = datetime(today.year, today.month, 1, tzinfo=UTC)
        return _iso_range(lo, hi)
    if keyword == "this_year":
        lo = datetime(today.year, 1, 1, tzinfo=UTC)
        return _iso_range(lo, datetime(today.year + 1, 1, 1, tzinfo=UTC))
    if keyword == "last_year":
        lo = datetime(today.year - 1, 1, 1, tzinfo=UTC)
        return _iso_range(lo, datetime(today.year, 1, 1, tzinfo=UTC))
    raise ValueError(f"Unknown keyword: {keyword}")


def _datetime_range(keyword: str, tz: tzinfo) -> str:
    """
    For `added` / `modified` (DateTimeField, stored as UTC): convert local day
    boundaries to UTC — full offset arithmetic required.

    Raises:
        ValueError: If `keyword` is not one of the supported date keywords.
    """

    now_local = datetime.now(tz)
    today = now_local.date()

    def _midnight(d: date) -> datetime:
        # Local midnight of calendar day `d`, expressed in UTC.
        return datetime(d.year, d.month, d.day, tzinfo=tz).astimezone(UTC)

    if keyword == "today":
        return _iso_range(_midnight(today), _midnight(today + timedelta(days=1)))
    if keyword == "yesterday":
        y = today - timedelta(days=1)
        return _iso_range(_midnight(y), _midnight(today))
    if keyword == "this_week":
        mon = today - timedelta(days=today.weekday())
        return _iso_range(_midnight(mon), _midnight(mon + timedelta(weeks=1)))
    if keyword == "last_week":
        this_mon = today - timedelta(days=today.weekday())
        last_mon = this_mon - timedelta(weeks=1)
        return _iso_range(_midnight(last_mon), _midnight(this_mon))
    if keyword == "this_month":
        first = today.replace(day=1)
        if today.month == 12:
            next_first = date(today.year + 1, 1, 1)
        else:
            next_first = date(today.year, today.month + 1, 1)
        return _iso_range(_midnight(first), _midnight(next_first))
    if keyword == "last_month":
        this_first = today.replace(day=1)
        if today.month == 1:
            last_first = date(today.year - 1, 12, 1)
        else:
            last_first = date(today.year, today.month - 1, 1)
        return _iso_range(_midnight(last_first), _midnight(this_first))
    if keyword == "this_year":
        return _iso_range(
            _midnight(date(today.year, 1, 1)),
            _midnight(date(today.year + 1, 1, 1)),
        )
    if keyword == "last_year":
        return _iso_range(
            _midnight(date(today.year - 1, 1, 1)),
            _midnight(date(today.year, 1, 1)),
        )
    raise ValueError(f"Unknown keyword: {keyword}")


def _rewrite_compact_date(query: str) -> str:
    """Rewrite Whoosh compact date tokens (14-digit YYYYMMDDHHmmss) to ISO 8601."""

    def _sub(m: regex.Match[str]) -> str:
        raw = m.group(1)
        try:
            dt = datetime(
                int(raw[0:4]),
                int(raw[4:6]),
                int(raw[6:8]),
                int(raw[8:10]),
                int(raw[10:12]),
                int(raw[12:14]),
                tzinfo=UTC,
            )
            return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
        except ValueError:
            # Not a valid timestamp (e.g. month 13) — leave the token alone.
            return str(m.group(0))

    try:
        return _COMPACT_DATE_RE.sub(_sub, query, timeout=_REGEX_TIMEOUT)
    except TimeoutError:  # pragma: no cover
        raise ValueError(
            "Query too complex to process (compact date rewrite timed out)",
        )


def _rewrite_relative_range(query: str) -> str:
    """Rewrite Whoosh relative ranges ([now-7d TO now]) to concrete ISO 8601 UTC boundaries."""

    def _sub(m: regex.Match[str]) -> str:
        now = datetime.now(UTC)

        def _offset(s: str | None) -> timedelta:
            if not s:
                return timedelta(0)
            sign = 1 if s[0] == "+" else -1
            n, unit = int(s[1:-1]), s[-1]
            return (
                sign
                * {
                    "d": timedelta(days=n),
                    "h": timedelta(hours=n),
                    "m": timedelta(minutes=n),
                }[unit]
            )

        lo, hi = now + _offset(m.group(1)), now + _offset(m.group(2))
        # Swap if the user wrote the bounds backwards so the range stays valid.
        if lo > hi:
            lo, hi = hi, lo
        return f"[{_fmt(lo)} TO {_fmt(hi)}]"

    try:
        return _RELATIVE_RANGE_RE.sub(_sub, query, timeout=_REGEX_TIMEOUT)
    except TimeoutError:  # pragma: no cover
        raise ValueError(
            "Query too complex to process (relative range rewrite timed out)",
        )


def _rewrite_whoosh_relative_range(query: str) -> str:
    """Rewrite Whoosh-style relative date ranges ([-N unit to now]) to ISO 8601.

    Supports: second, minute, hour, day, week, month, year (singular and plural).
    Example: ``added:[-1 week to now]`` → ``added:[2025-01-01T… TO 2025-01-08T…]``
    """
    now = datetime.now(UTC)

    def _sub(m: regex.Match[str]) -> str:
        n = int(m.group("n"))
        unit = m.group("unit").lower()
        # month/year need relativedelta for correct calendar arithmetic.
        delta_map: dict[str, timedelta | relativedelta] = {
            "second": timedelta(seconds=n),
            "minute": timedelta(minutes=n),
            "hour": timedelta(hours=n),
            "day": timedelta(days=n),
            "week": timedelta(weeks=n),
            "month": relativedelta(months=n),
            "year": relativedelta(years=n),
        }
        lo = now - delta_map[unit]
        return f"[{_fmt(lo)} TO {_fmt(now)}]"

    try:
        return _WHOOSH_REL_RANGE_RE.sub(_sub, query, timeout=_REGEX_TIMEOUT)
    except TimeoutError:  # pragma: no cover
        raise ValueError(
            "Query too complex to process (Whoosh relative range rewrite timed out)",
        )


def _rewrite_8digit_date(query: str, tz: tzinfo) -> str:
    """Rewrite field:YYYYMMDD date tokens to an ISO 8601 day range.

    Runs after ``_rewrite_compact_date`` so 14-digit timestamps are already
    converted and won't spuriously match here.

    For DateField fields (e.g. ``created``) uses UTC midnight boundaries.
    For DateTimeField fields (e.g. ``added``, ``modified``) uses local TZ
    midnight boundaries converted to UTC — matching the ``_datetime_range``
    behaviour for keyword dates.
    """

    def _sub(m: regex.Match[str]) -> str:
        field = m.group("field")
        raw = m.group("date8")
        try:
            year, month, day = int(raw[0:4]), int(raw[4:6]), int(raw[6:8])
            d = date(year, month, day)
            if field in _DATE_ONLY_FIELDS:
                lo = datetime(d.year, d.month, d.day, tzinfo=UTC)
                hi = lo + timedelta(days=1)
            else:
                # DateTimeField: use local-timezone midnight → UTC
                lo = datetime(d.year, d.month, d.day, tzinfo=tz).astimezone(UTC)
                hi = datetime(
                    (d + timedelta(days=1)).year,
                    (d + timedelta(days=1)).month,
                    (d + timedelta(days=1)).day,
                    tzinfo=tz,
                ).astimezone(UTC)
            return f"{field}:[{_fmt(lo)} TO {_fmt(hi)}]"
        except ValueError:
            # Not a real calendar date — leave the original token untouched.
            return m.group(0)

    try:
        return _DATE8_RE.sub(_sub, query, timeout=_REGEX_TIMEOUT)
    except TimeoutError:  # pragma: no cover
        raise ValueError(
            "Query too complex to process (8-digit date rewrite timed out)",
        )
+ """ + query = _rewrite_compact_date(query) + query = _rewrite_whoosh_relative_range(query) + query = _rewrite_8digit_date(query, tz) + query = _rewrite_relative_range(query) + + def _replace(m: regex.Match[str]) -> str: + field, keyword = m.group(1), m.group(2) + if field in _DATE_ONLY_FIELDS: + return f"{field}:{_date_only_range(keyword, tz)}" + return f"{field}:{_datetime_range(keyword, tz)}" + + try: + return _FIELD_DATE_RE.sub(_replace, query, timeout=_REGEX_TIMEOUT) + except TimeoutError: # pragma: no cover + raise ValueError( + "Query too complex to process (date keyword rewrite timed out)", + ) + + +def normalize_query(query: str) -> str: + """ + Normalize query syntax for better search behavior. + + Expands comma-separated field values to explicit AND clauses and + collapses excessive whitespace for cleaner parsing: + - tag:foo,bar → tag:foo AND tag:bar + - multiple spaces → single spaces + + Args: + query: Query string after date rewriting + + Returns: + Normalized query string ready for Tantivy parsing + """ + + def _expand(m: regex.Match[str]) -> str: + field = m.group(1) + values = [v.strip() for v in m.group(2).split(",") if v.strip()] + return " AND ".join(f"{field}:{v}" for v in values) + + try: + query = regex.sub( + r"(\w+):([^\s\[\]]+(?:,[^\s\[\]]+)+)", + _expand, + query, + timeout=_REGEX_TIMEOUT, + ) + return regex.sub(r" {2,}", " ", query, timeout=_REGEX_TIMEOUT).strip() + except TimeoutError: # pragma: no cover + raise ValueError("Query too complex to process (normalization timed out)") + + +_MAX_U64 = 2**64 - 1 # u64 max — used as inclusive upper bound for "any owner" range + + +def build_permission_filter( + schema: tantivy.Schema, + user: AbstractBaseUser, +) -> tantivy.Query: + """ + Build a query filter for user document permissions. 
+ + Creates a query that matches only documents visible to the specified user + according to paperless-ngx permission rules: + - Public documents (no owner) are visible to all users + - Private documents are visible to their owner + - Documents explicitly shared with the user are visible + + Args: + schema: Tantivy schema for field validation + user: User to check permissions for + + Returns: + Tantivy query that filters results to visible documents + + Implementation Notes: + - Uses range_query instead of term_query to work around unsigned integer + type detection bug in tantivy-py 0.25 + - Uses boolean_query for "no owner" check since exists_query is not + available in tantivy-py 0.25.1 (available in master) + - Uses disjunction_max_query to combine permission clauses with OR logic + """ + owner_any = tantivy.Query.range_query( + schema, + "owner_id", + tantivy.FieldType.Unsigned, + 1, + _MAX_U64, + ) + no_owner = tantivy.Query.boolean_query( + [ + (tantivy.Occur.Must, tantivy.Query.all_query()), + (tantivy.Occur.MustNot, owner_any), + ], + ) + owned = tantivy.Query.range_query( + schema, + "owner_id", + tantivy.FieldType.Unsigned, + user.pk, + user.pk, + ) + shared = tantivy.Query.range_query( + schema, + "viewer_id", + tantivy.FieldType.Unsigned, + user.pk, + user.pk, + ) + return tantivy.Query.disjunction_max_query([no_owner, owned, shared]) + + +DEFAULT_SEARCH_FIELDS = [ + "title", + "content", + "correspondent", + "document_type", + "tag", +] +_FIELD_BOOSTS = {"title": 2.0} + + +def parse_user_query( + index: tantivy.Index, + raw_query: str, + tz: tzinfo, +) -> tantivy.Query: + """ + Parse user query through the complete preprocessing pipeline. + + Transforms the raw user query through multiple stages: + 1. Date keyword rewriting (today → ISO 8601 ranges) + 2. Query normalization (comma expansion, whitespace cleanup) + 3. Tantivy parsing with field boosts + 4. 
def parse_user_query(
    index: tantivy.Index,
    raw_query: str,
    tz: tzinfo,
) -> tantivy.Query:
    """
    Parse user query through the complete preprocessing pipeline.

    Transforms the raw user query through multiple stages:
    1. Date keyword rewriting (today → ISO 8601 ranges)
    2. Query normalization (comma expansion, whitespace cleanup)
    3. Tantivy parsing with field boosts
    4. Optional fuzzy query blending (if ADVANCED_FUZZY_SEARCH_THRESHOLD set)

    Args:
        index: Tantivy index with registered tokenizers
        raw_query: Original user query string
        tz: Timezone for date boundary calculations

    Returns:
        Parsed Tantivy query ready for execution

    Note:
        When ADVANCED_FUZZY_SEARCH_THRESHOLD is configured, adds a low-priority
        fuzzy query as a Should clause (0.1 boost) to catch approximate matches
        while keeping exact matches ranked higher. The threshold value is applied
        as a post-search score filter, not during query construction.
    """

    query_str = rewrite_natural_date_keywords(raw_query, tz)
    query_str = normalize_query(query_str)

    exact = index.parse_query(
        query_str,
        DEFAULT_SEARCH_FIELDS,
        field_boosts=_FIELD_BOOSTS,
    )

    threshold = settings.ADVANCED_FUZZY_SEARCH_THRESHOLD
    if threshold is not None:
        fuzzy = index.parse_query(
            query_str,
            DEFAULT_SEARCH_FIELDS,
            field_boosts=_FIELD_BOOSTS,
            # (prefix=True, distance=1, transposition_cost_one=True) — edit-distance fuzziness
            fuzzy_fields={f: (True, 1, True) for f in DEFAULT_SEARCH_FIELDS},
        )
        return tantivy.Query.boolean_query(
            [
                (tantivy.Occur.Should, exact),
                # 0.1 boost keeps fuzzy hits ranked below exact matches (intentional)
                (tantivy.Occur.Should, tantivy.Query.boost_query(fuzzy, 0.1)),
            ],
        )

    return exact


# --- src/documents/search/_schema.py ---

logger = logging.getLogger("paperless.search")

# Bump when the schema below changes incompatibly; triggers a full rebuild.
SCHEMA_VERSION = 1


def build_schema() -> tantivy.Schema:
    """
    Build the Tantivy schema for the paperless document index.

    Creates a comprehensive schema supporting full-text search, filtering,
    sorting, and autocomplete functionality. Includes fields for document
    content, metadata, permissions, custom fields, and notes.

    Returns:
        Configured Tantivy schema ready for index creation
    """
    sb = tantivy.SchemaBuilder()

    sb.add_unsigned_field("id", stored=True, indexed=True, fast=True)
    sb.add_text_field("checksum", stored=True, tokenizer_name="raw")

    # Primary full-text fields, analyzed with the paperless_text tokenizer.
    for field in (
        "title",
        "correspondent",
        "document_type",
        "storage_path",
        "original_filename",
        "content",
    ):
        sb.add_text_field(field, stored=True, tokenizer_name="paperless_text")

    # Shadow sort fields - fast, not stored/indexed
    for field in ("title_sort", "correspondent_sort", "type_sort"):
        sb.add_text_field(
            field,
            stored=False,
            tokenizer_name="simple_analyzer",
            fast=True,
        )

    # CJK support - not stored, indexed only
    sb.add_text_field("bigram_content", stored=False, tokenizer_name="bigram_analyzer")

    # Autocomplete prefix scan - stored, not indexed
    sb.add_text_field("autocomplete_word", stored=True, tokenizer_name="raw")

    sb.add_text_field("tag", stored=True, tokenizer_name="paperless_text")

    # JSON fields — structured queries: notes.user:alice, custom_fields.name:invoice
    sb.add_json_field("notes", stored=True, tokenizer_name="paperless_text")
    sb.add_json_field("custom_fields", stored=True, tokenizer_name="paperless_text")

    # Numeric foreign keys for exact filtering (including permission fields).
    for field in (
        "correspondent_id",
        "document_type_id",
        "storage_path_id",
        "tag_id",
        "owner_id",
        "viewer_id",
    ):
        sb.add_unsigned_field(field, stored=False, indexed=True, fast=True)

    for field in ("created", "modified", "added"):
        sb.add_date_field(field, stored=True, indexed=True, fast=True)

    for field in ("asn", "page_count", "num_notes"):
        sb.add_unsigned_field(field, stored=True, indexed=True, fast=True)

    return sb.build()


def needs_rebuild(index_dir: Path) -> bool:
    """
    Check if the search index needs rebuilding.

    Compares the current schema version and search language configuration
    against sentinel files to determine if the index is compatible with
    the current paperless-ngx version and settings.

    Args:
        index_dir: Path to the search index directory

    Returns:
        True if the index needs rebuilding, False if it's up to date
    """
    version_file = index_dir / ".schema_version"
    if not version_file.exists():
        return True
    try:
        if int(version_file.read_text().strip()) != SCHEMA_VERSION:
            logger.info("Search index schema version mismatch - rebuilding.")
            return True
    except ValueError:
        # Sentinel is corrupt/unreadable as an int — treat as stale.
        return True

    language_file = index_dir / ".schema_language"
    if not language_file.exists():
        logger.info("Search index language sentinel missing - rebuilding.")
        return True
    if language_file.read_text().strip() != (settings.SEARCH_LANGUAGE or ""):
        logger.info("Search index language changed - rebuilding.")
        return True

    return False


def wipe_index(index_dir: Path) -> None:
    """
    Delete all contents of the index directory to prepare for rebuild.

    Recursively removes all files and subdirectories within the index
    directory while preserving the directory itself.

    Args:
        index_dir: Path to the search index directory to clear
    """
    for child in index_dir.iterdir():
        if child.is_dir():
            shutil.rmtree(child)
        else:
            child.unlink()


def _write_sentinels(index_dir: Path) -> None:
    """Write schema version and language sentinel files so the next index open can skip rebuilding."""
    (index_dir / ".schema_version").write_text(str(SCHEMA_VERSION))
    (index_dir / ".schema_language").write_text(settings.SEARCH_LANGUAGE or "")
def open_or_rebuild_index(index_dir: Path | None = None) -> tantivy.Index:
    """
    Open the Tantivy index, creating or rebuilding as needed.

    When the existing index is incompatible (schema version or search
    language changed) the directory is wiped and a fresh index is created
    with the current schema; a missing directory yields an in-memory index.

    Args:
        index_dir: Path to index directory (defaults to settings.INDEX_DIR)

    Returns:
        Opened Tantivy index (caller must register custom tokenizers)
    """
    target = settings.INDEX_DIR if index_dir is None else index_dir

    # No directory on disk yet: serve a transient in-memory index.
    if not target.exists():
        return tantivy.Index(build_schema())

    if needs_rebuild(target):
        # Incompatible index: start over from an empty directory.
        wipe_index(target)
        fresh = tantivy.Index(build_schema(), path=str(target))
        _write_sentinels(target)
        return fresh

    return tantivy.Index.open(str(target))


# --- src/documents/search/_tokenizer.py ---

logger = logging.getLogger("paperless.search")

# Mapping of ISO 639-1 codes (and common aliases) -> Tantivy Snowball name
_LANGUAGE_MAP: dict[str, str] = {
    "ar": "Arabic",
    "arabic": "Arabic",
    "da": "Danish",
    "danish": "Danish",
    "nl": "Dutch",
    "dutch": "Dutch",
    "en": "English",
    "english": "English",
    "fi": "Finnish",
    "finnish": "Finnish",
    "fr": "French",
    "french": "French",
    "de": "German",
    "german": "German",
    "el": "Greek",
    "greek": "Greek",
    "hu": "Hungarian",
    "hungarian": "Hungarian",
    "it": "Italian",
    "italian": "Italian",
    "no": "Norwegian",
    "norwegian": "Norwegian",
    "pt": "Portuguese",
    "portuguese": "Portuguese",
    "ro": "Romanian",
    "romanian": "Romanian",
    "ru": "Russian",
    "russian": "Russian",
    "es": "Spanish",
    "spanish": "Spanish",
    "sv": "Swedish",
    "swedish": "Swedish",
    "ta": "Tamil",
    "tamil": "Tamil",
    "tr": "Turkish",
    "turkish": "Turkish",
}

SUPPORTED_LANGUAGES: frozenset[str] = frozenset(_LANGUAGE_MAP)


def register_tokenizers(index: tantivy.Index, language: str | None) -> None:
    """
    Register all custom tokenizers required by the paperless schema.

    Must be called on every Index instance since Tantivy requires tokenizer
    re-registration after each index open/creation. Registers tokenizers for
    full-text search, sorting, CJK language support, and fast-field indexing.

    Args:
        index: Tantivy index instance to register tokenizers on
        language: ISO 639-1 language code for stemming (None to disable)

    Note:
        simple_analyzer is registered as both a text and fast-field tokenizer
        since sort shadow fields (title_sort, correspondent_sort, type_sort)
        use fast=True and Tantivy requires fast-field tokenizers to exist
        even for documents that omit those fields.
    """
    text_analyzers = {
        "paperless_text": _paperless_text(language),
        "simple_analyzer": _simple_analyzer(),
        "bigram_analyzer": _bigram_analyzer(),
    }
    for name, analyzer in text_analyzers.items():
        index.register_tokenizer(name, analyzer)
    # Fast-field tokenizer required for fast=True text fields in the schema.
    index.register_fast_field_tokenizer("simple_analyzer", _simple_analyzer())


def _paperless_text(language: str | None) -> tantivy.TextAnalyzer:
    """Main full-text tokenizer for content, title, etc: simple -> remove_long(65) -> lowercase -> ascii_fold [-> stemmer]"""
    chain = tantivy.TextAnalyzerBuilder(tantivy.Tokenizer.simple())
    chain = chain.filter(tantivy.Filter.remove_long(65))
    chain = chain.filter(tantivy.Filter.lowercase())
    chain = chain.filter(tantivy.Filter.ascii_fold())

    if language:
        snowball_name = _LANGUAGE_MAP.get(language.lower())
        if snowball_name is None:
            logger.warning(
                "Unsupported search language '%s' - stemming disabled. Supported: %s",
                language,
                ", ".join(sorted(SUPPORTED_LANGUAGES)),
            )
        else:
            chain = chain.filter(tantivy.Filter.stemmer(snowball_name))

    return chain.build()


def _simple_analyzer() -> tantivy.TextAnalyzer:
    """Tokenizer for shadow sort fields (title_sort, correspondent_sort, type_sort): simple -> lowercase -> ascii_fold."""
    builder = tantivy.TextAnalyzerBuilder(tantivy.Tokenizer.simple())
    builder = builder.filter(tantivy.Filter.lowercase())
    builder = builder.filter(tantivy.Filter.ascii_fold())
    return builder.build()


def _bigram_analyzer() -> tantivy.TextAnalyzer:
    """Enables substring search in CJK text: ngram(2,2) -> lowercase. CJK / no-whitespace language support."""
    two_gram = tantivy.Tokenizer.ngram(min_gram=2, max_gram=2, prefix_only=False)
    return (
        tantivy.TextAnalyzerBuilder(two_gram)
        .filter(tantivy.Filter.lowercase())
        .build()
    )
def add_to_index(sender, document, **kwargs) -> None:
    """Signal handler: (re)index *document* in the Tantivy search backend."""
    from documents.search import get_backend

    backend = get_backend()
    backend.add_or_update(
        document,
        effective_content=document.get_effective_content(),
    )
Iterable[_T]) -> Iterable[_T]: - return iterable - - @shared_task def index_optimize() -> None: - ix = index.open_index() - writer = AsyncWriter(ix) - writer.commit(optimize=True) - - -def index_reindex(*, iter_wrapper: IterWrapper[Document] = _identity) -> None: - documents = Document.objects.all() - - ix = index.open_index(recreate=True) - - with AsyncWriter(ix) as writer: - for document in iter_wrapper(documents): - index.update_document(writer, document) + logger.info( + "index_optimize is a no-op — Tantivy manages segment merging automatically.", + ) @shared_task @@ -270,9 +250,9 @@ def sanity_check(*, scheduled=True, raise_on_error=True): @shared_task def bulk_update_documents(document_ids) -> None: - documents = Document.objects.filter(id__in=document_ids) + from documents.search import get_backend - ix = index.open_index() + documents = Document.objects.filter(id__in=document_ids) for doc in documents: clear_document_caches(doc.pk) @@ -283,9 +263,9 @@ def bulk_update_documents(document_ids) -> None: ) post_save.send(Document, instance=doc, created=False) - with AsyncWriter(ix) as writer: + with get_backend().batch_update() as batch: for doc in documents: - index.update_document(writer, doc) + batch.add_or_update(doc) ai_config = AIConfig() if ai_config.llm_index_enabled: @@ -389,8 +369,9 @@ def update_document_content_maybe_archive_file(document_id) -> None: logger.info( f"Updating index for document {document_id} ({document.archive_checksum})", ) - with index.open_index_writer() as writer: - index.update_document(writer, document) + from documents.search import get_backend + + get_backend().add_or_update(document) ai_config = AIConfig() if ai_config.llm_index_enabled: @@ -633,7 +614,7 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None: @shared_task def llmindex_index( *, - iter_wrapper: IterWrapper[Document] = _identity, + iter_wrapper: IterWrapper[Document] = identity, rebuild=False, scheduled=True, auto=False, diff --git 
a/src/documents/tests/conftest.py b/src/documents/tests/conftest.py index 7e75b9194..5cde34768 100644 --- a/src/documents/tests/conftest.py +++ b/src/documents/tests/conftest.py @@ -1,5 +1,6 @@ import shutil import zoneinfo +from collections.abc import Generator from dataclasses import dataclass from pathlib import Path from typing import TYPE_CHECKING @@ -92,6 +93,26 @@ def sample_doc( ) +@pytest.fixture() +def _search_index( + tmp_path: Path, + settings: SettingsWrapper, +) -> Generator[None, None, None]: + """Create a temp index directory and point INDEX_DIR at it. + + Resets the backend singleton before and after so each test gets a clean + index rather than reusing a stale singleton from another test. + """ + from documents.search import reset_backend + + index_dir = tmp_path / "index" + index_dir.mkdir() + settings.INDEX_DIR = index_dir + reset_backend() + yield + reset_backend() + + @pytest.fixture() def settings_timezone(settings: SettingsWrapper) -> zoneinfo.ZoneInfo: return zoneinfo.ZoneInfo(settings.TIME_ZONE) diff --git a/src/documents/tests/search/__init__.py b/src/documents/tests/search/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/documents/tests/search/conftest.py b/src/documents/tests/search/conftest.py new file mode 100644 index 000000000..ccc26d695 --- /dev/null +++ b/src/documents/tests/search/conftest.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +from documents.search._backend import TantivyBackend +from documents.search._backend import reset_backend + +if TYPE_CHECKING: + from collections.abc import Generator + from pathlib import Path + + from pytest_django.fixtures import SettingsWrapper + + +@pytest.fixture +def index_dir(tmp_path: Path, settings: SettingsWrapper) -> Path: + path = tmp_path / "index" + path.mkdir() + settings.INDEX_DIR = path + return path + + +@pytest.fixture +def backend() -> Generator[TantivyBackend, None, None]: + b = 
TantivyBackend() # path=None → in-memory index + b.open() + try: + yield b + finally: + b.close() + reset_backend() diff --git a/src/documents/tests/search/test_backend.py b/src/documents/tests/search/test_backend.py new file mode 100644 index 000000000..5c92da447 --- /dev/null +++ b/src/documents/tests/search/test_backend.py @@ -0,0 +1,502 @@ +import pytest +from django.contrib.auth.models import User + +from documents.models import CustomField +from documents.models import CustomFieldInstance +from documents.models import Document +from documents.models import Note +from documents.search._backend import TantivyBackend +from documents.search._backend import get_backend +from documents.search._backend import reset_backend + +pytestmark = [pytest.mark.search, pytest.mark.django_db] + + +class TestWriteBatch: + """Test WriteBatch context manager functionality.""" + + def test_rolls_back_on_exception(self, backend: TantivyBackend): + """Batch operations must rollback on exception to preserve index integrity.""" + doc = Document.objects.create( + title="Rollback Target", + content="should survive", + checksum="RB1", + pk=1, + ) + backend.add_or_update(doc) + + try: + with backend.batch_update() as batch: + batch.remove(doc.pk) + raise RuntimeError("simulated failure") + except RuntimeError: + pass + + r = backend.search( + "should survive", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + ) + assert r.total == 1 + + +class TestSearch: + """Test search functionality.""" + + def test_scores_normalised_top_hit_is_one(self, backend: TantivyBackend): + """Search scores must be normalized so top hit has score 1.0 for UI consistency.""" + for i, title in enumerate(["bank invoice", "bank statement", "bank receipt"]): + doc = Document.objects.create( + title=title, + content=title, + checksum=f"SN{i}", + pk=10 + i, + ) + backend.add_or_update(doc) + r = backend.search( + "bank", + user=None, + page=1, + page_size=10, + sort_field=None, + 
sort_reverse=False, + ) + assert r.hits[0]["score"] == pytest.approx(1.0) + assert all(0.0 <= h["score"] <= 1.0 for h in r.hits) + + def test_sort_field_ascending(self, backend: TantivyBackend): + """Searching with sort_reverse=False must return results in ascending ASN order.""" + for asn in [30, 10, 20]: + doc = Document.objects.create( + title="sortable", + content="sortable content", + checksum=f"SFA{asn}", + archive_serial_number=asn, + ) + backend.add_or_update(doc) + + r = backend.search( + "sortable", + user=None, + page=1, + page_size=10, + sort_field="archive_serial_number", + sort_reverse=False, + ) + assert r.total == 3 + asns = [Document.objects.get(pk=h["id"]).archive_serial_number for h in r.hits] + assert asns == [10, 20, 30] + + def test_sort_field_descending(self, backend: TantivyBackend): + """Searching with sort_reverse=True must return results in descending ASN order.""" + for asn in [30, 10, 20]: + doc = Document.objects.create( + title="sortable", + content="sortable content", + checksum=f"SFD{asn}", + archive_serial_number=asn, + ) + backend.add_or_update(doc) + + r = backend.search( + "sortable", + user=None, + page=1, + page_size=10, + sort_field="archive_serial_number", + sort_reverse=True, + ) + assert r.total == 3 + asns = [Document.objects.get(pk=h["id"]).archive_serial_number for h in r.hits] + assert asns == [30, 20, 10] + + def test_fuzzy_threshold_filters_low_score_hits( + self, + backend: TantivyBackend, + settings, + ): + """When ADVANCED_FUZZY_SEARCH_THRESHOLD exceeds all normalized scores, hits must be filtered out.""" + doc = Document.objects.create( + title="Invoice document", + content="financial report", + checksum="FT1", + pk=120, + ) + backend.add_or_update(doc) + + # Threshold above 1.0 filters every hit (normalized scores top out at 1.0) + settings.ADVANCED_FUZZY_SEARCH_THRESHOLD = 1.1 + r = backend.search( + "invoice", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + ) + assert r.hits 
== [] + + def test_owner_filter(self, backend: TantivyBackend): + """Document owners can search their private documents; other users cannot access them.""" + owner = User.objects.create_user("owner") + other = User.objects.create_user("other") + doc = Document.objects.create( + title="Private", + content="secret", + checksum="PF1", + pk=20, + owner=owner, + ) + backend.add_or_update(doc) + + assert ( + backend.search( + "secret", + user=owner, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + ).total + == 1 + ) + assert ( + backend.search( + "secret", + user=other, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + ).total + == 0 + ) + + +class TestRebuild: + """Test index rebuilding functionality.""" + + def test_with_iter_wrapper_called(self, backend: TantivyBackend): + """Index rebuild must pass documents through iter_wrapper for progress tracking.""" + seen = [] + + def wrapper(docs): + for doc in docs: + seen.append(doc.pk) + yield doc + + Document.objects.create(title="Tracked", content="x", checksum="TW1", pk=30) + backend.rebuild(Document.objects.all(), iter_wrapper=wrapper) + assert 30 in seen + + +class TestAutocomplete: + """Test autocomplete functionality.""" + + def test_basic_functionality(self, backend: TantivyBackend): + """Autocomplete must return words matching the given prefix.""" + doc = Document.objects.create( + title="Invoice from Microsoft Corporation", + content="payment details", + checksum="AC1", + pk=40, + ) + backend.add_or_update(doc) + + results = backend.autocomplete("micro", limit=10) + assert "microsoft" in results + + def test_results_ordered_by_document_frequency(self, backend: TantivyBackend): + """Autocomplete results must be ordered by document frequency to prioritize common terms.""" + # "payment" appears in 3 docs; "payslip" in 1 — "pay" prefix should + # return "payment" before "payslip". 
+ for i, (title, checksum) in enumerate( + [ + ("payment invoice", "AF1"), + ("payment receipt", "AF2"), + ("payment confirmation", "AF3"), + ("payslip march", "AF4"), + ], + start=41, + ): + doc = Document.objects.create( + title=title, + content="details", + checksum=checksum, + pk=i, + ) + backend.add_or_update(doc) + + results = backend.autocomplete("pay", limit=10) + assert results.index("payment") < results.index("payslip") + + +class TestMoreLikeThis: + """Test more like this functionality.""" + + def test_excludes_original(self, backend: TantivyBackend): + """More like this queries must exclude the reference document from results.""" + doc1 = Document.objects.create( + title="Important document", + content="financial information", + checksum="MLT1", + pk=50, + ) + doc2 = Document.objects.create( + title="Another document", + content="financial report", + checksum="MLT2", + pk=51, + ) + backend.add_or_update(doc1) + backend.add_or_update(doc2) + + results = backend.more_like_this(doc_id=50, user=None, page=1, page_size=10) + returned_ids = [hit["id"] for hit in results.hits] + assert 50 not in returned_ids # Original document excluded + + def test_with_user_applies_permission_filter(self, backend: TantivyBackend): + """more_like_this with a user must exclude documents that user cannot see.""" + viewer = User.objects.create_user("mlt_viewer") + other = User.objects.create_user("mlt_other") + public_doc = Document.objects.create( + title="Public financial document", + content="quarterly financial analysis report figures", + checksum="MLT3", + pk=52, + ) + private_doc = Document.objects.create( + title="Private financial document", + content="quarterly financial analysis report figures", + checksum="MLT4", + pk=53, + owner=other, + ) + backend.add_or_update(public_doc) + backend.add_or_update(private_doc) + + results = backend.more_like_this(doc_id=52, user=viewer, page=1, page_size=10) + returned_ids = [hit["id"] for hit in results.hits] + # private_doc is 
owned by other, so viewer cannot see it + assert 53 not in returned_ids + + def test_document_not_in_index_returns_empty(self, backend: TantivyBackend): + """more_like_this for a doc_id absent from the index must return empty results.""" + results = backend.more_like_this(doc_id=9999, user=None, page=1, page_size=10) + assert results.hits == [] + assert results.total == 0 + + +class TestSingleton: + """Test get_backend() and reset_backend() singleton lifecycle.""" + + @pytest.fixture(autouse=True) + def _clean(self): + reset_backend() + yield + reset_backend() + + def test_returns_same_instance_on_repeated_calls(self, index_dir): + """Singleton pattern: repeated calls to get_backend() must return the same instance.""" + assert get_backend() is get_backend() + + def test_reinitializes_when_index_dir_changes(self, tmp_path, settings): + """Backend singleton must reinitialize when INDEX_DIR setting changes for test isolation.""" + settings.INDEX_DIR = tmp_path / "a" + (tmp_path / "a").mkdir() + b1 = get_backend() + + settings.INDEX_DIR = tmp_path / "b" + (tmp_path / "b").mkdir() + b2 = get_backend() + + assert b1 is not b2 + assert b2._path == tmp_path / "b" + + def test_reset_forces_new_instance(self, index_dir): + """reset_backend() must force creation of a new backend instance on next get_backend() call.""" + b1 = get_backend() + reset_backend() + b2 = get_backend() + assert b1 is not b2 + + +class TestFieldHandling: + """Test handling of various document fields.""" + + def test_none_values_handled_correctly(self, backend: TantivyBackend): + """Document fields with None values must not cause indexing errors.""" + doc = Document.objects.create( + title="Test Doc", + content="test content", + checksum="NV1", + pk=60, + original_filename=None, + page_count=None, + ) + # Should not raise an exception + backend.add_or_update(doc) + + results = backend.search( + "test", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + ) + assert 
results.total == 1 + + def test_custom_fields_include_name_and_value(self, backend: TantivyBackend): + """Custom fields must be indexed with both field name and value for structured queries.""" + # Create a custom field + field = CustomField.objects.create( + name="Invoice Number", + data_type=CustomField.FieldDataType.STRING, + ) + doc = Document.objects.create( + title="Invoice", + content="test", + checksum="CF1", + pk=70, + ) + CustomFieldInstance.objects.create( + document=doc, + field=field, + value_text="INV-2024-001", + ) + + # Should not raise an exception during indexing + backend.add_or_update(doc) + + results = backend.search( + "invoice", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + ) + assert results.total == 1 + + def test_select_custom_field_indexes_label_not_id(self, backend: TantivyBackend): + """SELECT custom fields must index the human-readable label, not the opaque option ID.""" + field = CustomField.objects.create( + name="Category", + data_type=CustomField.FieldDataType.SELECT, + extra_data={ + "select_options": [ + {"id": "opt_abc", "label": "Invoice"}, + {"id": "opt_def", "label": "Receipt"}, + ], + }, + ) + doc = Document.objects.create( + title="Categorised doc", + content="test", + checksum="SEL1", + pk=71, + ) + CustomFieldInstance.objects.create( + document=doc, + field=field, + value_select="opt_abc", + ) + backend.add_or_update(doc) + + # Label should be findable + results = backend.search( + "custom_fields.value:invoice", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + ) + assert results.total == 1 + + # Opaque ID must not appear in the index + results = backend.search( + "custom_fields.value:opt_abc", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + ) + assert results.total == 0 + + def test_none_custom_field_value_not_indexed(self, backend: TantivyBackend): + """Custom field instances with no value set must not produce an index 
entry.""" + field = CustomField.objects.create( + name="Optional", + data_type=CustomField.FieldDataType.SELECT, + extra_data={"select_options": [{"id": "opt_1", "label": "Yes"}]}, + ) + doc = Document.objects.create( + title="Unset field doc", + content="test", + checksum="SEL2", + pk=72, + ) + CustomFieldInstance.objects.create( + document=doc, + field=field, + value_select=None, + ) + backend.add_or_update(doc) + + # The string "none" must not appear as an indexed value + results = backend.search( + "custom_fields.value:none", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + ) + assert results.total == 0 + + def test_notes_include_user_information(self, backend: TantivyBackend): + """Notes must be indexed with user information when available for structured queries.""" + user = User.objects.create_user("notewriter") + doc = Document.objects.create( + title="Doc with notes", + content="test", + checksum="NT1", + pk=80, + ) + Note.objects.create(document=doc, note="Important note", user=user) + + # Should not raise an exception during indexing + backend.add_or_update(doc) + + # Test basic document search first + results = backend.search( + "test", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + ) + assert results.total == 1, ( + f"Expected 1, got {results.total}. Document content should be searchable." + ) + + # Test notes search — must use structured JSON syntax now that note + # is no longer in DEFAULT_SEARCH_FIELDS + results = backend.search( + "notes.note:important", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + ) + assert results.total == 1, ( + f"Expected 1, got {results.total}. Note content should be searchable via notes.note: prefix." 
+ ) diff --git a/src/documents/tests/search/test_migration_fulltext_query_field_prefixes.py b/src/documents/tests/search/test_migration_fulltext_query_field_prefixes.py new file mode 100644 index 000000000..739ea996c --- /dev/null +++ b/src/documents/tests/search/test_migration_fulltext_query_field_prefixes.py @@ -0,0 +1,138 @@ +import pytest + +from documents.tests.utils import TestMigrations + +pytestmark = pytest.mark.search + + +class TestMigrateFulltextQueryFieldPrefixes(TestMigrations): + migrate_from = "0016_sha256_checksums" + migrate_to = "0017_migrate_fulltext_query_field_prefixes" + + def setUpBeforeMigration(self, apps) -> None: + User = apps.get_model("auth", "User") + SavedView = apps.get_model("documents", "SavedView") + SavedViewFilterRule = apps.get_model("documents", "SavedViewFilterRule") + + user = User.objects.create(username="testuser") + + def make_rule(value: str): + view = SavedView.objects.create( + owner=user, + name=f"view-{value}", + sort_field="created", + ) + return SavedViewFilterRule.objects.create( + saved_view=view, + rule_type=20, # fulltext query + value=value, + ) + + # Simple field prefixes + self.rule_note = make_rule("note:invoice") + self.rule_cf = make_rule("custom_field:amount") + + # Combined query + self.rule_combined = make_rule("note:invoice AND custom_field:total") + + # Parenthesized groups (Whoosh syntax) + self.rule_parens = make_rule("(note:invoice OR note:receipt)") + + # Prefix operators + self.rule_plus = make_rule("+note:foo") + self.rule_minus = make_rule("-note:bar") + + # Boosted + self.rule_boost = make_rule("note:test^2") + + # Should NOT be rewritten — no field prefix match + self.rule_no_match = make_rule("title:hello content:world") + + # Should NOT false-positive on word boundaries + self.rule_denote = make_rule("denote:foo") + + # Already using new syntax — should be idempotent + self.rule_already_migrated = make_rule("notes.note:foo") + self.rule_already_migrated_cf = 
make_rule("custom_fields.value:bar") + + # Null value — should not crash + view_null = SavedView.objects.create( + owner=user, + name="view-null", + sort_field="created", + ) + self.rule_null = SavedViewFilterRule.objects.create( + saved_view=view_null, + rule_type=20, + value=None, + ) + + # Non-fulltext rule type — should be untouched + view_other = SavedView.objects.create( + owner=user, + name="view-other-type", + sort_field="created", + ) + self.rule_other_type = SavedViewFilterRule.objects.create( + saved_view=view_other, + rule_type=0, # title contains + value="note:something", + ) + + def test_note_prefix_rewritten(self): + self.rule_note.refresh_from_db() + self.assertEqual(self.rule_note.value, "notes.note:invoice") + + def test_custom_field_prefix_rewritten(self): + self.rule_cf.refresh_from_db() + self.assertEqual(self.rule_cf.value, "custom_fields.value:amount") + + def test_combined_query_rewritten(self): + self.rule_combined.refresh_from_db() + self.assertEqual( + self.rule_combined.value, + "notes.note:invoice AND custom_fields.value:total", + ) + + def test_parenthesized_groups(self): + self.rule_parens.refresh_from_db() + self.assertEqual( + self.rule_parens.value, + "(notes.note:invoice OR notes.note:receipt)", + ) + + def test_plus_prefix(self): + self.rule_plus.refresh_from_db() + self.assertEqual(self.rule_plus.value, "+notes.note:foo") + + def test_minus_prefix(self): + self.rule_minus.refresh_from_db() + self.assertEqual(self.rule_minus.value, "-notes.note:bar") + + def test_boosted(self): + self.rule_boost.refresh_from_db() + self.assertEqual(self.rule_boost.value, "notes.note:test^2") + + def test_no_match_unchanged(self): + self.rule_no_match.refresh_from_db() + self.assertEqual(self.rule_no_match.value, "title:hello content:world") + + def test_word_boundary_no_false_positive(self): + self.rule_denote.refresh_from_db() + self.assertEqual(self.rule_denote.value, "denote:foo") + + def test_already_migrated_idempotent(self): + 
self.rule_already_migrated.refresh_from_db() + self.assertEqual(self.rule_already_migrated.value, "notes.note:foo") + + def test_already_migrated_cf_idempotent(self): + self.rule_already_migrated_cf.refresh_from_db() + self.assertEqual(self.rule_already_migrated_cf.value, "custom_fields.value:bar") + + def test_null_value_no_crash(self): + self.rule_null.refresh_from_db() + self.assertIsNone(self.rule_null.value) + + def test_non_fulltext_rule_untouched(self): + self.rule_other_type.refresh_from_db() + self.assertEqual(self.rule_other_type.value, "note:something") diff --git a/src/documents/tests/search/test_query.py b/src/documents/tests/search/test_query.py new file mode 100644 index 000000000..74a064dbb --- /dev/null +++ b/src/documents/tests/search/test_query.py @@ -0,0 +1,530 @@ +from __future__ import annotations + +import re +from datetime import UTC +from datetime import datetime +from datetime import tzinfo +from typing import TYPE_CHECKING +from zoneinfo import ZoneInfo + +import pytest +import tantivy +import time_machine + +from documents.search._query import _date_only_range +from documents.search._query import _datetime_range +from documents.search._query import _rewrite_compact_date +from documents.search._query import build_permission_filter +from documents.search._query import normalize_query +from documents.search._query import parse_user_query +from documents.search._query import rewrite_natural_date_keywords +from documents.search._schema import build_schema +from documents.search._tokenizer import register_tokenizers + +if TYPE_CHECKING: + from django.contrib.auth.base_user import AbstractBaseUser + +pytestmark = pytest.mark.search + +EASTERN = ZoneInfo("America/New_York") # UTC-5 / UTC-4 (DST) +AUCKLAND = ZoneInfo("Pacific/Auckland") # UTC+13 in southern-hemisphere summer + + +def _range(result: str, field: str) -> tuple[str, str]: + m = re.search(rf"{field}:\[(.+?) 
TO (.+?)\]", result) + assert m, f"No range for {field!r} in: {result!r}" + return m.group(1), m.group(2) + + +class TestCreatedDateField: + """ + created is a Django DateField: indexed as midnight UTC of the local calendar + date. No offset arithmetic needed - the local calendar date is what matters. + """ + + @pytest.mark.parametrize( + ("tz", "expected_lo", "expected_hi"), + [ + pytest.param(UTC, "2026-03-28T00:00:00Z", "2026-03-29T00:00:00Z", id="utc"), + pytest.param( + EASTERN, + "2026-03-28T00:00:00Z", + "2026-03-29T00:00:00Z", + id="eastern_same_calendar_date", + ), + ], + ) + @time_machine.travel(datetime(2026, 3, 28, 15, 30, tzinfo=UTC), tick=False) + def test_today(self, tz: tzinfo, expected_lo: str, expected_hi: str) -> None: + lo, hi = _range(rewrite_natural_date_keywords("created:today", tz), "created") + assert lo == expected_lo + assert hi == expected_hi + + @time_machine.travel(datetime(2026, 3, 28, 3, 0, tzinfo=UTC), tick=False) + def test_today_auckland_ahead_of_utc(self) -> None: + # UTC 03:00 -> Auckland (UTC+13) = 16:00 same date; local date = 2026-03-28 + lo, _ = _range( + rewrite_natural_date_keywords("created:today", AUCKLAND), + "created", + ) + assert lo == "2026-03-28T00:00:00Z" + + @pytest.mark.parametrize( + ("field", "keyword", "expected_lo", "expected_hi"), + [ + pytest.param( + "created", + "yesterday", + "2026-03-27T00:00:00Z", + "2026-03-28T00:00:00Z", + id="yesterday", + ), + pytest.param( + "created", + "this_week", + "2026-03-23T00:00:00Z", + "2026-03-30T00:00:00Z", + id="this_week_mon_sun", + ), + pytest.param( + "created", + "last_week", + "2026-03-16T00:00:00Z", + "2026-03-23T00:00:00Z", + id="last_week", + ), + pytest.param( + "created", + "this_month", + "2026-03-01T00:00:00Z", + "2026-04-01T00:00:00Z", + id="this_month", + ), + pytest.param( + "created", + "last_month", + "2026-02-01T00:00:00Z", + "2026-03-01T00:00:00Z", + id="last_month", + ), + pytest.param( + "created", + "this_year", + "2026-01-01T00:00:00Z", + 
"2027-01-01T00:00:00Z", + id="this_year", + ), + pytest.param( + "created", + "last_year", + "2025-01-01T00:00:00Z", + "2026-01-01T00:00:00Z", + id="last_year", + ), + ], + ) + @time_machine.travel(datetime(2026, 3, 28, 15, 0, tzinfo=UTC), tick=False) + def test_date_keywords( + self, + field: str, + keyword: str, + expected_lo: str, + expected_hi: str, + ) -> None: + # 2026-03-28 is Saturday; Mon-Sun week calculation built into expectations + query = f"{field}:{keyword}" + lo, hi = _range(rewrite_natural_date_keywords(query, UTC), field) + assert lo == expected_lo + assert hi == expected_hi + + @time_machine.travel(datetime(2026, 12, 15, 12, 0, tzinfo=UTC), tick=False) + def test_this_month_december_wraps_to_next_year(self) -> None: + # December: next month must roll over to January 1 of next year + lo, hi = _range( + rewrite_natural_date_keywords("created:this_month", UTC), + "created", + ) + assert lo == "2026-12-01T00:00:00Z" + assert hi == "2027-01-01T00:00:00Z" + + @time_machine.travel(datetime(2026, 1, 15, 12, 0, tzinfo=UTC), tick=False) + def test_last_month_january_wraps_to_previous_year(self) -> None: + # January: last month must roll back to December 1 of previous year + lo, hi = _range( + rewrite_natural_date_keywords("created:last_month", UTC), + "created", + ) + assert lo == "2025-12-01T00:00:00Z" + assert hi == "2026-01-01T00:00:00Z" + + def test_unknown_keyword_raises(self) -> None: + with pytest.raises(ValueError, match="Unknown keyword"): + _date_only_range("bogus_keyword", UTC) + + +class TestDateTimeFields: + """ + added/modified store full UTC datetimes. Natural keywords must convert + the local day boundaries to UTC - timezone offset arithmetic IS required. 
+ """ + + @time_machine.travel(datetime(2026, 3, 28, 15, 30, tzinfo=UTC), tick=False) + def test_added_today_eastern(self) -> None: + # EDT = UTC-4; local midnight 2026-03-28 00:00 EDT = 2026-03-28 04:00 UTC + lo, hi = _range(rewrite_natural_date_keywords("added:today", EASTERN), "added") + assert lo == "2026-03-28T04:00:00Z" + assert hi == "2026-03-29T04:00:00Z" + + @time_machine.travel(datetime(2026, 3, 29, 2, 0, tzinfo=UTC), tick=False) + def test_added_today_auckland_midnight_crossing(self) -> None: + # UTC 02:00 on 2026-03-29 -> Auckland (UTC+13) = 2026-03-29 15:00 local + # Auckland midnight = UTC 2026-03-28 11:00 + lo, hi = _range(rewrite_natural_date_keywords("added:today", AUCKLAND), "added") + assert lo == "2026-03-28T11:00:00Z" + assert hi == "2026-03-29T11:00:00Z" + + @time_machine.travel(datetime(2026, 3, 28, 15, 0, tzinfo=UTC), tick=False) + def test_modified_today_utc(self) -> None: + lo, hi = _range( + rewrite_natural_date_keywords("modified:today", UTC), + "modified", + ) + assert lo == "2026-03-28T00:00:00Z" + assert hi == "2026-03-29T00:00:00Z" + + @pytest.mark.parametrize( + ("keyword", "expected_lo", "expected_hi"), + [ + pytest.param( + "yesterday", + "2026-03-27T00:00:00Z", + "2026-03-28T00:00:00Z", + id="yesterday", + ), + pytest.param( + "this_week", + "2026-03-23T00:00:00Z", + "2026-03-30T00:00:00Z", + id="this_week", + ), + pytest.param( + "last_week", + "2026-03-16T00:00:00Z", + "2026-03-23T00:00:00Z", + id="last_week", + ), + pytest.param( + "this_month", + "2026-03-01T00:00:00Z", + "2026-04-01T00:00:00Z", + id="this_month", + ), + pytest.param( + "last_month", + "2026-02-01T00:00:00Z", + "2026-03-01T00:00:00Z", + id="last_month", + ), + pytest.param( + "this_year", + "2026-01-01T00:00:00Z", + "2027-01-01T00:00:00Z", + id="this_year", + ), + pytest.param( + "last_year", + "2025-01-01T00:00:00Z", + "2026-01-01T00:00:00Z", + id="last_year", + ), + ], + ) + @time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False) + def 
test_datetime_keywords_utc( + self, + keyword: str, + expected_lo: str, + expected_hi: str, + ) -> None: + # 2026-03-28 is Saturday; weekday()==5 so Monday=2026-03-23 + lo, hi = _range(rewrite_natural_date_keywords(f"added:{keyword}", UTC), "added") + assert lo == expected_lo + assert hi == expected_hi + + @time_machine.travel(datetime(2026, 12, 15, 12, 0, tzinfo=UTC), tick=False) + def test_this_month_december_wraps_to_next_year(self) -> None: + # December: next month wraps to January of next year + lo, hi = _range(rewrite_natural_date_keywords("added:this_month", UTC), "added") + assert lo == "2026-12-01T00:00:00Z" + assert hi == "2027-01-01T00:00:00Z" + + @time_machine.travel(datetime(2026, 1, 15, 12, 0, tzinfo=UTC), tick=False) + def test_last_month_january_wraps_to_previous_year(self) -> None: + # January: last month wraps back to December of previous year + lo, hi = _range(rewrite_natural_date_keywords("added:last_month", UTC), "added") + assert lo == "2025-12-01T00:00:00Z" + assert hi == "2026-01-01T00:00:00Z" + + def test_unknown_keyword_raises(self) -> None: + with pytest.raises(ValueError, match="Unknown keyword"): + _datetime_range("bogus_keyword", UTC) + + +class TestWhooshQueryRewriting: + """All Whoosh query syntax variants must be rewritten to ISO 8601 before Tantivy parses them.""" + + @time_machine.travel(datetime(2026, 3, 28, 15, 0, tzinfo=UTC), tick=False) + def test_compact_date_shim_rewrites_to_iso(self) -> None: + result = rewrite_natural_date_keywords("created:20240115120000", UTC) + assert "2024-01-15" in result + assert "20240115120000" not in result + + @time_machine.travel(datetime(2026, 3, 28, 15, 0, tzinfo=UTC), tick=False) + def test_relative_range_shim_removes_now(self) -> None: + result = rewrite_natural_date_keywords("added:[now-7d TO now]", UTC) + assert "now" not in result + assert "2026-03-" in result + + @time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False) + def test_bracket_minus_7_days(self) -> None: + 
lo, hi = _range( + rewrite_natural_date_keywords("added:[-7 days to now]", UTC), + "added", + ) + assert lo == "2026-03-21T12:00:00Z" + assert hi == "2026-03-28T12:00:00Z" + + @time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False) + def test_bracket_minus_1_week(self) -> None: + lo, hi = _range( + rewrite_natural_date_keywords("added:[-1 week to now]", UTC), + "added", + ) + assert lo == "2026-03-21T12:00:00Z" + assert hi == "2026-03-28T12:00:00Z" + + @time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False) + def test_bracket_minus_1_month_uses_relativedelta(self) -> None: + # relativedelta(months=1) from 2026-03-28 = 2026-02-28 (not 29) + lo, hi = _range( + rewrite_natural_date_keywords("created:[-1 month to now]", UTC), + "created", + ) + assert lo == "2026-02-28T12:00:00Z" + assert hi == "2026-03-28T12:00:00Z" + + @time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False) + def test_bracket_minus_1_year(self) -> None: + lo, hi = _range( + rewrite_natural_date_keywords("modified:[-1 year to now]", UTC), + "modified", + ) + assert lo == "2025-03-28T12:00:00Z" + assert hi == "2026-03-28T12:00:00Z" + + @time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False) + def test_bracket_plural_unit_hours(self) -> None: + lo, hi = _range( + rewrite_natural_date_keywords("added:[-3 hours to now]", UTC), + "added", + ) + assert lo == "2026-03-28T09:00:00Z" + assert hi == "2026-03-28T12:00:00Z" + + @time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False) + def test_bracket_case_insensitive(self) -> None: + result = rewrite_natural_date_keywords("added:[-1 WEEK TO NOW]", UTC) + assert "now" not in result.lower() + lo, hi = _range(result, "added") + assert lo == "2026-03-21T12:00:00Z" + assert hi == "2026-03-28T12:00:00Z" + + @time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False) + def test_relative_range_swaps_bounds_when_lo_exceeds_hi(self) -> None: + # [now+1h TO now-1h] 
has lo > hi before substitution; they must be swapped + lo, hi = _range( + rewrite_natural_date_keywords("added:[now+1h TO now-1h]", UTC), + "added", + ) + assert lo == "2026-03-28T11:00:00Z" + assert hi == "2026-03-28T13:00:00Z" + + def test_8digit_created_date_field_always_uses_utc_midnight(self) -> None: + # created is a DateField: boundaries are always UTC midnight, no TZ offset + result = rewrite_natural_date_keywords("created:20231201", EASTERN) + lo, hi = _range(result, "created") + assert lo == "2023-12-01T00:00:00Z" + assert hi == "2023-12-02T00:00:00Z" + + def test_8digit_added_datetime_field_converts_local_midnight_to_utc(self) -> None: + # added is DateTimeField: midnight Dec 1 Eastern (EST = UTC-5) = 05:00 UTC + result = rewrite_natural_date_keywords("added:20231201", EASTERN) + lo, hi = _range(result, "added") + assert lo == "2023-12-01T05:00:00Z" + assert hi == "2023-12-02T05:00:00Z" + + def test_8digit_modified_datetime_field_converts_local_midnight_to_utc( + self, + ) -> None: + result = rewrite_natural_date_keywords("modified:20231201", EASTERN) + lo, hi = _range(result, "modified") + assert lo == "2023-12-01T05:00:00Z" + assert hi == "2023-12-02T05:00:00Z" + + def test_8digit_invalid_date_passes_through_unchanged(self) -> None: + assert rewrite_natural_date_keywords("added:20231340", UTC) == "added:20231340" + + def test_compact_14digit_invalid_date_passes_through_unchanged(self) -> None: + # Month=13 makes datetime() raise ValueError; the token must be left as-is + assert _rewrite_compact_date("20231300120000") == "20231300120000" + + +class TestParseUserQuery: + """parse_user_query runs the full preprocessing pipeline.""" + + @pytest.fixture + def query_index(self) -> tantivy.Index: + schema = build_schema() + idx = tantivy.Index(schema, path=None) + register_tokenizers(idx, "") + return idx + + def test_returns_tantivy_query(self, query_index: tantivy.Index) -> None: + assert isinstance(parse_user_query(query_index, "invoice", UTC), 
tantivy.Query) + + def test_fuzzy_mode_does_not_raise( + self, + query_index: tantivy.Index, + settings, + ) -> None: + settings.ADVANCED_FUZZY_SEARCH_THRESHOLD = 0.5 + assert isinstance(parse_user_query(query_index, "invoice", UTC), tantivy.Query) + + def test_date_rewriting_applied_before_tantivy_parse( + self, + query_index: tantivy.Index, + ) -> None: + # created:today must be rewritten to an ISO range before Tantivy parses it; + # if passed raw, Tantivy would reject "today" as an invalid date value + with time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False): + q = parse_user_query(query_index, "created:today", UTC) + assert isinstance(q, tantivy.Query) + + +class TestPassthrough: + """Queries without field prefixes or unrelated content pass through unchanged.""" + + def test_bare_keyword_no_field_prefix_unchanged(self) -> None: + # Bare 'today' with no field: prefix passes through unchanged + result = rewrite_natural_date_keywords("bank statement today", UTC) + assert "today" in result + + def test_unrelated_query_unchanged(self) -> None: + assert rewrite_natural_date_keywords("title:invoice", UTC) == "title:invoice" + + +class TestNormalizeQuery: + """normalize_query expands comma-separated values and collapses whitespace.""" + + def test_normalize_expands_comma_separated_tags(self) -> None: + assert normalize_query("tag:foo,bar") == "tag:foo AND tag:bar" + + def test_normalize_expands_three_values(self) -> None: + assert normalize_query("tag:foo,bar,baz") == "tag:foo AND tag:bar AND tag:baz" + + def test_normalize_collapses_whitespace(self) -> None: + assert normalize_query("bank statement") == "bank statement" + + def test_normalize_no_commas_unchanged(self) -> None: + assert normalize_query("bank statement") == "bank statement" + + +class TestPermissionFilter: + """ + build_permission_filter tests use an in-memory index — no DB access needed. 
+ + Users are constructed as unsaved model instances (django_user_model(pk=N)) + so no database round-trip occurs; only .pk is read by build_permission_filter. + """ + + @pytest.fixture + def perm_index(self) -> tantivy.Index: + schema = build_schema() + idx = tantivy.Index(schema, path=None) + register_tokenizers(idx, "") + return idx + + def _add_doc( + self, + idx: tantivy.Index, + doc_id: int, + owner_id: int | None = None, + viewer_ids: tuple[int, ...] = (), + ) -> None: + writer = idx.writer() + doc = tantivy.Document() + doc.add_unsigned("id", doc_id) + # Only add owner_id field if the document has an owner + if owner_id is not None: + doc.add_unsigned("owner_id", owner_id) + for vid in viewer_ids: + doc.add_unsigned("viewer_id", vid) + writer.add_document(doc) + writer.commit() + idx.reload() + + def test_perm_no_owner_visible_to_any_user( + self, + perm_index: tantivy.Index, + django_user_model: type[AbstractBaseUser], + ) -> None: + """Documents with no owner must be visible to every user.""" + self._add_doc(perm_index, doc_id=1, owner_id=None) + user = django_user_model(pk=99) + perm = build_permission_filter(perm_index.schema, user) + assert perm_index.searcher().search(perm, limit=10).count == 1 + + def test_perm_owned_by_user_is_visible( + self, + perm_index: tantivy.Index, + django_user_model: type[AbstractBaseUser], + ) -> None: + """A document owned by the requesting user must be visible.""" + self._add_doc(perm_index, doc_id=2, owner_id=42) + user = django_user_model(pk=42) + perm = build_permission_filter(perm_index.schema, user) + assert perm_index.searcher().search(perm, limit=10).count == 1 + + def test_perm_owned_by_other_not_visible( + self, + perm_index: tantivy.Index, + django_user_model: type[AbstractBaseUser], + ) -> None: + """A document owned by a different user must not be visible.""" + self._add_doc(perm_index, doc_id=3, owner_id=42) + user = django_user_model(pk=99) + perm = build_permission_filter(perm_index.schema, user) + assert 
perm_index.searcher().search(perm, limit=10).count == 0 + + def test_perm_shared_viewer_is_visible( + self, + perm_index: tantivy.Index, + django_user_model: type[AbstractBaseUser], + ) -> None: + """A document explicitly shared with a user must be visible to that user.""" + self._add_doc(perm_index, doc_id=4, owner_id=42, viewer_ids=(99,)) + user = django_user_model(pk=99) + perm = build_permission_filter(perm_index.schema, user) + assert perm_index.searcher().search(perm, limit=10).count == 1 + + def test_perm_only_owned_docs_hidden_from_others( + self, + perm_index: tantivy.Index, + django_user_model: type[AbstractBaseUser], + ) -> None: + """Only unowned documents appear when the user owns none of them.""" + self._add_doc(perm_index, doc_id=5, owner_id=10) # owned by 10 + self._add_doc(perm_index, doc_id=6, owner_id=None) # unowned + user = django_user_model(pk=20) + perm = build_permission_filter(perm_index.schema, user) + assert perm_index.searcher().search(perm, limit=10).count == 1 # only unowned diff --git a/src/documents/tests/search/test_schema.py b/src/documents/tests/search/test_schema.py new file mode 100644 index 000000000..1ff9bee32 --- /dev/null +++ b/src/documents/tests/search/test_schema.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +from documents.search._schema import SCHEMA_VERSION +from documents.search._schema import needs_rebuild + +if TYPE_CHECKING: + from pathlib import Path + + from pytest_django.fixtures import SettingsWrapper + +pytestmark = pytest.mark.search + + +class TestNeedsRebuild: + """needs_rebuild covers all sentinel-file states that require a full reindex.""" + + def test_returns_true_when_version_file_missing(self, index_dir: Path) -> None: + assert needs_rebuild(index_dir) is True + + def test_returns_false_when_version_and_language_match( + self, + index_dir: Path, + settings: SettingsWrapper, + ) -> None: + settings.SEARCH_LANGUAGE = "en" + (index_dir / 
".schema_version").write_text(str(SCHEMA_VERSION)) + (index_dir / ".schema_language").write_text("en") + assert needs_rebuild(index_dir) is False + + def test_returns_true_on_schema_version_mismatch(self, index_dir: Path) -> None: + (index_dir / ".schema_version").write_text(str(SCHEMA_VERSION - 1)) + assert needs_rebuild(index_dir) is True + + def test_returns_true_when_version_file_not_an_integer( + self, + index_dir: Path, + ) -> None: + (index_dir / ".schema_version").write_text("not-a-number") + assert needs_rebuild(index_dir) is True + + def test_returns_true_when_language_sentinel_missing( + self, + index_dir: Path, + settings: SettingsWrapper, + ) -> None: + settings.SEARCH_LANGUAGE = "en" + (index_dir / ".schema_version").write_text(str(SCHEMA_VERSION)) + # .schema_language intentionally absent + assert needs_rebuild(index_dir) is True + + def test_returns_true_when_language_sentinel_content_differs( + self, + index_dir: Path, + settings: SettingsWrapper, + ) -> None: + settings.SEARCH_LANGUAGE = "de" + (index_dir / ".schema_version").write_text(str(SCHEMA_VERSION)) + (index_dir / ".schema_language").write_text("en") + assert needs_rebuild(index_dir) is True diff --git a/src/documents/tests/search/test_tokenizer.py b/src/documents/tests/search/test_tokenizer.py new file mode 100644 index 000000000..aee52a567 --- /dev/null +++ b/src/documents/tests/search/test_tokenizer.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING + +import pytest +import tantivy + +from documents.search._tokenizer import _bigram_analyzer +from documents.search._tokenizer import _paperless_text +from documents.search._tokenizer import register_tokenizers + +if TYPE_CHECKING: + from _pytest.logging import LogCaptureFixture + +pytestmark = pytest.mark.search + + +class TestTokenizers: + @pytest.fixture + def content_index(self) -> tantivy.Index: + """Index with just a content field for ASCII folding tests.""" + sb = 
tantivy.SchemaBuilder() + sb.add_text_field("content", stored=True, tokenizer_name="paperless_text") + schema = sb.build() + idx = tantivy.Index(schema, path=None) + idx.register_tokenizer("paperless_text", _paperless_text("")) + return idx + + @pytest.fixture + def bigram_index(self) -> tantivy.Index: + """Index with bigram field for CJK tests.""" + sb = tantivy.SchemaBuilder() + sb.add_text_field( + "bigram_content", + stored=False, + tokenizer_name="bigram_analyzer", + ) + schema = sb.build() + idx = tantivy.Index(schema, path=None) + idx.register_tokenizer("bigram_analyzer", _bigram_analyzer()) + return idx + + def test_ascii_fold_finds_accented_content( + self, + content_index: tantivy.Index, + ) -> None: + """ASCII folding allows searching accented text with plain ASCII queries.""" + writer = content_index.writer() + doc = tantivy.Document() + doc.add_text("content", "café résumé") + writer.add_document(doc) + writer.commit() + content_index.reload() + q = content_index.parse_query("cafe resume", ["content"]) + assert content_index.searcher().search(q, limit=5).count == 1 + + def test_bigram_finds_cjk_substring(self, bigram_index: tantivy.Index) -> None: + """Bigram tokenizer enables substring search in CJK languages without whitespace delimiters.""" + writer = bigram_index.writer() + doc = tantivy.Document() + doc.add_text("bigram_content", "東京都") + writer.add_document(doc) + writer.commit() + bigram_index.reload() + q = bigram_index.parse_query("東京", ["bigram_content"]) + assert bigram_index.searcher().search(q, limit=5).count == 1 + + def test_unsupported_language_logs_warning(self, caplog: LogCaptureFixture) -> None: + """Unsupported language codes should log a warning and disable stemming gracefully.""" + sb = tantivy.SchemaBuilder() + sb.add_text_field("content", stored=True, tokenizer_name="paperless_text") + schema = sb.build() + idx = tantivy.Index(schema, path=None) + + with caplog.at_level(logging.WARNING, logger="paperless.search"): + 
register_tokenizers(idx, "klingon") + assert "klingon" in caplog.text diff --git a/src/documents/tests/test_admin.py b/src/documents/tests/test_admin.py index de2f07df5..533319c2f 100644 --- a/src/documents/tests/test_admin.py +++ b/src/documents/tests/test_admin.py @@ -1,6 +1,7 @@ import types from unittest.mock import patch +import tantivy from django.contrib.admin.sites import AdminSite from django.contrib.auth.models import Permission from django.contrib.auth.models import User @@ -8,36 +9,54 @@ from django.test import TestCase from django.utils import timezone from rest_framework import status -from documents import index from documents.admin import DocumentAdmin from documents.admin import TagAdmin from documents.models import Document from documents.models import Tag +from documents.search import get_backend +from documents.search import reset_backend from documents.tests.utils import DirectoriesMixin from paperless.admin import PaperlessUserAdmin class TestDocumentAdmin(DirectoriesMixin, TestCase): def get_document_from_index(self, doc): - ix = index.open_index() - with ix.searcher() as searcher: - return searcher.document(id=doc.id) + backend = get_backend() + searcher = backend._index.searcher() + results = searcher.search( + tantivy.Query.range_query( + backend._schema, + "id", + tantivy.FieldType.Unsigned, + doc.pk, + doc.pk, + ), + limit=1, + ) + if results.hits: + return searcher.doc(results.hits[0][1]).to_dict() + return None def setUp(self) -> None: super().setUp() + reset_backend() self.doc_admin = DocumentAdmin(model=Document, admin_site=AdminSite()) + def tearDown(self) -> None: + reset_backend() + super().tearDown() + def test_save_model(self) -> None: doc = Document.objects.create(title="test") doc.title = "new title" self.doc_admin.save_model(None, doc, None, None) self.assertEqual(Document.objects.get(id=doc.id).title, "new title") - self.assertEqual(self.get_document_from_index(doc)["id"], doc.id) + 
self.assertEqual(self.get_document_from_index(doc)["id"], [doc.id]) def test_delete_model(self) -> None: doc = Document.objects.create(title="test") - index.add_or_update_document(doc) + get_backend().add_or_update(doc) self.assertIsNotNone(self.get_document_from_index(doc)) self.doc_admin.delete_model(None, doc) @@ -53,7 +72,7 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase): checksum=f"{i:02}", ) docs.append(doc) - index.add_or_update_document(doc) + get_backend().add_or_update(doc) self.assertEqual(Document.objects.count(), 42) diff --git a/src/documents/tests/test_api_document_versions.py b/src/documents/tests/test_api_document_versions.py index f5c1a7346..d95e78fe9 100644 --- a/src/documents/tests/test_api_document_versions.py +++ b/src/documents/tests/test_api_document_versions.py @@ -109,7 +109,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase): mime_type="application/pdf", ) - with mock.patch("documents.index.remove_document_from_index"): + with mock.patch("documents.search.get_backend"): resp = self.client.delete(f"/api/documents/{root.id}/versions/{root.id}/") self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST) @@ -137,10 +137,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase): content="v2-content", ) - with ( - mock.patch("documents.index.remove_document_from_index"), - mock.patch("documents.index.add_or_update_document"), - ): + with mock.patch("documents.search.get_backend"): resp = self.client.delete(f"/api/documents/{root.id}/versions/{v2.id}/") self.assertEqual(resp.status_code, status.HTTP_200_OK) @@ -149,10 +146,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase): root.refresh_from_db() self.assertEqual(root.content, "root-content") - with ( - mock.patch("documents.index.remove_document_from_index"), - mock.patch("documents.index.add_or_update_document"), - ): + with mock.patch("documents.search.get_backend"): resp = 
self.client.delete(f"/api/documents/{root.id}/versions/{v1.id}/") self.assertEqual(resp.status_code, status.HTTP_200_OK) @@ -175,10 +169,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase): ) version_id = version.id - with ( - mock.patch("documents.index.remove_document_from_index"), - mock.patch("documents.index.add_or_update_document"), - ): + with mock.patch("documents.search.get_backend"): resp = self.client.delete( f"/api/documents/{root.id}/versions/{version_id}/", ) @@ -225,7 +216,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase): root_document=other_root, ) - with mock.patch("documents.index.remove_document_from_index"): + with mock.patch("documents.search.get_backend"): resp = self.client.delete( f"/api/documents/{root.id}/versions/{other_version.id}/", ) @@ -245,10 +236,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase): root_document=root, ) - with ( - mock.patch("documents.index.remove_document_from_index"), - mock.patch("documents.index.add_or_update_document"), - ): + with mock.patch("documents.search.get_backend"): resp = self.client.delete( f"/api/documents/{version.id}/versions/{version.id}/", ) @@ -275,18 +263,17 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase): root_document=root, ) - with ( - mock.patch("documents.index.remove_document_from_index") as remove_index, - mock.patch("documents.index.add_or_update_document") as add_or_update, - ): + with mock.patch("documents.search.get_backend") as mock_get_backend: + mock_backend = mock.MagicMock() + mock_get_backend.return_value = mock_backend resp = self.client.delete( f"/api/documents/{root.id}/versions/{version.id}/", ) self.assertEqual(resp.status_code, status.HTTP_200_OK) - remove_index.assert_called_once_with(version) - add_or_update.assert_called_once() - self.assertEqual(add_or_update.call_args[0][0].id, root.id) + mock_backend.remove.assert_called_once_with(version.pk) + mock_backend.add_or_update.assert_called_once() + 
self.assertEqual(mock_backend.add_or_update.call_args[0][0].id, root.id) def test_delete_version_returns_403_without_permission(self) -> None: owner = User.objects.create_user(username="owner") diff --git a/src/documents/tests/test_api_search.py b/src/documents/tests/test_api_search.py index 546dff233..69bd65198 100644 --- a/src/documents/tests/test_api_search.py +++ b/src/documents/tests/test_api_search.py @@ -2,6 +2,7 @@ import datetime from datetime import timedelta from unittest import mock +import pytest from dateutil.relativedelta import relativedelta from django.contrib.auth.models import Group from django.contrib.auth.models import Permission @@ -11,9 +12,7 @@ from django.utils import timezone from guardian.shortcuts import assign_perm from rest_framework import status from rest_framework.test import APITestCase -from whoosh.writing import AsyncWriter -from documents import index from documents.bulk_edit import set_permissions from documents.models import Correspondent from documents.models import CustomField @@ -25,18 +24,27 @@ from documents.models import SavedView from documents.models import StoragePath from documents.models import Tag from documents.models import Workflow +from documents.search import get_backend +from documents.search import reset_backend from documents.tests.utils import DirectoriesMixin from paperless_mail.models import MailAccount from paperless_mail.models import MailRule +pytestmark = pytest.mark.search + class TestDocumentSearchApi(DirectoriesMixin, APITestCase): def setUp(self) -> None: super().setUp() + reset_backend() self.user = User.objects.create_superuser(username="temp_admin") self.client.force_authenticate(user=self.user) + def tearDown(self) -> None: + reset_backend() + super().tearDown() + def test_search(self) -> None: d1 = Document.objects.create( title="invoice", @@ -57,13 +65,11 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): checksum="C", original_filename="someepdf.pdf", ) - with 
AsyncWriter(index.open_index()) as writer: - # Note to future self: there is a reason we dont use a model signal handler to update the index: some operations edit many documents at once - # (retagger, renamer) and we don't want to open a writer for each of these, but rather perform the entire operation with one writer. - # That's why we can't open the writer in a model on_save handler or something. - index.update_document(writer, d1) - index.update_document(writer, d2) - index.update_document(writer, d3) + backend = get_backend() + backend.add_or_update(d1) + backend.add_or_update(d2) + backend.add_or_update(d3) + response = self.client.get("/api/documents/?query=bank") results = response.data["results"] self.assertEqual(response.data["count"], 3) @@ -98,9 +104,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): checksum="B", pk=2, ) - with AsyncWriter(index.open_index()) as writer: - index.update_document(writer, d1) - index.update_document(writer, d2) + backend = get_backend() + backend.add_or_update(d1) + backend.add_or_update(d2) response = self.client.get( "/api/documents/?query=bank", @@ -127,8 +133,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): ) matching_doc.tags.add(tag) - with AsyncWriter(index.open_index()) as writer: - index.update_document(writer, matching_doc) + get_backend().add_or_update(matching_doc) response = self.client.get( "/api/documents/?query=bank&include_selection_data=true", @@ -187,10 +192,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): value_int=20, ) - with AsyncWriter(index.open_index()) as writer: - index.update_document(writer, d1) - index.update_document(writer, d2) - index.update_document(writer, d3) + backend = get_backend() + backend.add_or_update(d1) + backend.add_or_update(d2) + backend.add_or_update(d3) response = self.client.get( f"/api/documents/?query=match&ordering=custom_field_{custom_field.pk}", @@ -211,15 +216,15 @@ class TestDocumentSearchApi(DirectoriesMixin, 
APITestCase): ) def test_search_multi_page(self) -> None: - with AsyncWriter(index.open_index()) as writer: - for i in range(55): - doc = Document.objects.create( - checksum=str(i), - pk=i + 1, - title=f"Document {i + 1}", - content="content", - ) - index.update_document(writer, doc) + backend = get_backend() + for i in range(55): + doc = Document.objects.create( + checksum=str(i), + pk=i + 1, + title=f"Document {i + 1}", + content="content", + ) + backend.add_or_update(doc) # This is here so that we test that no document gets returned twice (might happen if the paging is not working) seen_ids = [] @@ -246,15 +251,15 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): seen_ids.append(result["id"]) def test_search_invalid_page(self) -> None: - with AsyncWriter(index.open_index()) as writer: - for i in range(15): - doc = Document.objects.create( - checksum=str(i), - pk=i + 1, - title=f"Document {i + 1}", - content="content", - ) - index.update_document(writer, doc) + backend = get_backend() + for i in range(15): + doc = Document.objects.create( + checksum=str(i), + pk=i + 1, + title=f"Document {i + 1}", + content="content", + ) + backend.add_or_update(doc) response = self.client.get("/api/documents/?query=content&page=0&page_size=10") self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) @@ -292,26 +297,25 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): pk=3, checksum="C", ) - with index.open_index_writer() as writer: - index.update_document(writer, d1) - index.update_document(writer, d2) - index.update_document(writer, d3) + backend = get_backend() + backend.add_or_update(d1) + backend.add_or_update(d2) + backend.add_or_update(d3) response = self.client.get("/api/documents/?query=added:[-1 week to now]") results = response.data["results"] # Expect 3 documents returned self.assertEqual(len(results), 3) - for idx, subset in enumerate( - [ - {"id": 1, "title": "invoice"}, - {"id": 2, "title": "bank statement 1"}, - {"id": 3, 
"title": "bank statement 3"}, - ], - ): - result = results[idx] - # Assert subset in results - self.assertDictEqual(result, {**result, **subset}) + result_map = {r["id"]: r for r in results} + self.assertEqual(set(result_map.keys()), {1, 2, 3}) + for subset in [ + {"id": 1, "title": "invoice"}, + {"id": 2, "title": "bank statement 1"}, + {"id": 3, "title": "bank statement 3"}, + ]: + r = result_map[subset["id"]] + self.assertDictEqual(r, {**r, **subset}) @override_settings( TIME_ZONE="America/Chicago", @@ -347,10 +351,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): # 7 days, 1 hour and 1 minute ago added=timezone.now() - timedelta(days=7, hours=1, minutes=1), ) - with index.open_index_writer() as writer: - index.update_document(writer, d1) - index.update_document(writer, d2) - index.update_document(writer, d3) + backend = get_backend() + backend.add_or_update(d1) + backend.add_or_update(d2) + backend.add_or_update(d3) response = self.client.get("/api/documents/?query=added:[-1 week to now]") results = response.data["results"] @@ -358,12 +362,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): # Expect 2 documents returned self.assertEqual(len(results), 2) - for idx, subset in enumerate( - [{"id": 1, "title": "invoice"}, {"id": 2, "title": "bank statement 1"}], - ): - result = results[idx] - # Assert subset in results - self.assertDictEqual(result, {**result, **subset}) + result_map = {r["id"]: r for r in results} + self.assertEqual(set(result_map.keys()), {1, 2}) + for subset in [ + {"id": 1, "title": "invoice"}, + {"id": 2, "title": "bank statement 1"}, + ]: + r = result_map[subset["id"]] + self.assertDictEqual(r, {**r, **subset}) @override_settings( TIME_ZONE="Europe/Sofia", @@ -399,10 +405,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): # 7 days, 1 hour and 1 minute ago added=timezone.now() - timedelta(days=7, hours=1, minutes=1), ) - with index.open_index_writer() as writer: - index.update_document(writer, d1) - 
index.update_document(writer, d2) - index.update_document(writer, d3) + backend = get_backend() + backend.add_or_update(d1) + backend.add_or_update(d2) + backend.add_or_update(d3) response = self.client.get("/api/documents/?query=added:[-1 week to now]") results = response.data["results"] @@ -410,12 +416,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): # Expect 2 documents returned self.assertEqual(len(results), 2) - for idx, subset in enumerate( - [{"id": 1, "title": "invoice"}, {"id": 2, "title": "bank statement 1"}], - ): - result = results[idx] - # Assert subset in results - self.assertDictEqual(result, {**result, **subset}) + result_map = {r["id"]: r for r in results} + self.assertEqual(set(result_map.keys()), {1, 2}) + for subset in [ + {"id": 1, "title": "invoice"}, + {"id": 2, "title": "bank statement 1"}, + ]: + r = result_map[subset["id"]] + self.assertDictEqual(r, {**r, **subset}) def test_search_added_in_last_month(self) -> None: """ @@ -451,10 +459,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): added=timezone.now() - timedelta(days=7, hours=1, minutes=1), ) - with index.open_index_writer() as writer: - index.update_document(writer, d1) - index.update_document(writer, d2) - index.update_document(writer, d3) + backend = get_backend() + backend.add_or_update(d1) + backend.add_or_update(d2) + backend.add_or_update(d3) response = self.client.get("/api/documents/?query=added:[-1 month to now]") results = response.data["results"] @@ -462,12 +470,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): # Expect 2 documents returned self.assertEqual(len(results), 2) - for idx, subset in enumerate( - [{"id": 1, "title": "invoice"}, {"id": 3, "title": "bank statement 3"}], - ): - result = results[idx] - # Assert subset in results - self.assertDictEqual(result, {**result, **subset}) + result_map = {r["id"]: r for r in results} + self.assertEqual(set(result_map.keys()), {1, 3}) + for subset in [ + {"id": 1, "title": 
"invoice"}, + {"id": 3, "title": "bank statement 3"}, + ]: + r = result_map[subset["id"]] + self.assertDictEqual(r, {**r, **subset}) @override_settings( TIME_ZONE="America/Denver", @@ -507,10 +517,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): added=timezone.now() - timedelta(days=7, hours=1, minutes=1), ) - with index.open_index_writer() as writer: - index.update_document(writer, d1) - index.update_document(writer, d2) - index.update_document(writer, d3) + backend = get_backend() + backend.add_or_update(d1) + backend.add_or_update(d2) + backend.add_or_update(d3) response = self.client.get("/api/documents/?query=added:[-1 month to now]") results = response.data["results"] @@ -518,12 +528,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): # Expect 2 documents returned self.assertEqual(len(results), 2) - for idx, subset in enumerate( - [{"id": 1, "title": "invoice"}, {"id": 3, "title": "bank statement 3"}], - ): - result = results[idx] - # Assert subset in results - self.assertDictEqual(result, {**result, **subset}) + result_map = {r["id"]: r for r in results} + self.assertEqual(set(result_map.keys()), {1, 3}) + for subset in [ + {"id": 1, "title": "invoice"}, + {"id": 3, "title": "bank statement 3"}, + ]: + r = result_map[subset["id"]] + self.assertDictEqual(r, {**r, **subset}) @override_settings( TIME_ZONE="Europe/Sofia", @@ -563,10 +575,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): # Django converts dates to UTC d3.refresh_from_db() - with index.open_index_writer() as writer: - index.update_document(writer, d1) - index.update_document(writer, d2) - index.update_document(writer, d3) + backend = get_backend() + backend.add_or_update(d1) + backend.add_or_update(d2) + backend.add_or_update(d3) response = self.client.get("/api/documents/?query=added:20231201") results = response.data["results"] @@ -574,12 +586,8 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): # Expect 1 document returned 
self.assertEqual(len(results), 1) - for idx, subset in enumerate( - [{"id": 3, "title": "bank statement 3"}], - ): - result = results[idx] - # Assert subset in results - self.assertDictEqual(result, {**result, **subset}) + self.assertEqual(results[0]["id"], 3) + self.assertEqual(results[0]["title"], "bank statement 3") def test_search_added_invalid_date(self) -> None: """ @@ -588,7 +596,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): WHEN: - Query with invalid added date THEN: - - No documents returned + - 400 Bad Request returned (Tantivy rejects invalid date field syntax) """ d1 = Document.objects.create( title="invoice", @@ -597,16 +605,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): pk=1, ) - with index.open_index_writer() as writer: - index.update_document(writer, d1) + get_backend().add_or_update(d1) response = self.client.get("/api/documents/?query=added:invalid-date") - results = response.data["results"] - # Expect 0 document returned - self.assertEqual(len(results), 0) + # Tantivy rejects unparsable field queries with a 400 + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - @mock.patch("documents.index.autocomplete") + @mock.patch("documents.search._backend.TantivyBackend.autocomplete") def test_search_autocomplete_limits(self, m) -> None: """ GIVEN: @@ -618,7 +624,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): - Limit requests are obeyed """ - m.side_effect = lambda ix, term, limit, user: [term for _ in range(limit)] + m.side_effect = lambda term, limit, user=None: [term for _ in range(limit)] response = self.client.get("/api/search/autocomplete/?term=test") self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -671,32 +677,29 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): owner=u1, ) - with AsyncWriter(index.open_index()) as writer: - index.update_document(writer, d1) - index.update_document(writer, d2) - index.update_document(writer, d3) + backend = 
get_backend() + backend.add_or_update(d1) + backend.add_or_update(d2) + backend.add_or_update(d3) response = self.client.get("/api/search/autocomplete/?term=app") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.data, [b"apples", b"applebaum", b"appletini"]) + self.assertEqual(response.data, ["applebaum", "apples", "appletini"]) d3.owner = u2 - - with AsyncWriter(index.open_index()) as writer: - index.update_document(writer, d3) + d3.save() + backend.add_or_update(d3) response = self.client.get("/api/search/autocomplete/?term=app") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.data, [b"apples", b"applebaum"]) + self.assertEqual(response.data, ["applebaum", "apples"]) assign_perm("view_document", u1, d3) - - with AsyncWriter(index.open_index()) as writer: - index.update_document(writer, d3) + backend.add_or_update(d3) response = self.client.get("/api/search/autocomplete/?term=app") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.data, [b"apples", b"applebaum", b"appletini"]) + self.assertEqual(response.data, ["applebaum", "apples", "appletini"]) def test_search_autocomplete_field_name_match(self) -> None: """ @@ -714,8 +717,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): checksum="1", ) - with AsyncWriter(index.open_index()) as writer: - index.update_document(writer, d1) + get_backend().add_or_update(d1) response = self.client.get("/api/search/autocomplete/?term=created:2023") self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -736,33 +738,36 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): checksum="1", ) - with AsyncWriter(index.open_index()) as writer: - index.update_document(writer, d1) + get_backend().add_or_update(d1) response = self.client.get("/api/search/autocomplete/?term=auto") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.data[0], b"auto") + 
self.assertEqual(response.data[0], "auto") - def test_search_spelling_suggestion(self) -> None: - with AsyncWriter(index.open_index()) as writer: - for i in range(55): - doc = Document.objects.create( - checksum=str(i), - pk=i + 1, - title=f"Document {i + 1}", - content=f"Things document {i + 1}", - ) - index.update_document(writer, doc) + def test_search_no_spelling_suggestion(self) -> None: + """ + GIVEN: + - Documents exist with various terms + WHEN: + - Query for documents with any term + THEN: + - corrected_query is always None (Tantivy has no spell correction) + """ + backend = get_backend() + for i in range(5): + doc = Document.objects.create( + checksum=str(i), + pk=i + 1, + title=f"Document {i + 1}", + content=f"Things document {i + 1}", + ) + backend.add_or_update(doc) response = self.client.get("/api/documents/?query=thing") - correction = response.data["corrected_query"] - - self.assertEqual(correction, "things") + self.assertIsNone(response.data["corrected_query"]) response = self.client.get("/api/documents/?query=things") - correction = response.data["corrected_query"] - - self.assertEqual(correction, None) + self.assertIsNone(response.data["corrected_query"]) def test_search_spelling_suggestion_suppressed_for_private_terms(self): owner = User.objects.create_user("owner") @@ -771,24 +776,24 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): Permission.objects.get(codename="view_document"), ) - with AsyncWriter(index.open_index()) as writer: - for i in range(55): - private_doc = Document.objects.create( - checksum=f"p{i}", - pk=100 + i, - title=f"Private Document {i + 1}", - content=f"treasury document {i + 1}", - owner=owner, - ) - visible_doc = Document.objects.create( - checksum=f"v{i}", - pk=200 + i, - title=f"Visible Document {i + 1}", - content=f"public ledger {i + 1}", - owner=attacker, - ) - index.update_document(writer, private_doc) - index.update_document(writer, visible_doc) + backend = get_backend() + for i in range(5): + 
private_doc = Document.objects.create( + checksum=f"p{i}", + pk=100 + i, + title=f"Private Document {i + 1}", + content=f"treasury document {i + 1}", + owner=owner, + ) + visible_doc = Document.objects.create( + checksum=f"v{i}", + pk=200 + i, + title=f"Visible Document {i + 1}", + content=f"public ledger {i + 1}", + owner=attacker, + ) + backend.add_or_update(private_doc) + backend.add_or_update(visible_doc) self.client.force_authenticate(user=attacker) @@ -798,26 +803,6 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): self.assertEqual(response.data["count"], 0) self.assertIsNone(response.data["corrected_query"]) - @mock.patch( - "whoosh.searching.Searcher.correct_query", - side_effect=Exception("Test error"), - ) - def test_corrected_query_error(self, mock_correct_query) -> None: - """ - GIVEN: - - A query that raises an error on correction - WHEN: - - API request for search with that query - THEN: - - The error is logged and the search proceeds - """ - with self.assertLogs("paperless.index", level="INFO") as cm: - response = self.client.get("/api/documents/?query=2025-06-04") - self.assertEqual(response.status_code, status.HTTP_200_OK) - error_str = cm.output[0] - expected_str = "Error while correcting query '2025-06-04': Test error" - self.assertIn(expected_str, error_str) - def test_search_more_like(self) -> None: """ GIVEN: @@ -847,16 +832,16 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): checksum="C", ) d4 = Document.objects.create( - title="Monty Python & the Holy Grail", - content="And now for something completely different", + title="Quarterly Report", + content="quarterly revenue profit margin earnings growth", pk=4, checksum="ABC", ) - with AsyncWriter(index.open_index()) as writer: - index.update_document(writer, d1) - index.update_document(writer, d2) - index.update_document(writer, d3) - index.update_document(writer, d4) + backend = get_backend() + backend.add_or_update(d1) + backend.add_or_update(d2) + 
backend.add_or_update(d3) + backend.add_or_update(d4) response = self.client.get(f"/api/documents/?more_like_id={d2.id}") @@ -864,9 +849,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): results = response.data["results"] - self.assertEqual(len(results), 2) - self.assertEqual(results[0]["id"], d3.id) - self.assertEqual(results[1]["id"], d1.id) + self.assertGreaterEqual(len(results), 1) + result_ids = [r["id"] for r in results] + self.assertIn(d3.id, result_ids) + self.assertNotIn(d4.id, result_ids) def test_search_more_like_requires_view_permission_on_seed_document( self, @@ -908,10 +894,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): pk=12, ) - with AsyncWriter(index.open_index()) as writer: - index.update_document(writer, private_seed) - index.update_document(writer, visible_doc) - index.update_document(writer, other_doc) + backend = get_backend() + backend.add_or_update(private_seed) + backend.add_or_update(visible_doc) + backend.add_or_update(other_doc) self.client.force_authenticate(user=attacker) @@ -985,9 +971,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): value_text="foobard4", ) - with AsyncWriter(index.open_index()) as writer: - for doc in Document.objects.all(): - index.update_document(writer, doc) + backend = get_backend() + for doc in Document.objects.all(): + backend.add_or_update(doc) def search_query(q): r = self.client.get("/api/documents/?query=test" + q) @@ -1203,9 +1189,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): Document.objects.create(checksum="3", content="test 3", owner=u2) Document.objects.create(checksum="4", content="test 4") - with AsyncWriter(index.open_index()) as writer: - for doc in Document.objects.all(): - index.update_document(writer, doc) + backend = get_backend() + for doc in Document.objects.all(): + backend.add_or_update(doc) self.client.force_authenticate(user=u1) r = self.client.get("/api/documents/?query=test") @@ -1256,9 +1242,9 @@ class 
TestDocumentSearchApi(DirectoriesMixin, APITestCase): d3 = Document.objects.create(checksum="3", content="test 3", owner=u2) Document.objects.create(checksum="4", content="test 4") - with AsyncWriter(index.open_index()) as writer: - for doc in Document.objects.all(): - index.update_document(writer, doc) + backend = get_backend() + for doc in Document.objects.all(): + backend.add_or_update(doc) self.client.force_authenticate(user=u1) r = self.client.get("/api/documents/?query=test") @@ -1278,9 +1264,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): assign_perm("view_document", u1, d3) assign_perm("view_document", u2, d1) - with AsyncWriter(index.open_index()) as writer: - for doc in [d1, d2, d3]: - index.update_document(writer, doc) + backend.add_or_update(d1) + backend.add_or_update(d2) + backend.add_or_update(d3) self.client.force_authenticate(user=u1) r = self.client.get("/api/documents/?query=test") @@ -1343,9 +1329,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): user=u1, ) - with AsyncWriter(index.open_index()) as writer: - for doc in Document.objects.all(): - index.update_document(writer, doc) + backend = get_backend() + for doc in Document.objects.all(): + backend.add_or_update(doc) def search_query(q): r = self.client.get("/api/documents/?query=test" + q) @@ -1378,13 +1364,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): search_query("&ordering=-num_notes"), [d1.id, d3.id, d2.id], ) + # owner sort: ORM orders by owner_id (integer); NULLs first in SQLite ASC self.assertListEqual( search_query("&ordering=owner"), - [d1.id, d2.id, d3.id], + [d3.id, d1.id, d2.id], ) self.assertListEqual( search_query("&ordering=-owner"), - [d3.id, d2.id, d1.id], + [d2.id, d1.id, d3.id], ) @mock.patch("documents.bulk_edit.bulk_update_documents") @@ -1441,12 +1428,12 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): ) set_permissions([4, 5], set_permissions={}, owner=user2, merge=False) - with index.open_index_writer() 
as writer: - index.update_document(writer, d1) - index.update_document(writer, d2) - index.update_document(writer, d3) - index.update_document(writer, d4) - index.update_document(writer, d5) + backend = get_backend() + backend.add_or_update(d1) + backend.add_or_update(d2) + backend.add_or_update(d3) + backend.add_or_update(d4) + backend.add_or_update(d5) correspondent1 = Correspondent.objects.create(name="bank correspondent 1") Correspondent.objects.create(name="correspondent 2") diff --git a/src/documents/tests/test_api_status.py b/src/documents/tests/test_api_status.py index b8f7d408e..32717af63 100644 --- a/src/documents/tests/test_api_status.py +++ b/src/documents/tests/test_api_status.py @@ -191,40 +191,42 @@ class TestSystemStatus(APITestCase): self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.data["tasks"]["celery_status"], "OK") - @override_settings(INDEX_DIR=Path("/tmp/index")) - @mock.patch("whoosh.index.FileIndex.last_modified") - def test_system_status_index_ok(self, mock_last_modified) -> None: + @mock.patch("documents.search.get_backend") + def test_system_status_index_ok(self, mock_get_backend) -> None: """ GIVEN: - - The index last modified time is set + - The index is accessible WHEN: - The user requests the system status THEN: - The response contains the correct index status """ - mock_last_modified.return_value = 1707839087 - self.client.force_login(self.user) - response = self.client.get(self.ENDPOINT) + mock_get_backend.return_value = mock.MagicMock() + # Use the temp dir created in setUp (self.tmp_dir) as a real INDEX_DIR + # with a real file so the mtime lookup works + sentinel = self.tmp_dir / "sentinel.txt" + sentinel.write_text("ok") + with self.settings(INDEX_DIR=self.tmp_dir): + self.client.force_login(self.user) + response = self.client.get(self.ENDPOINT) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.data["tasks"]["index_status"], "OK") 
self.assertIsNotNone(response.data["tasks"]["index_last_modified"]) - @override_settings(INDEX_DIR=Path("/tmp/index/")) - @mock.patch("documents.index.open_index", autospec=True) - def test_system_status_index_error(self, mock_open_index) -> None: + @mock.patch("documents.search.get_backend") + def test_system_status_index_error(self, mock_get_backend) -> None: """ GIVEN: - - The index is not found + - The index cannot be opened WHEN: - The user requests the system status THEN: - The response contains the correct index status """ - mock_open_index.return_value = None - mock_open_index.side_effect = Exception("Index error") + mock_get_backend.side_effect = Exception("Index error") self.client.force_login(self.user) response = self.client.get(self.ENDPOINT) - mock_open_index.assert_called_once() + mock_get_backend.assert_called_once() self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.data["tasks"]["index_status"], "ERROR") self.assertIsNotNone(response.data["tasks"]["index_error"]) diff --git a/src/documents/tests/test_delayedquery.py b/src/documents/tests/test_delayedquery.py deleted file mode 100644 index 6357d9030..000000000 --- a/src/documents/tests/test_delayedquery.py +++ /dev/null @@ -1,58 +0,0 @@ -from django.test import TestCase -from whoosh import query - -from documents.index import get_permissions_criterias -from documents.models import User - - -class TestDelayedQuery(TestCase): - def setUp(self) -> None: - super().setUp() - # all tests run without permission criteria, so has_no_owner query will always - # be appended. 
- self.has_no_owner = query.Or([query.Term("has_owner", text=False)]) - - def _get_testset__id__in(self, param, field): - return ( - {f"{param}__id__in": "42,43"}, - query.And( - [ - query.Or( - [ - query.Term(f"{field}_id", "42"), - query.Term(f"{field}_id", "43"), - ], - ), - self.has_no_owner, - ], - ), - ) - - def _get_testset__id__none(self, param, field): - return ( - {f"{param}__id__none": "42,43"}, - query.And( - [ - query.Not(query.Term(f"{field}_id", "42")), - query.Not(query.Term(f"{field}_id", "43")), - self.has_no_owner, - ], - ), - ) - - def test_get_permission_criteria(self) -> None: - # tests contains tuples of user instances and the expected filter - tests = ( - (None, [query.Term("has_owner", text=False)]), - (User(42, username="foo", is_superuser=True), []), - ( - User(42, username="foo", is_superuser=False), - [ - query.Term("has_owner", text=False), - query.Term("owner_id", 42), - query.Term("viewer_id", "42"), - ], - ), - ) - for user, expected in tests: - self.assertEqual(get_permissions_criterias(user), expected) diff --git a/src/documents/tests/test_index.py b/src/documents/tests/test_index.py deleted file mode 100644 index 5f1c7487d..000000000 --- a/src/documents/tests/test_index.py +++ /dev/null @@ -1,371 +0,0 @@ -from datetime import datetime -from unittest import mock - -from django.conf import settings -from django.contrib.auth.models import User -from django.test import SimpleTestCase -from django.test import TestCase -from django.test import override_settings -from django.utils.timezone import get_current_timezone -from django.utils.timezone import timezone - -from documents import index -from documents.models import Document -from documents.tests.utils import DirectoriesMixin - - -class TestAutoComplete(DirectoriesMixin, TestCase): - def test_auto_complete(self) -> None: - doc1 = Document.objects.create( - title="doc1", - checksum="A", - content="test test2 test3", - ) - doc2 = Document.objects.create(title="doc2", checksum="B", 
content="test test2") - doc3 = Document.objects.create(title="doc3", checksum="C", content="test2") - - index.add_or_update_document(doc1) - index.add_or_update_document(doc2) - index.add_or_update_document(doc3) - - ix = index.open_index() - - self.assertListEqual( - index.autocomplete(ix, "tes"), - [b"test2", b"test", b"test3"], - ) - self.assertListEqual( - index.autocomplete(ix, "tes", limit=3), - [b"test2", b"test", b"test3"], - ) - self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test2"]) - self.assertListEqual(index.autocomplete(ix, "tes", limit=0), []) - - def test_archive_serial_number_ranging(self) -> None: - """ - GIVEN: - - Document with an archive serial number above schema allowed size - WHEN: - - Document is provided to the index - THEN: - - Error is logged - - Document ASN is reset to 0 for the index - """ - doc1 = Document.objects.create( - title="doc1", - checksum="A", - content="test test2 test3", - # yes, this is allowed, unless full_clean is run - # DRF does call the validators, this test won't - archive_serial_number=Document.ARCHIVE_SERIAL_NUMBER_MAX + 1, - ) - with self.assertLogs("paperless.index", level="ERROR") as cm: - with mock.patch( - "documents.index.AsyncWriter.update_document", - ) as mocked_update_doc: - index.add_or_update_document(doc1) - - mocked_update_doc.assert_called_once() - _, kwargs = mocked_update_doc.call_args - - self.assertEqual(kwargs["asn"], 0) - - error_str = cm.output[0] - expected_str = "ERROR:paperless.index:Not indexing Archive Serial Number 4294967296 of document 1" - self.assertIn(expected_str, error_str) - - def test_archive_serial_number_is_none(self) -> None: - """ - GIVEN: - - Document with no archive serial number - WHEN: - - Document is provided to the index - THEN: - - ASN isn't touched - """ - doc1 = Document.objects.create( - title="doc1", - checksum="A", - content="test test2 test3", - ) - with mock.patch( - "documents.index.AsyncWriter.update_document", - ) as mocked_update_doc: - 
index.add_or_update_document(doc1) - - mocked_update_doc.assert_called_once() - _, kwargs = mocked_update_doc.call_args - - self.assertIsNone(kwargs["asn"]) - - @override_settings(TIME_ZONE="Pacific/Auckland") - def test_added_today_respects_local_timezone_boundary(self) -> None: - tz = get_current_timezone() - fixed_now = datetime(2025, 7, 20, 15, 0, 0, tzinfo=tz) - - # Fake a time near the local boundary (1 AM NZT = 13:00 UTC on previous UTC day) - local_dt = datetime(2025, 7, 20, 1, 0, 0).replace(tzinfo=tz) - utc_dt = local_dt.astimezone(timezone.utc) - - doc = Document.objects.create( - title="Time zone", - content="Testing added:today", - checksum="edgecase123", - added=utc_dt, - ) - - with index.open_index_writer() as writer: - index.update_document(writer, doc) - - superuser = User.objects.create_superuser(username="testuser") - self.client.force_login(superuser) - - with mock.patch("documents.index.now", return_value=fixed_now): - response = self.client.get("/api/documents/?query=added:today") - results = response.json()["results"] - self.assertEqual(len(results), 1) - self.assertEqual(results[0]["id"], doc.id) - - response = self.client.get("/api/documents/?query=added:yesterday") - results = response.json()["results"] - self.assertEqual(len(results), 0) - - -@override_settings(TIME_ZONE="UTC") -class TestRewriteNaturalDateKeywords(SimpleTestCase): - """ - Unit tests for rewrite_natural_date_keywords function. 
- """ - - def _rewrite_with_now(self, query: str, now_dt: datetime) -> str: - with mock.patch("documents.index.now", return_value=now_dt): - return index.rewrite_natural_date_keywords(query) - - def _assert_rewrite_contains( - self, - query: str, - now_dt: datetime, - *expected_fragments: str, - ) -> str: - result = self._rewrite_with_now(query, now_dt) - for fragment in expected_fragments: - self.assertIn(fragment, result) - return result - - def test_range_keywords(self) -> None: - """ - Test various different range keywords - """ - cases = [ - ( - "added:today", - datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc), - ("added:[20250720", "TO 20250720"), - ), - ( - "added:yesterday", - datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc), - ("added:[20250719", "TO 20250719"), - ), - ( - "added:this month", - datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc), - ("added:[20250701", "TO 20250731"), - ), - ( - "added:previous month", - datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc), - ("added:[20250601", "TO 20250630"), - ), - ( - "added:this year", - datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc), - ("added:[20250101", "TO 20251231"), - ), - ( - "added:previous year", - datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc), - ("added:[20240101", "TO 20241231"), - ), - # Previous quarter from July 15, 2025 is April-June. - ( - "added:previous quarter", - datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc), - ("added:[20250401", "TO 20250630"), - ), - # July 20, 2025 is a Sunday (weekday 6) so previous week is July 7-13. 
- ( - "added:previous week", - datetime(2025, 7, 20, 12, 0, 0, tzinfo=timezone.utc), - ("added:[20250707", "TO 20250713"), - ), - ] - - for query, now_dt, fragments in cases: - with self.subTest(query=query): - self._assert_rewrite_contains(query, now_dt, *fragments) - - def test_additional_fields(self) -> None: - fixed_now = datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc) - # created - self._assert_rewrite_contains("created:today", fixed_now, "created:[20250720") - # modified - self._assert_rewrite_contains("modified:today", fixed_now, "modified:[20250720") - - def test_basic_syntax_variants(self) -> None: - """ - Test that quoting, casing, and multi-clause queries are parsed. - """ - fixed_now = datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc) - - # quoted keywords - result1 = self._rewrite_with_now('added:"today"', fixed_now) - result2 = self._rewrite_with_now("added:'today'", fixed_now) - self.assertIn("added:[20250720", result1) - self.assertIn("added:[20250720", result2) - - # case insensitivity - for query in ("added:TODAY", "added:Today", "added:ToDaY"): - with self.subTest(case_variant=query): - self._assert_rewrite_contains(query, fixed_now, "added:[20250720") - - # multiple clauses - result = self._rewrite_with_now("added:today created:yesterday", fixed_now) - self.assertIn("added:[20250720", result) - self.assertIn("created:[20250719", result) - - def test_no_match(self) -> None: - """ - Test that queries without keywords are unchanged. - """ - query = "title:test content:example" - result = index.rewrite_natural_date_keywords(query) - self.assertEqual(query, result) - - @override_settings(TIME_ZONE="Pacific/Auckland") - def test_timezone_awareness(self) -> None: - """ - Test timezone conversion. 
- """ - # July 20, 2025 1:00 AM NZST = July 19, 2025 13:00 UTC - fixed_now = datetime(2025, 7, 20, 1, 0, 0, tzinfo=get_current_timezone()) - result = self._rewrite_with_now("added:today", fixed_now) - # Should convert to UTC properly - self.assertIn("added:[20250719", result) - - -class TestIndexResilience(DirectoriesMixin, SimpleTestCase): - def _assert_recreate_called(self, mock_create_in) -> None: - mock_create_in.assert_called_once() - path_arg, schema_arg = mock_create_in.call_args.args - self.assertEqual(path_arg, settings.INDEX_DIR) - self.assertEqual(schema_arg.__class__.__name__, "Schema") - - def test_transient_missing_segment_does_not_force_recreate(self) -> None: - """ - GIVEN: - - Index directory exists - WHEN: - - open_index is called - - Opening the index raises FileNotFoundError once due to a - transient missing segment - THEN: - - Index is opened successfully on retry - - Index is not recreated - """ - file_marker = settings.INDEX_DIR / "file_marker.txt" - file_marker.write_text("keep") - expected_index = object() - - with ( - mock.patch("documents.index.exists_in", return_value=True), - mock.patch( - "documents.index.open_dir", - side_effect=[FileNotFoundError("missing"), expected_index], - ) as mock_open_dir, - mock.patch( - "documents.index.create_in", - ) as mock_create_in, - mock.patch( - "documents.index.rmtree", - ) as mock_rmtree, - ): - ix = index.open_index() - - self.assertIs(ix, expected_index) - self.assertGreaterEqual(mock_open_dir.call_count, 2) - mock_rmtree.assert_not_called() - mock_create_in.assert_not_called() - self.assertEqual(file_marker.read_text(), "keep") - - def test_transient_errors_exhaust_retries_and_recreate(self) -> None: - """ - GIVEN: - - Index directory exists - WHEN: - - open_index is called - - Opening the index raises FileNotFoundError multiple times due to - transient missing segments - THEN: - - Index is recreated after retries are exhausted - """ - recreated_index = object() - - with ( - 
self.assertLogs("paperless.index", level="ERROR") as cm, - mock.patch("documents.index.exists_in", return_value=True), - mock.patch( - "documents.index.open_dir", - side_effect=FileNotFoundError("missing"), - ) as mock_open_dir, - mock.patch("documents.index.rmtree") as mock_rmtree, - mock.patch( - "documents.index.create_in", - return_value=recreated_index, - ) as mock_create_in, - ): - ix = index.open_index() - - self.assertIs(ix, recreated_index) - self.assertEqual(mock_open_dir.call_count, 4) - mock_rmtree.assert_called_once_with(settings.INDEX_DIR) - self._assert_recreate_called(mock_create_in) - self.assertIn( - "Error while opening the index after retries, recreating.", - cm.output[0], - ) - - def test_non_transient_error_recreates_index(self) -> None: - """ - GIVEN: - - Index directory exists - WHEN: - - open_index is called - - Opening the index raises a "non-transient" error - THEN: - - Index is recreated - """ - recreated_index = object() - - with ( - self.assertLogs("paperless.index", level="ERROR") as cm, - mock.patch("documents.index.exists_in", return_value=True), - mock.patch( - "documents.index.open_dir", - side_effect=RuntimeError("boom"), - ), - mock.patch("documents.index.rmtree") as mock_rmtree, - mock.patch( - "documents.index.create_in", - return_value=recreated_index, - ) as mock_create_in, - ): - ix = index.open_index() - - self.assertIs(ix, recreated_index) - mock_rmtree.assert_called_once_with(settings.INDEX_DIR) - self._assert_recreate_called(mock_create_in) - self.assertIn( - "Error while opening the index, recreating.", - cm.output[0], - ) diff --git a/src/documents/tests/test_management.py b/src/documents/tests/test_management.py index 7719d21dd..6ea4431fd 100644 --- a/src/documents/tests/test_management.py +++ b/src/documents/tests/test_management.py @@ -103,16 +103,75 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase): @pytest.mark.management -class TestMakeIndex(TestCase): - 
@mock.patch("documents.management.commands.document_index.index_reindex") - def test_reindex(self, m) -> None: +@pytest.mark.django_db +class TestMakeIndex: + def test_reindex(self, mocker: MockerFixture) -> None: + """Reindex command must call the backend rebuild method to recreate the index.""" + mock_get_backend = mocker.patch( + "documents.management.commands.document_index.get_backend", + ) call_command("document_index", "reindex", skip_checks=True) - m.assert_called_once() + mock_get_backend.return_value.rebuild.assert_called_once() - @mock.patch("documents.management.commands.document_index.index_optimize") - def test_optimize(self, m) -> None: + def test_optimize(self) -> None: + """Optimize command must execute without error (Tantivy handles optimization automatically).""" call_command("document_index", "optimize", skip_checks=True) - m.assert_called_once() + + def test_reindex_recreate_wipes_index(self, mocker: MockerFixture) -> None: + """Reindex with --recreate must wipe the index before rebuilding.""" + mock_wipe = mocker.patch( + "documents.management.commands.document_index.wipe_index", + ) + mock_get_backend = mocker.patch( + "documents.management.commands.document_index.get_backend", + ) + call_command("document_index", "reindex", recreate=True, skip_checks=True) + mock_wipe.assert_called_once() + mock_get_backend.return_value.rebuild.assert_called_once() + + def test_reindex_without_recreate_does_not_wipe_index( + self, + mocker: MockerFixture, + ) -> None: + """Reindex without --recreate must not wipe the index.""" + mock_wipe = mocker.patch( + "documents.management.commands.document_index.wipe_index", + ) + mocker.patch( + "documents.management.commands.document_index.get_backend", + ) + call_command("document_index", "reindex", skip_checks=True) + mock_wipe.assert_not_called() + + def test_reindex_if_needed_skips_when_up_to_date( + self, + mocker: MockerFixture, + ) -> None: + """Conditional reindex must skip rebuild when schema version and 
language match.""" + mocker.patch( + "documents.management.commands.document_index.needs_rebuild", + return_value=False, + ) + mock_get_backend = mocker.patch( + "documents.management.commands.document_index.get_backend", + ) + call_command("document_index", "reindex", if_needed=True, skip_checks=True) + mock_get_backend.return_value.rebuild.assert_not_called() + + def test_reindex_if_needed_runs_when_rebuild_needed( + self, + mocker: MockerFixture, + ) -> None: + """Conditional reindex must proceed with rebuild when schema version or language changed.""" + mocker.patch( + "documents.management.commands.document_index.needs_rebuild", + return_value=True, + ) + mock_get_backend = mocker.patch( + "documents.management.commands.document_index.get_backend", + ) + call_command("document_index", "reindex", if_needed=True, skip_checks=True) + mock_get_backend.return_value.rebuild.assert_called_once() @pytest.mark.management diff --git a/src/documents/tests/test_matchables.py b/src/documents/tests/test_matchables.py index e038bf786..e13d3827a 100644 --- a/src/documents/tests/test_matchables.py +++ b/src/documents/tests/test_matchables.py @@ -452,7 +452,10 @@ class TestDocumentConsumptionFinishedSignal(TestCase): """ def setUp(self) -> None: + from documents.search import reset_backend + TestCase.setUp(self) + reset_backend() User.objects.create_user(username="test_consumer", password="12345") self.doc_contains = Document.objects.create( content="I contain the keyword.", @@ -464,6 +467,9 @@ class TestDocumentConsumptionFinishedSignal(TestCase): override_settings(INDEX_DIR=self.index_dir).enable() def tearDown(self) -> None: + from documents.search import reset_backend + + reset_backend() shutil.rmtree(self.index_dir, ignore_errors=True) def test_tag_applied_any(self) -> None: diff --git a/src/documents/tests/test_tag_hierarchy.py b/src/documents/tests/test_tag_hierarchy.py index 12e5475f3..57aa27e3a 100644 --- a/src/documents/tests/test_tag_hierarchy.py +++ 
b/src/documents/tests/test_tag_hierarchy.py @@ -11,10 +11,12 @@ from documents.models import WorkflowAction from documents.models import WorkflowTrigger from documents.serialisers import TagSerializer from documents.signals.handlers import run_workflows +from documents.tests.utils import DirectoriesMixin -class TestTagHierarchy(APITestCase): +class TestTagHierarchy(DirectoriesMixin, APITestCase): def setUp(self) -> None: + super().setUp() self.user = User.objects.create_superuser(username="admin") self.client.force_authenticate(user=self.user) diff --git a/src/documents/tests/test_task_signals.py b/src/documents/tests/test_task_signals.py index 4f17a8fd2..3dcbbeaff 100644 --- a/src/documents/tests/test_task_signals.py +++ b/src/documents/tests/test_task_signals.py @@ -2,6 +2,7 @@ import uuid from unittest import mock import celery +from django.contrib.auth import get_user_model from django.test import TestCase from documents.data_models import ConsumableDocument @@ -20,6 +21,11 @@ from documents.tests.utils import DirectoriesMixin @mock.patch("documents.consumer.magic.from_file", fake_magic_from_file) class TestTaskSignalHandler(DirectoriesMixin, TestCase): + @classmethod + def setUpTestData(cls) -> None: + super().setUpTestData() + cls.user = get_user_model().objects.create_user(username="testuser") + def util_call_before_task_publish_handler( self, headers_to_use, @@ -57,7 +63,7 @@ class TestTaskSignalHandler(DirectoriesMixin, TestCase): ), DocumentMetadataOverrides( title="Hello world", - owner_id=1, + owner_id=self.user.id, ), ), # kwargs @@ -75,7 +81,7 @@ class TestTaskSignalHandler(DirectoriesMixin, TestCase): self.assertEqual(headers["id"], task.task_id) self.assertEqual("hello-999.pdf", task.task_file_name) self.assertEqual(PaperlessTask.TaskName.CONSUME_FILE, task.task_name) - self.assertEqual(1, task.owner_id) + self.assertEqual(self.user.id, task.owner_id) self.assertEqual(celery.states.PENDING, task.status) def test_task_prerun_handler(self) -> None: @@ 
-208,10 +214,12 @@ class TestTaskSignalHandler(DirectoriesMixin, TestCase): mime_type="application/pdf", ) - with mock.patch("documents.index.add_or_update_document") as add: + with mock.patch("documents.search.get_backend") as mock_get_backend: + mock_backend = mock.MagicMock() + mock_get_backend.return_value = mock_backend add_to_index(sender=None, document=root) - add.assert_called_once_with(root) + mock_backend.add_or_update.assert_called_once_with(root, effective_content="") def test_add_to_index_reindexes_root_for_version_documents(self) -> None: root = Document.objects.create( @@ -226,13 +234,17 @@ class TestTaskSignalHandler(DirectoriesMixin, TestCase): root_document=root, ) - with mock.patch("documents.index.add_or_update_document") as add: + with mock.patch("documents.search.get_backend") as mock_get_backend: + mock_backend = mock.MagicMock() + mock_get_backend.return_value = mock_backend add_to_index(sender=None, document=version) - self.assertEqual(add.call_count, 2) - self.assertEqual(add.call_args_list[0].args[0].id, version.id) - self.assertEqual(add.call_args_list[1].args[0].id, root.id) + self.assertEqual(mock_backend.add_or_update.call_count, 1) self.assertEqual( - add.call_args_list[1].kwargs, + mock_backend.add_or_update.call_args_list[0].args[0].id, + version.id, + ) + self.assertEqual( + mock_backend.add_or_update.call_args_list[0].kwargs, {"effective_content": version.content}, ) diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py index 37f1e6fed..9fb9ddbc6 100644 --- a/src/documents/tests/test_tasks.py +++ b/src/documents/tests/test_tasks.py @@ -23,29 +23,10 @@ from documents.tests.utils import DirectoriesMixin from documents.tests.utils import FileSystemAssertsMixin -class TestIndexReindex(DirectoriesMixin, TestCase): - def test_index_reindex(self) -> None: - Document.objects.create( - title="test", - content="my document", - checksum="wow", - added=timezone.now(), - created=timezone.now(), - 
modified=timezone.now(), - ) - - tasks.index_reindex() - +@pytest.mark.django_db +class TestIndexOptimize: def test_index_optimize(self) -> None: - Document.objects.create( - title="test", - content="my document", - checksum="wow", - added=timezone.now(), - created=timezone.now(), - modified=timezone.now(), - ) - + """Index optimization task must execute without error (Tantivy handles optimization automatically).""" tasks.index_optimize() diff --git a/src/documents/tests/test_workflows.py b/src/documents/tests/test_workflows.py index 58d989882..0fd893a5b 100644 --- a/src/documents/tests/test_workflows.py +++ b/src/documents/tests/test_workflows.py @@ -4802,6 +4802,7 @@ class TestWebhookSecurity: @pytest.mark.django_db +@pytest.mark.usefixtures("_search_index") class TestDateWorkflowLocalization( SampleDirMixin, ): diff --git a/src/documents/tests/utils.py b/src/documents/tests/utils.py index 346d895aa..cc4190974 100644 --- a/src/documents/tests/utils.py +++ b/src/documents/tests/utils.py @@ -157,11 +157,17 @@ class DirectoriesMixin: """ def setUp(self) -> None: + from documents.search import reset_backend + + reset_backend() self.dirs = setup_directories() super().setUp() def tearDown(self) -> None: + from documents.search import reset_backend + super().tearDown() + reset_backend() remove_dirs(self.dirs) diff --git a/src/documents/utils.py b/src/documents/utils.py index 975185a5f..2ed6758dd 100644 --- a/src/documents/utils.py +++ b/src/documents/utils.py @@ -1,14 +1,27 @@ import hashlib import logging import shutil +from collections.abc import Callable +from collections.abc import Iterable from os import utime from pathlib import Path from subprocess import CompletedProcess from subprocess import run +from typing import TypeVar from django.conf import settings from PIL import Image +_T = TypeVar("_T") + +# A function that wraps an iterable — typically used to inject a progress bar. 
+IterWrapper = Callable[[Iterable[_T]], Iterable[_T]] + + +def identity(iterable: Iterable[_T]) -> Iterable[_T]: + """Return the iterable unchanged; the no-op default for IterWrapper.""" + return iterable + def _coerce_to_path( source: Path | str, diff --git a/src/documents/views.py b/src/documents/views.py index 244e81161..024e846a0 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -100,7 +100,6 @@ from rest_framework.viewsets import ReadOnlyModelViewSet from rest_framework.viewsets import ViewSet from documents import bulk_edit -from documents import index from documents.bulk_download import ArchiveOnlyStrategy from documents.bulk_download import OriginalAndArchiveStrategy from documents.bulk_download import OriginalsOnlyStrategy @@ -1029,9 +1028,9 @@ class DocumentViewSet( response_data["content"] = content_doc.content response = Response(response_data) - from documents import index + from documents.search import get_backend - index.add_or_update_document(refreshed_doc) + get_backend().add_or_update(refreshed_doc) document_updated.send( sender=self.__class__, @@ -1060,9 +1059,9 @@ class DocumentViewSet( return Response({"results": serializer.data, "selection_data": selection_data}) def destroy(self, request, *args, **kwargs): - from documents import index + from documents.search import get_backend - index.remove_document_from_index(self.get_object()) + get_backend().remove(self.get_object().pk) try: return super().destroy(request, *args, **kwargs) except Exception as e: @@ -1469,9 +1468,9 @@ class DocumentViewSet( doc.modified = timezone.now() doc.save() - from documents import index + from documents.search import get_backend - index.add_or_update_document(doc) + get_backend().add_or_update(doc) notes = serializer.to_representation(doc).get("notes") @@ -1506,9 +1505,9 @@ class DocumentViewSet( doc.modified = timezone.now() doc.save() - from documents import index + from documents.search import get_backend - index.add_or_update_document(doc) + 
get_backend().add_or_update(doc) notes = serializer.to_representation(doc).get("notes") @@ -1820,12 +1819,13 @@ class DocumentViewSet( "Cannot delete the root/original version. Delete the document instead.", ) - from documents import index + from documents.search import get_backend - index.remove_document_from_index(version_doc) + _backend = get_backend() + _backend.remove(version_doc.pk) version_doc_id = version_doc.id version_doc.delete() - index.add_or_update_document(root_doc) + _backend.add_or_update(root_doc) if settings.AUDIT_LOG_ENABLED: actor = ( request.user if request.user and request.user.is_authenticated else None @@ -2025,10 +2025,6 @@ class ChatStreamingView(GenericAPIView): ), ) class UnifiedSearchViewSet(DocumentViewSet): - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - self.searcher = None - def get_serializer_class(self): if self._is_search_request(): return SearchResultSerializer @@ -2041,17 +2037,34 @@ class UnifiedSearchViewSet(DocumentViewSet): or "more_like_id" in self.request.query_params ) - def filter_queryset(self, queryset): - filtered_queryset = super().filter_queryset(queryset) + def list(self, request, *args, **kwargs): + if not self._is_search_request(): + return super().list(request) - if self._is_search_request(): - if "query" in self.request.query_params: - from documents import index + from documents.search import TantivyRelevanceList + from documents.search import get_backend - query_class = index.DelayedFullTextQuery - elif "more_like_id" in self.request.query_params: + try: + backend = get_backend() + # ORM-filtered queryset: permissions + field filters + ordering (DRF backends applied) + filtered_qs = self.filter_queryset(self.get_queryset()) + + user = None if request.user.is_superuser else request.user + + if "query" in request.query_params: + query_str = request.query_params["query"] + results = backend.search( + query_str, + user=user, + page=1, + page_size=10000, + 
sort_field=None, + sort_reverse=False, + ) + else: + # more_like_id — validate permission on the seed document first try: - more_like_doc_id = int(self.request.query_params["more_like_id"]) + more_like_doc_id = int(request.query_params["more_like_id"]) more_like_doc = Document.objects.select_related("owner").get( pk=more_like_doc_id, ) @@ -2059,76 +2072,71 @@ class UnifiedSearchViewSet(DocumentViewSet): raise PermissionDenied(_("Invalid more_like_id")) if not has_perms_owner_aware( - self.request.user, + request.user, "view_document", more_like_doc, ): raise PermissionDenied(_("Insufficient permissions.")) - from documents import index - - query_class = index.DelayedMoreLikeThisQuery - else: - raise ValueError - - return query_class( - self.searcher, - self.request.query_params, - self.paginator.get_page_size(self.request), - filter_queryset=filtered_queryset, - ) - else: - return filtered_queryset - - def list(self, request, *args, **kwargs): - if self._is_search_request(): - from documents import index - - try: - with index.open_index_searcher() as s: - self.searcher = s - queryset = self.filter_queryset(self.get_queryset()) - page = self.paginate_queryset(queryset) - - serializer = self.get_serializer(page, many=True) - response = self.get_paginated_response(serializer.data) - - response.data["corrected_query"] = ( - queryset.suggested_correction - if hasattr(queryset, "suggested_correction") - else None - ) - - if get_boolean( - str( - request.query_params.get( - "include_selection_data", - "false", - ), - ), - ): - result_ids = queryset.get_result_ids() - response.data["selection_data"] = ( - self._get_selection_data_for_queryset( - Document.objects.filter(pk__in=result_ids), - ) - ) - - return response - except NotFound: - raise - except PermissionDenied as e: - invalid_more_like_id_message = _("Invalid more_like_id") - if str(e.detail) == str(invalid_more_like_id_message): - return HttpResponseForbidden(invalid_more_like_id_message) - return 
HttpResponseForbidden(_("Insufficient permissions.")) - except Exception as e: - logger.warning(f"An error occurred listing search results: {e!s}") - return HttpResponseBadRequest( - "Error listing search results, check logs for more detail.", + results = backend.more_like_this( + more_like_doc_id, + user=user, + page=1, + page_size=10000, ) - else: - return super().list(request) + + hits_by_id = {h["id"]: h for h in results.hits} + + # Determine sort order: no ordering param -> Tantivy relevance; otherwise -> ORM order + ordering_param = request.query_params.get("ordering", "").lstrip("-") + if not ordering_param: + # Preserve Tantivy relevance order; intersect with ORM-visible IDs + orm_ids = set(filtered_qs.values_list("pk", flat=True)) + ordered_hits = [h for h in results.hits if h["id"] in orm_ids] + else: + # Use ORM ordering (already applied by DocumentsOrderingFilter) + hit_ids = set(hits_by_id.keys()) + orm_ordered_ids = filtered_qs.filter(id__in=hit_ids).values_list( + "pk", + flat=True, + ) + ordered_hits = [ + hits_by_id[pk] for pk in orm_ordered_ids if pk in hits_by_id + ] + + rl = TantivyRelevanceList(ordered_hits) + page = self.paginate_queryset(rl) + + if page is not None: + serializer = self.get_serializer(page, many=True) + response = self.get_paginated_response(serializer.data) + response.data["corrected_query"] = None + if get_boolean( + str(request.query_params.get("include_selection_data", "false")), + ): + all_ids = [h["id"] for h in ordered_hits] + response.data["selection_data"] = ( + self._get_selection_data_for_queryset( + filtered_qs.filter(pk__in=all_ids), + ) + ) + return response + + serializer = self.get_serializer(ordered_hits, many=True) + return Response(serializer.data) + + except NotFound: + raise + except PermissionDenied as e: + invalid_more_like_id_message = _("Invalid more_like_id") + if str(e.detail) == str(invalid_more_like_id_message): + return HttpResponseForbidden(invalid_more_like_id_message) + return 
HttpResponseForbidden(_("Insufficient permissions.")) + except Exception as e: + logger.warning(f"An error occurred listing search results: {e!s}") + return HttpResponseBadRequest( + "Error listing search results, check logs for more detail.", + ) @action(detail=False, methods=["GET"], name="Get Next ASN") def next_asn(self, request, *args, **kwargs): @@ -2946,18 +2954,9 @@ class SearchAutoCompleteView(GenericAPIView): else: limit = 10 - from documents import index + from documents.search import get_backend - ix = index.open_index() - - return Response( - index.autocomplete( - ix, - term, - limit, - user, - ), - ) + return Response(get_backend().autocomplete(term, limit, user)) @extend_schema_view( @@ -3023,20 +3022,21 @@ class GlobalSearchView(PassUserMixin): # First search by title docs = all_docs.filter(title__icontains=query) if not db_only and len(docs) < OBJECT_LIMIT: - # If we don't have enough results, search by content - from documents import index + # If we don't have enough results, search by content. + # Over-fetch from Tantivy (no permission filter) and rely on + # the ORM all_docs queryset for authoritative permission gating. 
+ from documents.search import get_backend - with index.open_index_searcher() as s: - fts_query = index.DelayedFullTextQuery( - s, - request.query_params, - OBJECT_LIMIT, - filter_queryset=all_docs, - ) - results = fts_query[0:1] - docs = docs | Document.objects.filter( - id__in=[r["id"] for r in results], - ) + fts_results = get_backend().search( + query, + user=None, + page=1, + page_size=1000, + sort_field=None, + sort_reverse=False, + ) + fts_ids = {h["id"] for h in fts_results.hits} + docs = docs | all_docs.filter(id__in=fts_ids) docs = docs[:OBJECT_LIMIT] saved_views = ( get_objects_for_user_owner_aware( @@ -4279,10 +4279,16 @@ class SystemStatusView(PassUserMixin): index_error = None try: - ix = index.open_index() + from documents.search import get_backend + + get_backend() # triggers open/rebuild; raises on error index_status = "OK" - index_last_modified = make_aware( - datetime.fromtimestamp(ix.last_modified()), + # Use the most-recently modified file in the index directory as a proxy + # for last index write time (Tantivy has no single last_modified() call). 
+ index_dir = settings.INDEX_DIR + mtimes = [p.stat().st_mtime for p in index_dir.iterdir() if p.is_file()] + index_last_modified = ( + make_aware(datetime.fromtimestamp(max(mtimes))) if mtimes else None ) except Exception as e: index_status = "ERROR" diff --git a/src/paperless/settings/__init__.py b/src/paperless/settings/__init__.py index 1c33db7c6..3522b3187 100644 --- a/src/paperless/settings/__init__.py +++ b/src/paperless/settings/__init__.py @@ -21,6 +21,7 @@ from paperless.settings.custom import parse_hosting_settings from paperless.settings.custom import parse_ignore_dates from paperless.settings.custom import parse_redis_url from paperless.settings.parsers import get_bool_from_env +from paperless.settings.parsers import get_choice_from_env from paperless.settings.parsers import get_float_from_env from paperless.settings.parsers import get_int_from_env from paperless.settings.parsers import get_list_from_env @@ -85,6 +86,11 @@ EMPTY_TRASH_DIR = ( # threads. MEDIA_LOCK = MEDIA_ROOT / "media.lock" INDEX_DIR = DATA_DIR / "index" + +ADVANCED_FUZZY_SEARCH_THRESHOLD: float | None = get_float_from_env( + "PAPERLESS_ADVANCED_FUZZY_SEARCH_THRESHOLD", +) + MODEL_FILE = get_path_from_env( "PAPERLESS_MODEL_FILE", DATA_DIR / "classification_model.pickle", @@ -1033,10 +1039,55 @@ def _get_nltk_language_setting(ocr_lang: str) -> str | None: return iso_code_to_nltk.get(ocr_lang) +def _get_search_language_setting(ocr_lang: str) -> str | None: + """ + Determine the Tantivy stemmer language. + + If PAPERLESS_SEARCH_LANGUAGE is explicitly set, it is validated against + the languages supported by Tantivy's built-in stemmer and returned as-is. + Otherwise the primary Tesseract language code from PAPERLESS_OCR_LANGUAGE + is mapped to the corresponding ISO 639-1 code understood by Tantivy. + Returns None when unset and the OCR language has no Tantivy stemmer. 
+ """ + explicit = os.environ.get("PAPERLESS_SEARCH_LANGUAGE") + if explicit is not None: + # Lazy import avoids any app-loading order concerns; _tokenizer has no + # Django dependencies so this is safe. + from documents.search._tokenizer import SUPPORTED_LANGUAGES + + return get_choice_from_env("PAPERLESS_SEARCH_LANGUAGE", SUPPORTED_LANGUAGES) + + # Infer from the primary Tesseract language code (ISO 639-2/T → ISO 639-1) + primary = ocr_lang.split("+", maxsplit=1)[0].lower() + _ocr_to_search: dict[str, str] = { + "ara": "ar", + "dan": "da", + "nld": "nl", + "eng": "en", + "fin": "fi", + "fra": "fr", + "deu": "de", + "ell": "el", + "hun": "hu", + "ita": "it", + "nor": "no", + "por": "pt", + "ron": "ro", + "rus": "ru", + "spa": "es", + "swe": "sv", + "tam": "ta", + "tur": "tr", + } + return _ocr_to_search.get(primary) + + NLTK_ENABLED: Final[bool] = get_bool_from_env("PAPERLESS_ENABLE_NLTK", "yes") NLTK_LANGUAGE: str | None = _get_nltk_language_setting(OCR_LANGUAGE) +SEARCH_LANGUAGE: str | None = _get_search_language_setting(OCR_LANGUAGE) + ############################################################################### # Email Preprocessors # ############################################################################### diff --git a/src/paperless/settings/parsers.py b/src/paperless/settings/parsers.py index 09e474bd5..163633d84 100644 --- a/src/paperless/settings/parsers.py +++ b/src/paperless/settings/parsers.py @@ -260,7 +260,7 @@ def get_list_from_env( def get_choice_from_env( env_key: str, - choices: set[str], + choices: set[str] | frozenset[str], default: str | None = None, ) -> str: """ diff --git a/src/paperless/tests/parsers/test_tesseract_custom_settings.py b/src/paperless/tests/parsers/test_tesseract_custom_settings.py index 60d1486f4..9f3afacb6 100644 --- a/src/paperless/tests/parsers/test_tesseract_custom_settings.py +++ b/src/paperless/tests/parsers/test_tesseract_custom_settings.py @@ -14,6 +14,11 @@ from paperless.parsers.tesseract import 
RasterisedDocumentParser class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCase): + @classmethod + def setUpTestData(cls) -> None: + super().setUpTestData() + ApplicationConfiguration.objects.get_or_create() + @staticmethod def get_params(): """ diff --git a/src/paperless/tests/settings/test_settings.py b/src/paperless/tests/settings/test_settings.py index b0ae3c0c5..0694d9360 100644 --- a/src/paperless/tests/settings/test_settings.py +++ b/src/paperless/tests/settings/test_settings.py @@ -2,6 +2,9 @@ import os from unittest import TestCase from unittest import mock +import pytest + +from paperless.settings import _get_search_language_setting from paperless.settings import _parse_paperless_url from paperless.settings import default_threads_per_worker @@ -32,6 +35,48 @@ class TestThreadCalculation(TestCase): self.assertLessEqual(default_workers * default_threads, i) +@pytest.mark.parametrize( + ("env_value", "expected"), + [ + ("en", "en"), + ("de", "de"), + ("fr", "fr"), + ("swedish", "swedish"), + ], +) +def test_get_search_language_setting_explicit_valid( + monkeypatch: pytest.MonkeyPatch, + env_value: str, + expected: str, +) -> None: + """ + GIVEN: + - PAPERLESS_SEARCH_LANGUAGE is set to a valid Tantivy stemmer language + WHEN: + - _get_search_language_setting is called + THEN: + - The explicit value is returned regardless of the OCR language + """ + monkeypatch.setenv("PAPERLESS_SEARCH_LANGUAGE", env_value) + assert _get_search_language_setting("deu") == expected + + +def test_get_search_language_setting_explicit_invalid( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """ + GIVEN: + - PAPERLESS_SEARCH_LANGUAGE is set to an unsupported language code + WHEN: + - _get_search_language_setting is called + THEN: + - ValueError is raised + """ + monkeypatch.setenv("PAPERLESS_SEARCH_LANGUAGE", "klingon") + with pytest.raises(ValueError, match="klingon"): + _get_search_language_setting("eng") + + class TestPaperlessURLSettings(TestCase): 
def test_paperless_url(self) -> None: """ diff --git a/src/paperless/views.py b/src/paperless/views.py index 404bc4339..a3b965f3f 100644 --- a/src/paperless/views.py +++ b/src/paperless/views.py @@ -36,7 +36,6 @@ from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response from rest_framework.viewsets import ModelViewSet -from documents.index import DelayedQuery from documents.permissions import PaperlessObjectPermissions from documents.tasks import llmindex_index from paperless.filters import GroupFilterSet @@ -83,20 +82,12 @@ class StandardPagination(PageNumberPagination): ) def get_all_result_ids(self): + from documents.search import TantivyRelevanceList + query = self.page.paginator.object_list - if isinstance(query, DelayedQuery): - try: - ids = [ - query.searcher.ixreader.stored_fields( - doc_num, - )["id"] - for doc_num in query.saved_results.get(0).results.docs() - ] - except Exception: - pass - else: - ids = self.page.paginator.object_list.values_list("pk", flat=True) - return ids + if isinstance(query, TantivyRelevanceList): + return [h["id"] for h in query._hits] + return self.page.paginator.object_list.values_list("pk", flat=True) def get_paginated_response_schema(self, schema): response_schema = super().get_paginated_response_schema(schema) diff --git a/src/paperless_ai/indexing.py b/src/paperless_ai/indexing.py index bee8f0dd9..a54492f1f 100644 --- a/src/paperless_ai/indexing.py +++ b/src/paperless_ai/indexing.py @@ -1,11 +1,8 @@ import logging import shutil -from collections.abc import Callable -from collections.abc import Iterable from datetime import timedelta from pathlib import Path from typing import TYPE_CHECKING -from typing import TypeVar from celery import states from django.conf import settings @@ -13,22 +10,17 @@ from django.utils import timezone from documents.models import Document from documents.models import PaperlessTask +from documents.utils import IterWrapper +from documents.utils import 
identity from paperless_ai.embedding import build_llm_index_text from paperless_ai.embedding import get_embedding_dim from paperless_ai.embedding import get_embedding_model -_T = TypeVar("_T") -IterWrapper = Callable[[Iterable[_T]], Iterable[_T]] - if TYPE_CHECKING: from llama_index.core import VectorStoreIndex from llama_index.core.schema import BaseNode -def _identity(iterable: Iterable[_T]) -> Iterable[_T]: - return iterable - - logger = logging.getLogger("paperless_ai.indexing") @@ -176,7 +168,7 @@ def vector_store_file_exists(): def update_llm_index( *, - iter_wrapper: IterWrapper[Document] = _identity, + iter_wrapper: IterWrapper[Document] = identity, rebuild=False, ) -> str: """ diff --git a/uv.lock b/uv.lock index 6bbfdc53b..feffefce5 100644 --- a/uv.lock +++ b/uv.lock @@ -350,15 +350,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/73/3183c9e41ca755713bdf2cc1d0810df742c09484e2e1ddd693bee53877c1/brotli-1.2.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d2d085ded05278d1c7f65560aae97b3160aeb2ea2c0b3e26204856beccb60888", size = 1488164, upload-time = "2025-11-05T18:38:53.079Z" }, ] -[[package]] -name = "cached-property" -version = "2.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/76/4b/3d870836119dbe9a5e3c9a61af8cc1a8b69d75aea564572e385882d5aefb/cached_property-2.0.1.tar.gz", hash = "sha256:484d617105e3ee0e4f1f58725e72a8ef9e93deee462222dbd51cd91230897641", size = 10574, upload-time = "2024-10-25T15:43:55.667Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/11/0e/7d8225aab3bc1a0f5811f8e1b557aa034ac04bdf641925b30d3caf586b28/cached_property-2.0.1-py3-none-any.whl", hash = "sha256:f617d70ab1100b7bcf6e42228f9ddcb78c676ffa167278d9f730d1c2fba69ccb", size = 7428, upload-time = "2024-10-25T15:43:54.711Z" }, -] - [[package]] name = "cbor2" version = "5.9.0" @@ -2910,12 +2901,12 @@ dependencies = [ { name = "scikit-learn", marker = "sys_platform == 'darwin' or 
sys_platform == 'linux'" }, { name = "sentence-transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "setproctitle", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "tantivy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "tika-client", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "torch", version = "2.10.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, { name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'linux'" }, { name = "watchfiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "whitenoise", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, - { name = "whoosh-reloaded", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "zxing-cpp", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, ] @@ -2951,6 +2942,7 @@ dev = [ { name = "pytest-sugar", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "pytest-xdist", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "ruff", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "time-machine", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "zensical", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, ] docs = [ @@ -2974,6 +2966,7 @@ testing = [ { name = "pytest-rerunfailures", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "pytest-sugar", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "pytest-xdist", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "time-machine", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, ] typing = [ { name = "celery-types", marker = 
"sys_platform == 'darwin' or sys_platform == 'linux'" }, @@ -3064,11 +3057,11 @@ requires-dist = [ { name = "scikit-learn", specifier = "~=1.8.0" }, { name = "sentence-transformers", specifier = ">=4.1" }, { name = "setproctitle", specifier = "~=1.3.4" }, + { name = "tantivy", specifier = ">=0.25.1" }, { name = "tika-client", specifier = "~=0.10.0" }, { name = "torch", specifier = "~=2.10.0", index = "https://download.pytorch.org/whl/cpu" }, { name = "watchfiles", specifier = ">=1.1.1" }, { name = "whitenoise", specifier = "~=6.11" }, - { name = "whoosh-reloaded", specifier = ">=2.7.5" }, { name = "zxing-cpp", specifier = "~=3.0.0" }, ] provides-extras = ["mariadb", "postgres", "webserver"] @@ -3090,6 +3083,7 @@ dev = [ { name = "pytest-sugar" }, { name = "pytest-xdist", specifier = "~=3.8.0" }, { name = "ruff", specifier = "~=0.15.0" }, + { name = "time-machine", specifier = ">=2.13" }, { name = "zensical", specifier = ">=0.0.21" }, ] docs = [{ name = "zensical", specifier = ">=0.0.21" }] @@ -3111,6 +3105,7 @@ testing = [ { name = "pytest-rerunfailures", specifier = "~=16.1" }, { name = "pytest-sugar" }, { name = "pytest-xdist", specifier = "~=3.8.0" }, + { name = "time-machine", specifier = ">=2.13" }, ] typing = [ { name = "celery-types" }, @@ -4664,6 +4659,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, ] +[[package]] +name = "tantivy" +version = "0.25.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/f9/0cd3955d155d3e3ef74b864769514dd191e5dacba9f0beb7af2d914942ce/tantivy-0.25.1.tar.gz", hash = "sha256:68a3314699a7d18fcf338b52bae8ce46a97dde1128a3e47e33fa4db7f71f265e", size = 75120, upload-time = "2025-12-02T11:57:12.997Z" } +wheels = [ + { url 
= "https://files.pythonhosted.org/packages/4e/7a/8a277f377e8a151fc0e71d4ffc1114aefb6e5e1c7dd609fed0955cf34ed8/tantivy-0.25.1-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:d363d7b4207d3a5aa7f0d212420df35bed18bdb6bae26a2a8bd57428388b7c29", size = 7637033, upload-time = "2025-12-02T11:56:18.104Z" }, + { url = "https://files.pythonhosted.org/packages/71/31/8b4acdedfc9f9a2d04b1340d07eef5213d6f151d1e18da0cb423e5f090d2/tantivy-0.25.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:8f4389cf1d889a1df7c5a3195806b4b56c37cee10d8a26faaa0dea35a867b5ff", size = 3932180, upload-time = "2025-12-02T11:56:19.833Z" }, + { url = "https://files.pythonhosted.org/packages/2f/dc/3e8499c21b4b9795e8f2fc54c68ce5b92905aaeadadaa56ecfa9180b11b1/tantivy-0.25.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99864c09fc54652c3c2486cdf13f86cdc8200f4b481569cb291e095ca5d496e5", size = 4197620, upload-time = "2025-12-02T11:56:21.496Z" }, + { url = "https://files.pythonhosted.org/packages/f8/8e/f2ce62fffc811eb62bead92c7b23c2e218f817cbd54c4f3b802e03ba1438/tantivy-0.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05abf37ddbc5063c575548be0d62931629c086bff7a5a1b67cf5a8f5ebf4cd8c", size = 4183794, upload-time = "2025-12-02T11:56:23.215Z" }, + { url = "https://files.pythonhosted.org/packages/41/e7/6849c713ed0996c7628324c60512c4882006f0a62145e56c624a93407f90/tantivy-0.25.1-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:90fd919e5f611809f746560ecf36eb9be824dec62e21ae17a27243759edb9aa1", size = 7621494, upload-time = "2025-12-02T11:56:27.069Z" }, + { url = "https://files.pythonhosted.org/packages/c5/22/c3d8294600dc6e7fa350daef9ff337d3c06e132b81df727de9f7a50c692a/tantivy-0.25.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:4613c7cf6c23f3a97989819690a0f956d799354957de7a204abcc60083cebe02", size = 3925219, upload-time = "2025-12-02T11:56:29.403Z" 
}, + { url = "https://files.pythonhosted.org/packages/41/fc/cbb1df71dd44c9110eff4eaaeda9d44f2d06182fe0452193be20ddfba93f/tantivy-0.25.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c477bd20b4df804d57dfc5033431bef27cde605695ae141b03abbf6ebc069129", size = 4198699, upload-time = "2025-12-02T11:56:31.359Z" }, + { url = "https://files.pythonhosted.org/packages/47/4d/71abb78b774073c3ce12a4faa4351a9d910a71ffa3659526affba163873d/tantivy-0.25.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9b1a1ba1113c523c7ff7b10f282d6c4074006f7ef8d71e1d973d51bf7291ddb", size = 4183585, upload-time = "2025-12-02T11:56:33.317Z" }, + { url = "https://files.pythonhosted.org/packages/3d/25/73cfbcf1a8ea49be6c42817431cac46b70a119fe64da903fcc2d92b5b511/tantivy-0.25.1-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:f51ff7196c6f31719202080ed8372d5e3d51e92c749c032fb8234f012e99744c", size = 7622530, upload-time = "2025-12-02T11:56:36.839Z" }, + { url = "https://files.pythonhosted.org/packages/12/c8/c0d7591cdf4f7e7a9fc4da786d1ca8cd1aacffaa2be16ea6d401a8e4a566/tantivy-0.25.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:550e63321bfcacc003859f2fa29c1e8e56450807b3c9a501c1add27cfb9236d9", size = 3925637, upload-time = "2025-12-02T11:56:38.425Z" }, + { url = "https://files.pythonhosted.org/packages/3a/09/bedfc223bffec7641b417dd7ab071134b2ef8f8550e9b1fb6014657ef52e/tantivy-0.25.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fde31cc8d6e122faf7902aeea32bc008a429a6e8904e34d3468126a3ec01b016", size = 4197322, upload-time = "2025-12-02T11:56:40.411Z" }, + { url = "https://files.pythonhosted.org/packages/f5/f1/1fa5183500c8042200c9f2b840d34f5bbcfb434a1ee750e7132262d2a5c9/tantivy-0.25.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b11bd5a518b0be645320b47af8493f6a40c4f3234313e37adcf4534a564d27dd", size = 4183143, upload-time = 
"2025-12-02T11:56:42.048Z" }, + { url = "https://files.pythonhosted.org/packages/8b/2f/581519492226f97d23bd0adc95dad991ebeaa73ea6abc8bff389a3096d9a/tantivy-0.25.1-cp313-cp313t-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:dae99e75b7eaa9bf5bd16ab106b416370f08c135aed0e117d62a3201cd1ffe36", size = 7610316, upload-time = "2025-12-02T11:56:45.927Z" }, + { url = "https://files.pythonhosted.org/packages/91/40/5d7bc315ab9e6a22c5572656e8ada1c836cfa96dccf533377504fbc3c9d9/tantivy-0.25.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:506e9533c5ef4d3df43bad64ffecc0aa97c76e361ea610815dc3a20a9d6b30b3", size = 3919882, upload-time = "2025-12-02T11:56:48.469Z" }, + { url = "https://files.pythonhosted.org/packages/02/b9/e0ef2f57a6a72444cb66c2ffbc310ab33ffaace275f1c4b0319d84ea3f18/tantivy-0.25.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dbd4f8f264dacbcc9dee542832da2173fd53deaaea03f082d95214f8b5ed6bc", size = 4196031, upload-time = "2025-12-02T11:56:50.151Z" }, + { url = "https://files.pythonhosted.org/packages/1e/02/bf3f8cacfd08642e14a73f7956a3fb95d58119132c98c121b9065a1f8615/tantivy-0.25.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:824c643ccb640dd9e35e00c5d5054ddf3323f56fe4219d57d428a9eeea13d22c", size = 4183437, upload-time = "2025-12-02T11:56:51.818Z" }, + { url = "https://files.pythonhosted.org/packages/ff/44/9f1d67aa5030f7eebc966c863d1316a510a971dd8bb45651df4acdfae9ed/tantivy-0.25.1-cp314-cp314-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7f5d29ae85dd0f23df8d15b3e7b341d4f9eb5a446bbb9640df48ac1f6d9e0c6c", size = 7623723, upload-time = "2025-12-02T11:56:55.066Z" }, + { url = "https://files.pythonhosted.org/packages/db/30/6e085bd3ed9d12da3c91c185854abd70f9dfd35fb36a75ea98428d42c30b/tantivy-0.25.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:f2d2938fb69a74fc1bb36edfaf7f0d1596fa1264db0f377bda2195c58bcb6245", size = 3926243, 
upload-time = "2025-12-02T11:56:57.058Z" }, + { url = "https://files.pythonhosted.org/packages/32/f5/a00d65433430f51718e5cc6938df571765d7c4e03aedec5aef4ab567aa9b/tantivy-0.25.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f5ff124c4802558e627091e780b362ca944169736caba5a372eef39a79d0ae0", size = 4207186, upload-time = "2025-12-02T11:56:58.803Z" }, + { url = "https://files.pythonhosted.org/packages/19/63/61bdb12fc95f2a7f77bd419a5149bfa9f28caa76cb569bf2b6b06e1d033e/tantivy-0.25.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43b80ef62a340416139c93d19264e5f808da48e04f9305f1092b8ed22be0a5be", size = 4187312, upload-time = "2025-12-02T11:57:00.595Z" }, +] + [[package]] name = "tenacity" version = "9.1.2" @@ -4752,6 +4775,62 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706, upload-time = "2025-10-06T20:22:33.385Z" }, ] +[[package]] +name = "time-machine" +version = "3.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/fc/37b02f6094dbb1f851145330460532176ed2f1dc70511a35828166c41e52/time_machine-3.2.0.tar.gz", hash = "sha256:a4ddd1cea17b8950e462d1805a42b20c81eb9aafc8f66b392dd5ce997e037d79", size = 14804, upload-time = "2025-12-17T23:33:02.599Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f5/e1/03aae5fbaa53859f665094af696338fc7cae733d926a024af69982712350/time_machine-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c188a9dda9fcf975022f1b325b466651b96a4dfc223c523ed7ed8d979f9bf3e8", size = 19143, upload-time = "2025-12-17T23:31:44.258Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/8f/98cb17bebb52b22ff4ec26984dd44280f9c71353c3bae0640a470e6683e5/time_machine-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:17245f1cc2dd13f9d63a174be59bb2684a9e5e0a112ab707e37be92068cd655f", size = 15273, upload-time = "2025-12-17T23:31:45.246Z" }, + { url = "https://files.pythonhosted.org/packages/dd/2f/ca11e4a7897234bb9331fcc5f4ed4714481ba4012370cc79a0ae8c42ea0a/time_machine-3.2.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d9bd1de1996e76efd36ae15970206c5089fb3728356794455bd5cd8d392b5537", size = 31049, upload-time = "2025-12-17T23:31:46.613Z" }, + { url = "https://files.pythonhosted.org/packages/cf/ad/d17d83a59943094e6b6c6a3743caaf6811b12203c3e07a30cc7bcc2ab7ee/time_machine-3.2.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:98493cd50e8b7f941eab69b9e18e697ad69db1a0ec1959f78f3d7b0387107e5c", size = 32632, upload-time = "2025-12-17T23:31:47.72Z" }, + { url = "https://files.pythonhosted.org/packages/71/50/d60576d047a0dfb5638cdfb335e9c3deb6e8528544fa0b3966a8480f72b7/time_machine-3.2.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:31f2a33d595d9f91eb9bc7f157f0dc5721f5789f4c4a9e8b852cdedb2a7d9b16", size = 34289, upload-time = "2025-12-17T23:31:48.913Z" }, + { url = "https://files.pythonhosted.org/packages/fa/fe/4afa602dbdebddde6d0ea4a7fe849e49b9bb85dc3fb415725a87ccb4b471/time_machine-3.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9f78ac4213c10fbc44283edd1a29cfb7d3382484f4361783ddc057292aaa1889", size = 33175, upload-time = "2025-12-17T23:31:50.611Z" }, + { url = "https://files.pythonhosted.org/packages/0d/87/c152e23977c1d7d7c94eb3ed3ea45cc55971796205125c6fdff40db2c60f/time_machine-3.2.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:c1326b09e947b360926d529a96d1d9e126ce120359b63b506ecdc6ee20755c23", size = 31170, upload-time = "2025-12-17T23:31:51.645Z" 
}, + { url = "https://files.pythonhosted.org/packages/80/af/54acf51d0f3ade3b51eab73df6192937c9a938753ef5456dff65eb8630be/time_machine-3.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9f2949f03d15264cc15c38918a2cda8966001f0f4ebe190cbfd9c56d91aed8ac", size = 32292, upload-time = "2025-12-17T23:31:52.803Z" }, + { url = "https://files.pythonhosted.org/packages/71/8b/080c8eedcd67921a52ba5bd0e075362062509ab63c86fc1a0442fad241a6/time_machine-3.2.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:cc4bee5b0214d7dc4ebc91f4a4c600f1a598e9b5606ac751f42cb6f6740b1dbb", size = 19255, upload-time = "2025-12-17T23:31:58.057Z" }, + { url = "https://files.pythonhosted.org/packages/66/17/0e5291e9eb705bf8a5a1305f826e979af307bbeb79def4ddbf4b3f9a81e0/time_machine-3.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3ca036304b4460ae2fdc1b52dd8b1fa7cf1464daa427fc49567413c09aa839c1", size = 15360, upload-time = "2025-12-17T23:31:59.048Z" }, + { url = "https://files.pythonhosted.org/packages/8b/e8/9ab87b71d2e2b62463b9b058b7ae7ac09fb57f8fcd88729dec169d304340/time_machine-3.2.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5442735b41d7a2abc2f04579b4ca6047ed4698a8338a4fec92c7c9423e7938cb", size = 33029, upload-time = "2025-12-17T23:32:00.413Z" }, + { url = "https://files.pythonhosted.org/packages/4b/26/b5ca19da6f25ea905b3e10a0ea95d697c1aeba0404803a43c68f1af253e6/time_machine-3.2.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:97da3e971e505cb637079fb07ab0bcd36e33279f8ecac888ff131f45ef1e4d8d", size = 34579, upload-time = "2025-12-17T23:32:01.431Z" }, + { url = "https://files.pythonhosted.org/packages/79/ca/6ac7ad5f10ea18cc1d9de49716ba38c32132c7b64532430d92ef240c116b/time_machine-3.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3cdda6dee4966e38aeb487309bb414c6cb23a81fc500291c77a8fcd3098832e7", size = 35961, upload-time = 
"2025-12-17T23:32:02.521Z" }, + { url = "https://files.pythonhosted.org/packages/33/67/390dd958bed395ab32d79a9fe61fe111825c0dd4ded54dbba7e867f171e6/time_machine-3.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:33d9efd302a6998bcc8baa4d84f259f8a4081105bd3d7f7af7f1d0abd3b1c8aa", size = 34668, upload-time = "2025-12-17T23:32:03.585Z" }, + { url = "https://files.pythonhosted.org/packages/da/57/c88fff034a4e9538b3ae7c68c9cfb283670b14d17522c5a8bc17d29f9a4b/time_machine-3.2.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3a0b0a33971f14145853c9bd95a6ab0353cf7e0019fa2a7aa1ae9fddfe8eab50", size = 32891, upload-time = "2025-12-17T23:32:04.656Z" }, + { url = "https://files.pythonhosted.org/packages/2d/70/ebbb76022dba0fec8f9156540fc647e4beae1680c787c01b1b6200e56d70/time_machine-3.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2d0be9e5f22c38082d247a2cdcd8a936504e9db60b7b3606855fb39f299e9548", size = 34080, upload-time = "2025-12-17T23:32:06.146Z" }, + { url = "https://files.pythonhosted.org/packages/ee/cd/43ad5efc88298af3c59b66769cea7f055567a85071579ed40536188530c1/time_machine-3.2.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c421a8eb85a4418a7675a41bf8660224318c46cc62e4751c8f1ceca752059090", size = 19318, upload-time = "2025-12-17T23:32:10.518Z" }, + { url = "https://files.pythonhosted.org/packages/b0/f6/084010ef7f4a3f38b5a4900923d7c85b29e797655c4f6ee4ce54d903cca8/time_machine-3.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f4e758f7727d0058c4950c66b58200c187072122d6f7a98b610530a4233ea7b", size = 15390, upload-time = "2025-12-17T23:32:11.625Z" }, + { url = "https://files.pythonhosted.org/packages/25/aa/1cabb74134f492270dc6860cb7865859bf40ecf828be65972827646e91ad/time_machine-3.2.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:154bd3f75c81f70218b2585cc12b60762fb2665c507eec5ec5037d8756d9b4e0", size = 33115, upload-time = "2025-12-17T23:32:13.219Z" }, + { url = 
"https://files.pythonhosted.org/packages/5e/03/78c5d7dfa366924eb4dbfcc3fc917c39a4280ca234b12819cc1f16c03d88/time_machine-3.2.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d50cfe5ebea422c896ad8d278af9648412b7533b8ea6adeeee698a3fd9b1d3b7", size = 34705, upload-time = "2025-12-17T23:32:14.29Z" }, + { url = "https://files.pythonhosted.org/packages/86/93/d5e877c24541f674c6869ff6e9c56833369796010190252e92c9d7ae5f0f/time_machine-3.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:636576501724bd6a9124e69d86e5aef263479e89ef739c5db361469f0463a0a1", size = 36104, upload-time = "2025-12-17T23:32:15.354Z" }, + { url = "https://files.pythonhosted.org/packages/22/1c/d4bae72f388f67efc9609f89b012e434bb19d9549c7a7b47d6c7d9e5c55d/time_machine-3.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:40e6f40c57197fcf7ec32d2c563f4df0a82c42cdcc3cab27f688e98f6060df10", size = 34765, upload-time = "2025-12-17T23:32:16.434Z" }, + { url = "https://files.pythonhosted.org/packages/1d/c3/ac378cf301d527d8dfad2f0db6bad0dfb1ab73212eaa56d6b96ee5d9d20b/time_machine-3.2.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:a1bcf0b846bbfc19a79bc19e3fa04d8c7b1e8101c1b70340ffdb689cd801ea53", size = 33010, upload-time = "2025-12-17T23:32:17.532Z" }, + { url = "https://files.pythonhosted.org/packages/06/35/7ce897319accda7a6970b288a9a8c52d25227342a7508505a2b3d235b649/time_machine-3.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ae55a56c179f4fe7a62575ad5148b6ed82f6c7e5cf2f9a9ec65f2f5b067db5f5", size = 34185, upload-time = "2025-12-17T23:32:18.566Z" }, + { url = "https://files.pythonhosted.org/packages/67/e7/487f0ba5fe6c58186a5e1af2a118dfa2c160fedb37ef53a7e972d410408e/time_machine-3.2.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:59d71545e62525a4b85b6de9ab5c02ee3c61110fd7f636139914a2335dcbfc9c", size = 20000, upload-time = "2025-12-17T23:32:23.058Z" }, + { url = 
"https://files.pythonhosted.org/packages/e1/17/eb2c0054c8d44dd42df84ccd434539249a9c7d0b8eb53f799be2102500ab/time_machine-3.2.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:999672c621c35362bc28e03ca0c7df21500195540773c25993421fd8d6cc5003", size = 15657, upload-time = "2025-12-17T23:32:24.125Z" }, + { url = "https://files.pythonhosted.org/packages/43/21/93443b5d1dd850f8bb9442e90d817a9033dcce6bfbdd3aabbb9786251c80/time_machine-3.2.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5faf7397f0580c7b9d67288522c8d7863e85f0cffadc0f1fccdb2c3dfce5783e", size = 39216, upload-time = "2025-12-17T23:32:25.542Z" }, + { url = "https://files.pythonhosted.org/packages/9f/9e/18544cf8acc72bb1dc03762231c82ecc259733f4bb6770a7bbe5cd138603/time_machine-3.2.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d3dd886ec49f1fa5a00e844f5947e5c0f98ce574750c24b7424c6f77fc1c3e87", size = 40764, upload-time = "2025-12-17T23:32:26.643Z" }, + { url = "https://files.pythonhosted.org/packages/27/f7/9fe9ce2795636a3a7467307af6bdf38bb613ddb701a8a5cd50ec713beb5e/time_machine-3.2.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:da0ecd96bc7bbe450acaaabe569d84e81688f1be8ad58d1470e42371d145fb53", size = 43526, upload-time = "2025-12-17T23:32:27.693Z" }, + { url = "https://files.pythonhosted.org/packages/03/c1/a93e975ba9dec22e87ec92d18c28e67d36bd536f9119ffa439b2892b0c9c/time_machine-3.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:158220e946c1c4fb8265773a0282c88c35a7e3bb5d78e3561214e3b3231166f3", size = 41727, upload-time = "2025-12-17T23:32:28.985Z" }, + { url = "https://files.pythonhosted.org/packages/5f/fb/e3633e5a6bbed1c76bb2e9810dabc2f8467532ffcd29b9aed404b473061a/time_machine-3.2.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c1aee29bc54356f248d5d7dfdd131e12ca825e850a08c0ebdb022266d073013", size = 38952, upload-time = 
"2025-12-17T23:32:30.031Z" }, + { url = "https://files.pythonhosted.org/packages/82/3d/02e9fb2526b3d6b1b45bc8e4d912d95d1cd699d1a3f6df985817d37a0600/time_machine-3.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c8ed2224f09d25b1c2fc98683613aca12f90f682a427eabb68fc824d27014e4a", size = 39829, upload-time = "2025-12-17T23:32:31.075Z" }, + { url = "https://files.pythonhosted.org/packages/61/70/b4b980d126ed155c78d1879c50d60c8dcbd47bd11cb14ee7be50e0dfc07f/time_machine-3.2.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:1398980c017fe5744d66f419e0115ee48a53b00b146d738e1416c225eb610b82", size = 19303, upload-time = "2025-12-17T23:32:35.796Z" }, + { url = "https://files.pythonhosted.org/packages/73/73/eaa33603c69a68fe2b6f54f9dd75481693d62f1d29676531002be06e2d1c/time_machine-3.2.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:4f8f4e35f4191ef70c2ab8ff490761ee9051b891afce2bf86dde3918eb7b537b", size = 15431, upload-time = "2025-12-17T23:32:37.244Z" }, + { url = "https://files.pythonhosted.org/packages/76/10/b81e138e86cc7bab40cdb59d294b341e172201f4a6c84bb0ec080407977a/time_machine-3.2.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6db498686ecf6163c5aa8cf0bcd57bbe0f4081184f247edf3ee49a2612b584f9", size = 33206, upload-time = "2025-12-17T23:32:38.713Z" }, + { url = "https://files.pythonhosted.org/packages/d3/72/4deab446b579e8bd5dca91de98595c5d6bd6a17ce162abf5c5f2ce40d3d8/time_machine-3.2.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:027c1807efb74d0cd58ad16524dec94212fbe900115d70b0123399883657ac0f", size = 34792, upload-time = "2025-12-17T23:32:40.223Z" }, + { url = "https://files.pythonhosted.org/packages/2c/39/439c6b587ddee76d533fe972289d0646e0a5520e14dc83d0a30aeb5565f7/time_machine-3.2.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92432610c05676edd5e6946a073c6f0c926923123ce7caee1018dc10782c713d", size = 
36187, upload-time = "2025-12-17T23:32:41.705Z" }, + { url = "https://files.pythonhosted.org/packages/4b/db/2da4368db15180989bab83746a857bde05ad16e78f326801c142bb747a06/time_machine-3.2.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c25586b62480eb77ef3d953fba273209478e1ef49654592cd6a52a68dfe56a67", size = 34855, upload-time = "2025-12-17T23:32:42.817Z" }, + { url = "https://files.pythonhosted.org/packages/88/84/120a431fee50bc4c241425bee4d3a4910df4923b7ab5f7dff1bf0c772f08/time_machine-3.2.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:6bf3a2fa738d15e0b95d14469a0b8ea42635467408d8b490e263d5d45c9a177f", size = 33222, upload-time = "2025-12-17T23:32:43.94Z" }, + { url = "https://files.pythonhosted.org/packages/f9/ea/89cfda82bb8c57ff91bb9a26751aa234d6d90e9b4d5ab0ad9dce0f9f0329/time_machine-3.2.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ce76b82276d7ad2a66cdc85dad4df19d1422b69183170a34e8fbc4c3f35502f7", size = 34270, upload-time = "2025-12-17T23:32:45.037Z" }, + { url = "https://files.pythonhosted.org/packages/86/a1/142de946dc4393f910bf4564b5c3ba819906e1f49b06c9cb557519c849e4/time_machine-3.2.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:4e374779021446fc2b5c29d80457ec9a3b1a5df043dc2aae07d7c1415d52323c", size = 19991, upload-time = "2025-12-17T23:32:49.933Z" }, + { url = "https://files.pythonhosted.org/packages/ee/62/7f17def6289901f94726921811a16b9adce46e666362c75d45730c60274f/time_machine-3.2.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:122310a6af9c36e9a636da32830e591e7923e8a07bdd0a43276c3a36c6821c90", size = 15707, upload-time = "2025-12-17T23:32:50.969Z" }, + { url = "https://files.pythonhosted.org/packages/5d/d3/3502fb9bd3acb159c18844b26c43220201a0d4a622c0c853785d07699a92/time_machine-3.2.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:ba3eeb0f018cc362dd8128befa3426696a2e16dd223c3fb695fde184892d4d8c", size = 39207, upload-time = "2025-12-17T23:32:52.033Z" }, + { url = 
"https://files.pythonhosted.org/packages/5a/be/8b27f4aa296fda14a5a2ad7f588ddd450603c33415ab3f8e85b2f1a44678/time_machine-3.2.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:77d38ba664b381a7793f8786efc13b5004f0d5f672dae814430445b8202a67a6", size = 40764, upload-time = "2025-12-17T23:32:53.167Z" }, + { url = "https://files.pythonhosted.org/packages/42/cd/fe4c4e5c8ab6d48fab3624c32be9116fb120173a35fe67e482e5cf68b3d2/time_machine-3.2.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f09abeb8f03f044d72712207e0489a62098ad3ad16dac38927fcf80baca4d6a7", size = 43508, upload-time = "2025-12-17T23:32:54.597Z" }, + { url = "https://files.pythonhosted.org/packages/b4/28/5a3ba2fce85b97655a425d6bb20a441550acd2b304c96b2c19d3839f721a/time_machine-3.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6b28367ce4f73987a55e230e1d30a57a3af85da8eb1a140074eb6e8c7e6ef19f", size = 41712, upload-time = "2025-12-17T23:32:55.781Z" }, + { url = "https://files.pythonhosted.org/packages/81/58/e38084be7fdabb4835db68a3a47e58c34182d79fc35df1ecbe0db2c5359f/time_machine-3.2.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:903c7751c904581da9f7861c3015bed7cdc40047321291d3694a3cdc783bbca3", size = 38939, upload-time = "2025-12-17T23:32:56.867Z" }, + { url = "https://files.pythonhosted.org/packages/40/d0/ad3feb0a392ef4e0c08bc32024950373ddc0669002cbdcbb9f3bf0c2d114/time_machine-3.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:528217cad85ede5f85c8bc78b0341868d3c3cfefc6ecb5b622e1cacb6c73247b", size = 39837, upload-time = "2025-12-17T23:32:58.283Z" }, +] + [[package]] name = "tinytag" version = "2.2.1" @@ -5474,18 +5553,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/db/eb/d5583a11486211f3ebd4b385545ae787f32363d453c19fffd81106c9c138/whitenoise-6.12.0-py3-none-any.whl", hash = "sha256:fc5e8c572e33ebf24795b47b6a7da8da3c00cff2349f5b04c02f28d0cc5a3cc2", size = 20302, 
upload-time = "2026-02-27T00:05:40.086Z" }, ] -[[package]] -name = "whoosh-reloaded" -version = "2.7.5" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cached-property", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/17/51/3fb4b9fdeaaf96512514ccf2871186333ce41a0de2ea48236a4056a5f6af/Whoosh-Reloaded-2.7.5.tar.gz", hash = "sha256:39ed7dfbd1fec97af33933107bdf78110728375ed0f2abb25dec6dbfdcb279d8", size = 1061606, upload-time = "2024-02-02T20:06:42.285Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/69/90/866dfe421f188217ecd7339585e961034a7f4fdc96b62cec3b40a50dbdef/Whoosh_Reloaded-2.7.5-py2.py3-none-any.whl", hash = "sha256:2ab6aeeafb359fbff4beb3c704b960fd88240354f3363f1c5bdb5c2325cae80e", size = 551793, upload-time = "2024-02-02T20:06:39.868Z" }, -] - [[package]] name = "wrapt" version = "2.0.1"