Accounts for the 0013 migration

Squashes migrations in prep for final release
Enhancement (beta): support LLM timeout config (#13002)
2026-06-28 16:24:19 +00:00 · 2026-06-18 08:51:15 -07:00 · 2026-06-18 08:48:53 -07:00 · 2026-06-18 08:35:11 -07:00 · 2026-06-17 15:43:48 +00:00 · 2026-06-17 06:24:38 -07:00
37 changed files with 1550 additions and 47 deletions
@@ -2068,6 +2068,13 @@ context by default.

    Defaults to 8192.

+#### [`PAPERLESS_AI_LLM_REQUEST_TIMEOUT=<int>`](#PAPERLESS_AI_LLM_REQUEST_TIMEOUT) {#PAPERLESS_AI_LLM_REQUEST_TIMEOUT}
+
+: The timeout, in seconds, for requests to the configured AI backend. Increase this when using
+local or slow inference servers that need more time to generate responses.
+
+    Defaults to 120.
+
 #### [`PAPERLESS_AI_LLM_BACKEND=<str>`](#PAPERLESS_AI_LLM_BACKEND) {#PAPERLESS_AI_LLM_BACKEND}

 : The AI backend to use. This can be either "openai-like" or "ollama". If set to "ollama", the AI
@@ -26,7 +26,7 @@ module.exports = {
    'abstract-paperless-service',
  ],
  transformIgnorePatterns: [
-    'node_modules/(?!.*(\\.mjs$|tslib|lodash-es|@angular/common/locales/.*\\.js$))',
+    'node_modules/(?!.*(\\.mjs$|tslib|lodash-es|normalize-diacritics|@angular/common/locales/.*\\.js$))',
  ],
  moduleNameMapper: {
    ...esmPreset.moduleNameMapper,
@@ -32,6 +32,7 @@
    "ngx-cookie-service": "^21.3.1",
    "ngx-device-detector": "^11.0.0",
    "ngx-ui-tour-ng-bootstrap": "^18.0.0",
+    "normalize-diacritics": "^5.0.0",
    "pdfjs-dist": "^5.7.284",
    "rxjs": "^7.8.2",
    "tslib": "^2.8.1",
@@ -71,6 +71,9 @@ importers:
      ngx-ui-tour-ng-bootstrap:
        specifier: ^18.0.0
        version: 18.0.0(f910a33494d223bd6dd07ce1bf22a35e)
+      normalize-diacritics:
+        specifier: ^5.0.0
+        version: 5.0.0
      pdfjs-dist:
        specifier: ^5.7.284
        version: 5.7.284
@@ -5516,6 +5519,10 @@ packages:
    engines: {node: ^20.17.0 || >=22.9.0}
    hasBin: true

+  normalize-diacritics@5.0.0:
+    resolution: {integrity: sha512-t6czCJOpbAtckN1wCC2qPWnO3GQvNANb9bcUNbiOLEqojVuP31+ELIs5KhEG8jyz0TH7iD9BWxWz8O3ic2/rMQ==}
+    engines: {node: '>= 14.x', npm: '>= 6.x'}
+
  normalize-path@3.0.0:
    resolution: {integrity: sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==}
    engines: {node: '>=0.10.0'}
@@ -12931,6 +12938,10 @@ snapshots:
    dependencies:
      abbrev: 4.0.0

+  normalize-diacritics@5.0.0:
+    dependencies:
+      tslib: 2.8.1
+
  normalize-path@3.0.0: {}

  npm-bundled@5.0.0:
@@ -23,6 +23,7 @@ import {
 import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service'
 import { ToastService } from 'src/app/services/toast.service'
 import { pngxPopperOptions } from 'src/app/utils/popper-options'
+import { matchesSearchText } from 'src/app/utils/text-search'
 import { LoadingComponentWithPermissions } from '../../loading-component/loading.component'
 import { CustomFieldEditDialogComponent } from '../edit-dialog/custom-field-edit-dialog/custom-field-edit-dialog.component'

@@ -69,9 +70,7 @@ export class CustomFieldsDropdownComponent extends LoadingComponentWithPermissio

  public get filteredFields(): CustomField[] {
    return this.unusedFields.filter(
-      (f) =>
-        !this.filterText ||
-        f.name.toLowerCase().includes(this.filterText.toLowerCase())
+      (f) => !this.filterText || matchesSearchText(f.name, this.filterText)
    )
  }

@@ -63,6 +63,7 @@
      [(ngModel)]="atom.value"
      [disabled]="disabled"
      [virtualScroll]="getSelectOptionsForField(atom.field)?.length > 100"
+      [searchFn]="selectOptionSearchFn"
      (mousedown)="$event.stopImmediatePropagation()"
    ></ng-select>
  } @else if (getCustomFieldByID(atom.field)?.data_type === CustomFieldDataType.DocumentLink) {
@@ -81,6 +82,7 @@
      [disabled]="disabled"
      bindLabel="name"
      bindValue="id"
+      [searchFn]="customFieldSearchFn"
      (mousedown)="$event.stopImmediatePropagation()"
    ></ng-select>
    <select class="w-25 form-select" [(ngModel)]="atom.operator" [disabled]="disabled">
@@ -125,6 +127,7 @@
          [(ngModel)]="atom.value"
          [disabled]="disabled"
          [multiple]="true"
+          [searchFn]="selectOptionSearchFn"
          (mousedown)="$event.stopImmediatePropagation()"
        ></ng-select>
      }
@@ -36,6 +36,7 @@ import {
  CustomFieldQueryExpression,
 } from 'src/app/utils/custom-field-query-element'
 import { pngxPopperOptions } from 'src/app/utils/popper-options'
+import { matchesSearchText } from 'src/app/utils/text-search'
 import { LoadingComponentWithPermissions } from '../../loading-component/loading.component'
 import { ClearableBadgeComponent } from '../clearable-badge/clearable-badge.component'
 import { DocumentLinkComponent } from '../input/document-link/document-link.component'
@@ -281,6 +282,14 @@ export class CustomFieldsQueryDropdownComponent extends LoadingComponentWithPerm

  public readonly today: string = new Date().toLocaleDateString('en-CA')

+  /**
+   * Search predicate in the `(term, item)` form expected by a `[searchFn]`
+   * binding. Matches the search term against the custom field's name via
+   * `matchesSearchText`; a missing field or name is passed on as `undefined`.
+   */
+  public customFieldSearchFn = (term: string, field: CustomField): boolean =>
+    matchesSearchText(field?.name, term)
+
+  /**
+   * Search predicate in the `(term, item)` form expected by a `[searchFn]`
+   * binding. Matches the search term against a select option's `label` via
+   * `matchesSearchText`; the option's `id` is not searched.
+   */
+  public selectOptionSearchFn = (
+    term: string,
+    option: { id: string; label: string }
+  ): boolean => matchesSearchText(option?.label, term)
+
  constructor() {
    super()
    this.selectionModel = new CustomFieldQueriesModel()
@@ -28,6 +28,7 @@
          [notFoundText]="notFoundText"
          [multiple]="multiple"
          [bindLabel]="bindLabel"
+          [searchFn]="searchFn"
          bindValue="id"
          [virtualScroll]="items?.length > 100"
          (change)="onChange(value)"
@@ -112,6 +112,15 @@ describe('SelectComponent', () => {
    expect(createNewVal).toEqual('baz')
  })

+  it('should search items by independent normalized terms', () => {
+    expect(
+      component.searchFn('tax 26', { id: 11, name: 'Tax\u00e9s 2026' })
+    ).toBeTruthy()
+    expect(
+      component.searchFn('tax receipt', { id: 11, name: 'Tax\u00e9s 2026' })
+    ).toBeFalsy()
+  })
+
  it('should clear search term on blur after delay', fakeAsync(() => {
    const clearSpy = jest.spyOn(component, 'clearLastSearchTerm')
    component.onBlur()
@@ -13,6 +13,7 @@ import {
 import { RouterModule } from '@angular/router'
 import { NgSelectModule } from '@ng-select/ng-select'
 import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
+import { matchesSearchText } from 'src/app/utils/text-search'
 import { AbstractInputComponent } from '../abstract-input'

@Component({
@@ -99,6 +100,9 @@ export class SelectComponent extends AbstractInputComponent<number> {
  @Input()
  bindLabel: string = 'name'

+  /**
+   * Search predicate in the `(term, item)` form. Compares the search term
+   * with the item property named by `bindLabel` via `matchesSearchText`, so
+   * the search follows whichever label property the component is using.
+   */
+  public searchFn = (term: string, item: any): boolean =>
+    matchesSearchText(item?.[this.bindLabel], term)
+
  @Input()
  showFilter: boolean = false

@@ -14,6 +14,7 @@
          [clearSearchOnAdd]="true"
          [hideSelected]="tags.length > 0"
          [addTag]="allowCreate ? createTagRef : false"
+          [searchFn]="searchFn"
          addTagText="Add tag"
          i18n-addTagText
          (add)="onAdd($event)"
@@ -171,6 +171,15 @@ describe('TagsComponent', () => {
    expect(component.getTag(4)).toBeUndefined()
  })

+  it('should search tags by independent normalized terms including parents', () => {
+    const parent: Tag = { id: 11, name: 'Financ\u00e9' }
+    const child: Tag = { id: 12, name: 'Taxes 2026', parent: parent.id }
+    component.tags = [parent, child]
+
+    expect(component.searchFn('finance 26', child)).toBeTruthy()
+    expect(component.searchFn('finance receipt', child)).toBeFalsy()
+  })
+
  it('should emit filtered documents', () => {
    component.value = [10]
    component.tags = tags
@@ -21,6 +21,7 @@ import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
 import { first, firstValueFrom, tap } from 'rxjs'
 import { Tag } from 'src/app/data/tag'
 import { TagService } from 'src/app/services/rest/tag.service'
+import { matchesSearchText } from 'src/app/utils/text-search'
 import { EditDialogMode } from '../../edit-dialog/edit-dialog.component'
 import { TagEditDialogComponent } from '../../edit-dialog/tag-edit-dialog/tag-edit-dialog.component'
 import { TagComponent } from '../../tag/tag.component'
@@ -114,6 +115,14 @@ export class TagsComponent implements OnInit, ControlValueAccessor {

  public createTagRef: (name) => void

+  /**
+   * Search predicate for tags. The searchable text is the names of the tags
+   * returned by `getParentChain` followed by the tag's own name, joined with
+   * spaces, so a term may match an ancestor as well as the tag itself.
+   */
+  public searchFn = (term: string, tag: Tag): boolean =>
+    matchesSearchText(
+      [this.getParentChain(tag?.id).map((parent) => parent.name), tag?.name]
+        .flat()
+        .join(' '),
+      term
+    )
+
  getTag(id: number) {
    if (this.tags) {
      return this.tags.find((tag) => tag.id == id)
@@ -360,6 +360,14 @@ export const PaperlessConfigOptions: ConfigOption[] = [
    category: ConfigCategory.AI,
    note: $localize`Language to use for generated AI suggestions. When unset, AI suggestions use the user's display language if explicitly set.`,
  },
+  {
+    key: 'llm_request_timeout',
+    title: $localize`LLM Request Timeout`,
+    type: ConfigOptionType.Number,
+    config_key: 'PAPERLESS_AI_LLM_REQUEST_TIMEOUT',
+    category: ConfigCategory.AI,
+    note: $localize`Timeout in seconds for LLM requests.`,
+  },
 ]

 export interface PaperlessConfig extends ObjectWithId {
@@ -401,4 +409,5 @@ export interface PaperlessConfig extends ObjectWithId {
  llm_api_key: string
  llm_endpoint: string
  llm_output_language: string
+  llm_request_timeout: number
 }
@@ -1,5 +1,6 @@
 import { Pipe, PipeTransform } from '@angular/core'
 import { MatchingModel } from '../data/matching-model'
+import { matchesSearchText } from '../utils/text-search'

@Pipe({
  name: 'filter',
@@ -21,9 +22,7 @@ export class FilterPipe implements PipeTransform {
              typeof item[key] === 'string' || typeof item[key] === 'number'
          )
      return keys.some((key) => {
-        return String(item[key])
-          .toLowerCase()
-          .includes(searchText.toLowerCase())
+        return matchesSearchText(item[key], searchText)
      })
    })
  }
@@ -0,0 +1,17 @@
+import { matchesSearchText } from './text-search'
+
+// Specs for matchesSearchText: diacritic/ligature folding and per-term matching.
+describe('text search utilities', () => {
+  it('matches text accent-insensitively', () => {
+    // Accented, stroked and ligature characters match their plain ASCII forms
+    expect(matchesSearchText('R\u00e9sum\u00e9', 'resume')).toBeTruthy()
+    expect(matchesSearchText('S\u00f8ren', 'soren')).toBeTruthy()
+    expect(matchesSearchText('\u0152uvre', 'oeuvre')).toBeTruthy()
+    expect(matchesSearchText('Invoice', 'receipt')).toBeFalsy()
+  })
+
+  it('matches all whitespace-separated search terms independently', () => {
+    // Term order is irrelevant; every term must appear somewhere in the value
+    expect(matchesSearchText('taxes 2026', 'tax 26')).toBeTruthy()
+    expect(matchesSearchText('2026 taxes', 'tax 26')).toBeTruthy()
+    expect(matchesSearchText('Tax\u00e9s 2026', 'taxe 26')).toBeTruthy()
+    expect(matchesSearchText('taxes 2026', 'tax receipt')).toBeFalsy()
+  })
+})
@@ -0,0 +1,23 @@
+import { normalizeSync } from 'normalize-diacritics'
+
+export type SearchTextValue =
+  | string
+  | number
+  | boolean
+  | bigint
+  | null
+  | undefined
+
+/**
+ * Produces the canonical form used for search comparisons: the value is
+ * stringified (`null`/`undefined` become `''`), passed through
+ * `normalizeSync`, and lower-cased.
+ *
+ * Lower-casing deliberately uses the locale-independent `toLowerCase()`.
+ * `toLocaleLowerCase()` without an argument follows the host locale, so under
+ * e.g. a Turkish locale `'I'` becomes dotless `'ı'` and the value `'Invoice'`
+ * would no longer match the search text `'invoice'`.
+ *
+ * @param value Value to normalize.
+ * @returns The normalized, lower-cased string.
+ */
+export function normalizeSearchText(value: SearchTextValue): string {
+  return normalizeSync(String(value ?? '')).toLowerCase()
+}
+
+/**
+ * Reports whether `value` contains every whitespace-separated term of
+ * `searchText`. Both sides go through `normalizeSearchText` first, and each
+ * term is matched as a substring on its own, in any order.
+ *
+ * An empty `searchText` yields the single term `''`, which every string
+ * contains, so it matches every value.
+ *
+ * @param value Text to search in.
+ * @param searchText Search input; trimmed and split on runs of whitespace.
+ * @returns `true` when all terms are found in `value`.
+ */
+export function matchesSearchText(
+  value: SearchTextValue,
+  searchText: SearchTextValue
+): boolean {
+  const normalizedValue = normalizeSearchText(value)
+  const searchTerms = normalizeSearchText(searchText).trim().split(/\s+/)
+
+  return searchTerms.every((term) => normalizedValue.includes(term))
+}
@@ -169,6 +169,10 @@ class FileStabilityTracker:
            self._tracked.pop(path, None)
            yield path

+    def is_tracking(self, path: Path) -> bool:
+        """
+        Check whether a path is currently being tracked for stability.
+
+        The path is resolved before the membership test against
+        ``self._tracked``.
+        """
+        # NOTE(review): presumes ``_tracked`` is keyed by resolved paths —
+        # confirm against ``track()``.
+        return path.resolve() in self._tracked
+
    def has_pending_files(self) -> bool:
        """Check if there are files waiting for stability check."""
        return len(self._tracked) > 0
@@ -370,6 +374,16 @@ class Command(BaseCommand):
    # Testing timeout in seconds
    testing_timeout_s: Final[float] = 0.5

+    # How often to perform a full-glob rescan of the consume directory as a
+    # safety net. Each watchfiles watcher is torn down and recreated on every
+    # batch to reconfigure its timeout, and a fresh watcher silently adopts the
+    # current directory contents as its baseline. A file that appears between
+    # one batch and the next watcher's baseline is therefore never reported and
+    # would sit in the consume directory forever. This periodic rescan re-injects
+    # such files into the stability tracker (see GH issue #13011). Not currently
+    # user-configurable; instances may override for testing.
+    rescan_interval_s: float = 300.0
+
    def add_arguments(self, parser) -> None:
        parser.add_argument(
            "directory",
@@ -425,7 +439,7 @@ class Command(BaseCommand):
        )

        # Process existing files
-        self._process_existing_files(
+        queued = self._process_existing_files(
            directory=directory,
            recursive=recursive,
            subdirs_as_tags=subdirs_as_tags,
@@ -445,6 +459,7 @@ class Command(BaseCommand):
            polling_interval=polling_interval,
            stability_delay=stability_delay,
            is_testing=is_testing,
+            queued=queued,
        )

        logger.debug("Consumer exiting")
@@ -456,11 +471,18 @@ class Command(BaseCommand):
        recursive: bool,
        subdirs_as_tags: bool,
        consumer_filter: ConsumerFilter,
-    ) -> None:
-        """Process any existing files in the consumption directory."""
+    ) -> set[Path]:
+        """
+        Process any existing files in the consumption directory.
+
+        Returns the set of resolved paths that were queued, so the watch loop
+        can seed its in-flight set and avoid re-queuing them on the first
+        rescan before the consume tasks have removed them from disk.
+        """
        logger.info(f"Processing existing files in {directory}")

        glob_pattern = "**/*" if recursive else "*"
+        queued: set[Path] = set()

        for filepath in directory.glob(glob_pattern):
            # Use filter to check if file should be processed
@@ -475,6 +497,48 @@ class Command(BaseCommand):
                consumption_dir=directory,
                subdirs_as_tags=subdirs_as_tags,
            )
+            queued.add(filepath.resolve())
+
+        return queued
+
+    def _rescan_existing_files(
+        self,
+        *,
+        directory: Path,
+        recursive: bool,
+        consumer_filter: ConsumerFilter,
+        tracker: FileStabilityTracker,
+        queued: set[Path],
+    ) -> None:
+        """
+        Re-inject on-disk files the watcher never reported into the tracker.
+
+        Acts as a safety net for files stranded by the watcher-recreation gap
+        (see ``rescan_interval_s``). Files already being tracked or already
+        queued and awaiting consumption are skipped, so a file is never queued
+        twice. Queued paths that have since left the directory are pruned so a
+        later file reusing the same name is not skipped forever.
+
+        Args:
+            directory: Directory to glob.
+            recursive: Glob with ``**/*`` when true, otherwise ``*``.
+            consumer_filter: Called as ``consumer_filter(Change.added, str(path))``;
+                a falsy result skips the file.
+            tracker: Stability tracker that receives newly found files.
+            queued: Paths already queued for consumption. Mutated in place:
+                entries that no longer exist on disk are removed.
+        """
+        # Prune in-flight paths that have left the directory
+        # (iterate over a copy because the set is modified inside the loop)
+        for path in list(queued):
+            if not path.exists():
+                queued.discard(path)
+
+        glob_pattern = "**/*" if recursive else "*"
+
+        for filepath in directory.glob(glob_pattern):
+            # Only regular files are candidates
+            if not filepath.is_file():
+                continue
+
+            if not consumer_filter(Change.added, str(filepath)):
+                continue
+
+            # Compare by resolved path, the form used for ``queued`` lookups
+            resolved = filepath.resolve()
+            if tracker.is_tracking(resolved) or resolved in queued:
+                continue
+
+            logger.debug(f"Rescan found untracked file: {resolved}")
+            tracker.track(resolved, Change.added)

    def _watch_directory(
        self,
@@ -486,11 +550,24 @@ class Command(BaseCommand):
        polling_interval: float,
        stability_delay: float,
        is_testing: bool,
+        queued: set[Path] | None = None,
    ) -> None:
        """Watch directory for changes and process stable files."""
        use_polling = polling_interval > 0
        poll_delay_ms = int(polling_interval * 1000) if use_polling else 0

+        # Resolved paths that have been queued and are awaiting consumption.
+        # Seeded from the startup scan so the first rescan does not re-queue
+        # files whose consume tasks have not yet removed them from disk.
+        queued = set() if queued is None else queued
+
+        # Full-glob safety net cadence (0 disables)
+        rescan_interval_s = self.rescan_interval_s
+        rescan_timeout_ms = (
+            int(rescan_interval_s * 1000) if rescan_interval_s > 0 else 0
+        )
+        last_rescan = monotonic()
+
        if use_polling:
            logger.info(
                f"Watching {directory} using polling (interval: {polling_interval}s)",
@@ -505,6 +582,20 @@ class Command(BaseCommand):
        stability_timeout_ms = int(stability_delay * 1000)
        testing_timeout_ms = int(self.testing_timeout_s * 1000)

+        def cap_for_rescan(ms: int) -> int:
+            """
+            Ensure the watch loop wakes often enough to run the rescan.
+
+            ``watch()`` blocks for up to ``rust_timeout``, so the rescan can
+            only run that often. A timeout of 0 means "wait indefinitely",
+            which would never wake to rescan; cap it at the rescan interval.
+
+            Args:
+                ms: Proposed watch timeout in milliseconds.
+
+            Returns:
+                ``ms`` unchanged when the rescan is disabled; otherwise the
+                rescan interval for a non-positive ``ms``, or the smaller of
+                ``ms`` and the rescan interval.
+            """
+            # Rescan disabled: leave the caller's timeout untouched
+            if rescan_timeout_ms <= 0:
+                return ms
+            # "Wait indefinitely" would starve the rescan, so bound it
+            if ms <= 0:
+                return rescan_timeout_ms
+            return min(ms, rescan_timeout_ms)
+
        # Calculate appropriate timeout for watch loop
        # In polling mode, rust_timeout must be significantly longer than poll_delay_ms
        # to ensure poll cycles can complete before timing out
@@ -522,6 +613,8 @@ class Command(BaseCommand):
            # Not testing, wait indefinitely for first event
            timeout_ms = 0

+        timeout_ms = cap_for_rescan(timeout_ms)
+
        self.stop_flag.clear()

        while not self.stop_flag.is_set():
@@ -551,10 +644,26 @@ class Command(BaseCommand):
                            consumption_dir=directory,
                            subdirs_as_tags=subdirs_as_tags,
                        )
+                        # Remember it so the rescan does not re-queue it while
+                        # the consume task has yet to remove it from disk
+                        queued.add(stable_path)

                    # Exit watch loop to reconfigure timeout
                    break

+                # Periodic full-glob safety net for files the watcher missed
+                if rescan_timeout_ms > 0 and (
+                    monotonic() - last_rescan >= rescan_interval_s
+                ):
+                    self._rescan_existing_files(
+                        directory=directory,
+                        recursive=recursive,
+                        consumer_filter=consumer_filter,
+                        tracker=tracker,
+                        queued=queued,
+                    )
+                    last_rescan = monotonic()
+
                # Determine next timeout
                if tracker.has_pending_files():
                    # Check pending files at stability interval
@@ -572,6 +681,8 @@ class Command(BaseCommand):
                    # No pending files, wait indefinitely
                    timeout_ms = 0

+                timeout_ms = cap_for_rescan(timeout_ms)
+
            except KeyboardInterrupt:  # pragma: nocover
                logger.info("Received interrupt, stopping consumer")
                self.stop_flag.set()
@@ -0,0 +1,63 @@
+# Generated by Django 5.2.14 on 2026-06-04 15:31
+
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+    # Squashed migration: stands in for the three "documents" migrations
+    # listed in ``replaces``.
+    replaces = [
+        ("documents", "0003_remove_document_storage_type"),
+        ("documents", "0004_workflowtrigger_filter_has_any_correspondents_and_more"),
+        ("documents", "0005_alter_document_checksum_unique"),
+    ]
+
+    dependencies = [
+        ("documents", "0002_squashed"),
+    ]
+
+    operations = [
+        # Drop Document.storage_type
+        migrations.RemoveField(
+            model_name="document",
+            name="storage_type",
+        ),
+        # Add the three optional "has one of these ..." many-to-many filters
+        # to WorkflowTrigger (correspondents, document types, storage paths)
+        migrations.AddField(
+            model_name="workflowtrigger",
+            name="filter_has_any_correspondents",
+            field=models.ManyToManyField(
+                blank=True,
+                related_name="workflowtriggers_has_any_correspondent",
+                to="documents.correspondent",
+                verbose_name="has one of these correspondents",
+            ),
+        ),
+        migrations.AddField(
+            model_name="workflowtrigger",
+            name="filter_has_any_document_types",
+            field=models.ManyToManyField(
+                blank=True,
+                related_name="workflowtriggers_has_any_document_type",
+                to="documents.documenttype",
+                verbose_name="has one of these document types",
+            ),
+        ),
+        migrations.AddField(
+            model_name="workflowtrigger",
+            name="filter_has_any_storage_paths",
+            field=models.ManyToManyField(
+                blank=True,
+                related_name="workflowtriggers_has_any_storage_path",
+                to="documents.storagepath",
+                verbose_name="has one of these storage paths",
+            ),
+        ),
+        # Redefine Document.checksum as a non-editable 32-character CharField
+        migrations.AlterField(
+            model_name="document",
+            name="checksum",
+            field=models.CharField(
+                editable=False,
+                help_text="The checksum of the original document.",
+                max_length=32,
+                verbose_name="checksum",
+            ),
+        ),
+    ]
@@ -0,0 +1,252 @@
+# Generated by Django 5.2.14 on 2026-06-04 15:31
+
+import django.db.models.deletion
+import django.db.models.functions.text
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+    # Squashed migration: stands in for the five "documents" migrations
+    # listed in ``replaces``.
+    replaces = [
+        ("documents", "0008_workflowaction_passwords_alter_workflowaction_type"),
+        ("documents", "0009_alter_document_content_length"),
+        ("documents", "0010_optimize_integer_field_sizes"),
+        ("documents", "0011_alter_workflowaction_type"),
+        ("documents", "0012_document_root_document"),
+    ]
+
+    dependencies = [
+        ("documents", "0007_sharelinkbundle"),
+    ]
+
+    operations = [
+        # Add the nullable JSON ``passwords`` field to WorkflowAction
+        migrations.AddField(
+            model_name="workflowaction",
+            name="passwords",
+            field=models.JSONField(
+                blank=True,
+                help_text="Passwords to try when removing PDF protection. Separate with commas or new lines.",
+                null=True,
+                verbose_name="passwords",
+            ),
+        ),
+        # Document.content_length becomes a persisted generated column computed
+        # as Length("content")
+        migrations.AlterField(
+            model_name="document",
+            name="content_length",
+            field=models.GeneratedField(
+                db_persist=True,
+                expression=django.db.models.functions.text.Length("content"),
+                help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.",
+                output_field=models.PositiveIntegerField(default=0),
+                serialize=False,
+            ),
+        ),
+        # The AlterField operations below set choice-backed integer fields to
+        # PositiveSmallIntegerField
+        migrations.AlterField(
+            model_name="correspondent",
+            name="matching_algorithm",
+            field=models.PositiveSmallIntegerField(
+                choices=[
+                    (0, "None"),
+                    (1, "Any word"),
+                    (2, "All words"),
+                    (3, "Exact match"),
+                    (4, "Regular expression"),
+                    (5, "Fuzzy word"),
+                    (6, "Automatic"),
+                ],
+                default=1,
+                verbose_name="matching algorithm",
+            ),
+        ),
+        migrations.AlterField(
+            model_name="documenttype",
+            name="matching_algorithm",
+            field=models.PositiveSmallIntegerField(
+                choices=[
+                    (0, "None"),
+                    (1, "Any word"),
+                    (2, "All words"),
+                    (3, "Exact match"),
+                    (4, "Regular expression"),
+                    (5, "Fuzzy word"),
+                    (6, "Automatic"),
+                ],
+                default=1,
+                verbose_name="matching algorithm",
+            ),
+        ),
+        migrations.AlterField(
+            model_name="savedviewfilterrule",
+            name="rule_type",
+            field=models.PositiveSmallIntegerField(
+                choices=[
+                    (0, "title contains"),
+                    (1, "content contains"),
+                    (2, "ASN is"),
+                    (3, "correspondent is"),
+                    (4, "document type is"),
+                    (5, "is in inbox"),
+                    (6, "has tag"),
+                    (7, "has any tag"),
+                    (8, "created before"),
+                    (9, "created after"),
+                    (10, "created year is"),
+                    (11, "created month is"),
+                    (12, "created day is"),
+                    (13, "added before"),
+                    (14, "added after"),
+                    (15, "modified before"),
+                    (16, "modified after"),
+                    (17, "does not have tag"),
+                    (18, "does not have ASN"),
+                    (19, "title or content contains"),
+                    (20, "fulltext query"),
+                    (21, "more like this"),
+                    (22, "has tags in"),
+                    (23, "ASN greater than"),
+                    (24, "ASN less than"),
+                    (25, "storage path is"),
+                    (26, "has correspondent in"),
+                    (27, "does not have correspondent in"),
+                    (28, "has document type in"),
+                    (29, "does not have document type in"),
+                    (30, "has storage path in"),
+                    (31, "does not have storage path in"),
+                    (32, "owner is"),
+                    (33, "has owner in"),
+                    (34, "does not have owner"),
+                    (35, "does not have owner in"),
+                    (36, "has custom field value"),
+                    (37, "is shared by me"),
+                    (38, "has custom fields"),
+                    (39, "has custom field in"),
+                    (40, "does not have custom field in"),
+                    (41, "does not have custom field"),
+                    (42, "custom fields query"),
+                    (43, "created to"),
+                    (44, "created from"),
+                    (45, "added to"),
+                    (46, "added from"),
+                    (47, "mime type is"),
+                ],
+                verbose_name="rule type",
+            ),
+        ),
+        migrations.AlterField(
+            model_name="storagepath",
+            name="matching_algorithm",
+            field=models.PositiveSmallIntegerField(
+                choices=[
+                    (0, "None"),
+                    (1, "Any word"),
+                    (2, "All words"),
+                    (3, "Exact match"),
+                    (4, "Regular expression"),
+                    (5, "Fuzzy word"),
+                    (6, "Automatic"),
+                ],
+                default=1,
+                verbose_name="matching algorithm",
+            ),
+        ),
+        migrations.AlterField(
+            model_name="tag",
+            name="matching_algorithm",
+            field=models.PositiveSmallIntegerField(
+                choices=[
+                    (0, "None"),
+                    (1, "Any word"),
+                    (2, "All words"),
+                    (3, "Exact match"),
+                    (4, "Regular expression"),
+                    (5, "Fuzzy word"),
+                    (6, "Automatic"),
+                ],
+                default=1,
+                verbose_name="matching algorithm",
+            ),
+        ),
+        migrations.AlterField(
+            model_name="workflowrun",
+            name="type",
+            field=models.PositiveSmallIntegerField(
+                choices=[
+                    (1, "Consumption Started"),
+                    (2, "Document Added"),
+                    (3, "Document Updated"),
+                    (4, "Scheduled"),
+                ],
+                null=True,
+                verbose_name="workflow trigger type",
+            ),
+        ),
+        # Unlike the other matching_algorithm fields, this one has no
+        # "Automatic" choice and defaults to 0 ("None")
+        migrations.AlterField(
+            model_name="workflowtrigger",
+            name="matching_algorithm",
+            field=models.PositiveSmallIntegerField(
+                choices=[
+                    (0, "None"),
+                    (1, "Any word"),
+                    (2, "All words"),
+                    (3, "Exact match"),
+                    (4, "Regular expression"),
+                    (5, "Fuzzy word"),
+                ],
+                default=0,
+                verbose_name="matching algorithm",
+            ),
+        ),
+        migrations.AlterField(
+            model_name="workflowtrigger",
+            name="type",
+            field=models.PositiveSmallIntegerField(
+                choices=[
+                    (1, "Consumption Started"),
+                    (2, "Document Added"),
+                    (3, "Document Updated"),
+                    (4, "Scheduled"),
+                ],
+                default=1,
+                verbose_name="Workflow Trigger Type",
+            ),
+        ),
+        migrations.AlterField(
+            model_name="workflowaction",
+            name="type",
+            field=models.PositiveSmallIntegerField(
+                choices=[
+                    (1, "Assignment"),
+                    (2, "Removal"),
+                    (3, "Email"),
+                    (4, "Webhook"),
+                    (5, "Password removal"),
+                    (6, "Move to trash"),
+                ],
+                default=1,
+                verbose_name="Workflow Action Type",
+            ),
+        ),
+        # Self-referencing, nullable foreign key on Document (reverse accessor
+        # "versions"); deleting the referenced document cascades
+        migrations.AddField(
+            model_name="document",
+            name="root_document",
+            field=models.ForeignKey(
+                blank=True,
+                null=True,
+                on_delete=django.db.models.deletion.CASCADE,
+                related_name="versions",
+                to="documents.document",
+                verbose_name="root document for this version",
+            ),
+        ),
+        # Optional label of up to 64 characters for a document version
+        migrations.AddField(
+            model_name="document",
+            name="version_label",
+            field=models.CharField(
+                blank=True,
+                help_text="Optional short label for a document version.",
+                max_length=64,
+                null=True,
+                verbose_name="version label",
+            ),
+        ),
+    ]
@@ -866,8 +866,24 @@ class TantivyBackend:
        final_query = self._apply_permission_filter(mlt_query, user)

        effective_limit = limit if limit is not None else searcher.num_docs
-        # Fetch one extra to account for excluding the original document
-        results = searcher.search(final_query, limit=effective_limit + 1)
+        try:
+            # Fetch one extra to account for excluding the original document
+            results = searcher.search(final_query, limit=effective_limit + 1)
+        except (KeyboardInterrupt, SystemExit):  # pragma: no cover
+            # These are BaseException subclasses too; they must never be
+            # swallowed by the panic workaround below.
+            raise
+        except BaseException:  # pragma: no cover
+            # Tantivy 0.26 panics in BM25 idf scoring when the index holds
+            # soft-deleted documents (doc_freq can exceed the alive doc count),
+            # which only surfaces for the More Like This query. The panic crosses
+            # the pyo3 boundary as a `pyo3_runtime.PanicException` — a
+            # BaseException, not an Exception — so catch BaseException and degrade
+            # to "no similar documents" instead of bubbling a 500 to the client.
+            # Fixed upstream: https://github.com/quickwit-oss/tantivy/pull/2964
+            # Remove once the bundled tantivy includes that fix.
+            logger.warning(
+                "More Like This scoring panicked (likely stale tantivy segment "
+                "stats after deletions); returning no results. A search index "
+                "reindex will rebuild consistent statistics.",
+                # Keep the traceback so an unrelated failure caught by this
+                # broad handler can still be diagnosed from the logs.
+                exc_info=True,
+            )
+            return []

        addrs = [addr for _score, addr in results.hits]
        all_ids = cast("list[int]", searcher.fast_field_values("id", addrs))
@@ -82,6 +82,7 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
                "llm_api_key": None,
                "llm_endpoint": None,
                "llm_output_language": None,
+                "llm_request_timeout": None,
            },
        )

@@ -684,6 +684,7 @@ class ConsumerThread(Thread):
        subdirs_as_tags: bool = False,
        polling_interval: float = 0,
        stability_delay: float = 0.1,
+        rescan_interval: float | None = None,
    ) -> None:
        super().__init__()
        self.consumption_dir = consumption_dir
@@ -693,6 +694,8 @@ class ConsumerThread(Thread):
        self.polling_interval = polling_interval
        self.stability_delay = stability_delay
        self.cmd = Command()
+        if rescan_interval is not None:
+            self.cmd.rescan_interval_s = rescan_interval
        self.cmd.stop_flag.clear()
        # Non-daemon ensures finally block runs and connections are closed
        self.daemon = False
@@ -1052,3 +1055,200 @@ class TestCommandWatchEdgeCases:
            thread.stop_and_wait(timeout=5.0)
            # Clean up any Tags created by the thread
            Tag.objects.all().delete()
+
+
+class TestRescanExistingFiles:
+    """
+    Unit tests covering the periodic rescan safety net.
+
+    Whenever ``watch()`` is recreated it silently takes the directory's
+    current contents as its baseline. A file that shows up after one batch
+    but before the next watcher's baseline is therefore never reported and
+    would remain in the consume directory forever. ``_rescan_existing_files``
+    pushes such files back into the stability tracker as a periodic safety
+    net (see GH issue #13011).
+    """
+
+    @pytest.fixture
+    def pdf_only_filter(self) -> ConsumerFilter:
+        """Consumer filter supporting only ``.pdf`` with no ignore patterns."""
+        pdf_extensions = frozenset({".pdf"})
+        return ConsumerFilter(
+            supported_extensions=pdf_extensions,
+            ignore_patterns=[],
+        )
+
+    def _rescan(
+        self,
+        directory: Path,
+        consumer_filter: ConsumerFilter,
+        tracker: FileStabilityTracker,
+        queued: set[Path],
+        *,
+        recursive: bool = False,
+    ) -> None:
+        """Run a single rescan pass on a fresh ``Command`` instance."""
+        command = Command()
+        command._rescan_existing_files(
+            directory=directory,
+            recursive=recursive,
+            consumer_filter=consumer_filter,
+            tracker=tracker,
+            queued=queued,
+        )
+
+    def test_tracks_stranded_file(
+        self,
+        consumption_dir: Path,
+        sample_pdf: Path,
+        pdf_only_filter: ConsumerFilter,
+    ) -> None:
+        """A supported file on disk that was never reported becomes tracked."""
+        stranded = consumption_dir / "stranded.pdf"
+        shutil.copy(sample_pdf, stranded)
+        stability = FileStabilityTracker(stability_delay=0.1)
+        nothing_queued: set[Path] = set()
+
+        self._rescan(
+            consumption_dir,
+            pdf_only_filter,
+            stability,
+            nothing_queued,
+        )
+
+        assert stability.is_tracking(stranded) is True
+        assert stability.pending_count == 1
+
+    def test_skips_already_tracked_file(
+        self,
+        consumption_dir: Path,
+        sample_pdf: Path,
+        pdf_only_filter: ConsumerFilter,
+    ) -> None:
+        """The rescan does not add a second entry for a tracked file."""
+        already_tracked = consumption_dir / "tracked.pdf"
+        shutil.copy(sample_pdf, already_tracked)
+        stability = FileStabilityTracker(stability_delay=0.1)
+        stability.track(already_tracked, Change.added)
+        nothing_queued: set[Path] = set()
+
+        self._rescan(
+            consumption_dir,
+            pdf_only_filter,
+            stability,
+            nothing_queued,
+        )
+
+        assert stability.pending_count == 1
+
+    def test_skips_queued_file(
+        self,
+        consumption_dir: Path,
+        sample_pdf: Path,
+        pdf_only_filter: ConsumerFilter,
+    ) -> None:
+        """A path present in the queued set is left out of the tracker."""
+        inflight = consumption_dir / "inflight.pdf"
+        shutil.copy(sample_pdf, inflight)
+        stability = FileStabilityTracker(stability_delay=0.1)
+        awaiting_consumption = {inflight.resolve()}
+
+        self._rescan(
+            consumption_dir,
+            pdf_only_filter,
+            stability,
+            awaiting_consumption,
+        )
+
+        assert stability.pending_count == 0
+
+    def test_prunes_vanished_queued_paths(
+        self,
+        consumption_dir: Path,
+        pdf_only_filter: ConsumerFilter,
+    ) -> None:
+        """A queued path missing from disk is removed from the queued set."""
+        # The file is never created, so the queued entry points at nothing.
+        vanished = (consumption_dir / "gone.pdf").resolve()
+        stability = FileStabilityTracker(stability_delay=0.1)
+        awaiting_consumption = {vanished}
+
+        self._rescan(
+            consumption_dir,
+            pdf_only_filter,
+            stability,
+            awaiting_consumption,
+        )
+
+        assert vanished not in awaiting_consumption
+
+    def test_skips_unsupported_extension(
+        self,
+        consumption_dir: Path,
+        pdf_only_filter: ConsumerFilter,
+    ) -> None:
+        """A file rejected by the consumer filter never reaches the tracker."""
+        unsupported = consumption_dir / "notes.xyz"
+        unsupported.write_bytes(b"content")
+        stability = FileStabilityTracker(stability_delay=0.1)
+        nothing_queued: set[Path] = set()
+
+        self._rescan(
+            consumption_dir,
+            pdf_only_filter,
+            stability,
+            nothing_queued,
+        )
+
+        assert stability.pending_count == 0
+
+    def test_recursive_respects_flag(
+        self,
+        consumption_dir: Path,
+        sample_pdf: Path,
+        pdf_only_filter: ConsumerFilter,
+    ) -> None:
+        """A nested file is picked up only when ``recursive`` is enabled."""
+        nested_dir = consumption_dir / "nested"
+        nested_dir.mkdir()
+        nested_pdf = nested_dir / "deep.pdf"
+        shutil.copy(sample_pdf, nested_pdf)
+
+        # Default (non-recursive) pass: the nested file stays invisible.
+        flat_tracker = FileStabilityTracker(stability_delay=0.1)
+        self._rescan(consumption_dir, pdf_only_filter, flat_tracker, set())
+        assert flat_tracker.pending_count == 0
+
+        # Recursive pass: the same file is now tracked.
+        recursive_tracker = FileStabilityTracker(stability_delay=0.1)
+        self._rescan(
+            consumption_dir,
+            pdf_only_filter,
+            recursive_tracker,
+            set(),
+            recursive=True,
+        )
+        assert recursive_tracker.is_tracking(nested_pdf) is True
+
+
+class TestProcessExistingFilesQueued:
+    """Checks that startup processing hands back the paths it queued."""
+
+    @pytest.mark.usefixtures("mock_supported_extensions")
+    def test_returns_queued_paths(
+        self,
+        consumption_dir: Path,
+        sample_pdf: Path,
+        mock_consume_file_delay: MagicMock,
+        settings: SettingsWrapper,
+    ) -> None:
+        """The returned set contains the resolved path of the existing file."""
+        existing = consumption_dir / "document.pdf"
+        shutil.copy(sample_pdf, existing)
+        settings.CONSUMER_IGNORE_PATTERNS = []
+        accept_all = ConsumerFilter(ignore_patterns=[])
+
+        command = Command()
+        queued_paths = command._process_existing_files(
+            directory=consumption_dir,
+            recursive=False,
+            subdirs_as_tags=False,
+            consumer_filter=accept_all,
+        )
+
+        assert existing.resolve() in queued_paths
+
+
+@pytest.mark.management
+@pytest.mark.django_db
+class TestCommandRescanRecovery:
+    """End-to-end check that the rescan picks up files the watcher misses."""
+
+    def test_rescan_consumes_file_the_watcher_never_reports(
+        self,
+        consumption_dir: Path,
+        sample_pdf: Path,
+        mock_consume_file_delay: MagicMock,
+        start_consumer: Callable[..., ConsumerThread],
+    ) -> None:
+        """
+        Exercise the rescan path in isolation.
+
+        The polling interval is much longer than the test window, so the
+        watcher cannot report the file in time; consumption can only come
+        from the periodic rescan.
+        """
+        apply_async = mock_consume_file_delay.apply_async
+
+        # 30s polling vs. a 5s wait below -> the watcher stays silent
+        consumer = start_consumer(
+            polling_interval=30.0,
+            stability_delay=0.1,
+            rescan_interval=0.5,
+        )
+
+        # Copied in after startup, so _process_existing_files never saw it
+        stranded = consumption_dir / "stranded.pdf"
+        shutil.copy(sample_pdf, stranded)
+
+        wait_for_mock_call(apply_async, timeout_s=5.0)
+
+        # Surface any failure raised inside the consumer thread first
+        if thread_error := consumer.exception:
+            raise thread_error
+
+        apply_async.assert_called()
+        task_kwargs = apply_async.call_args.kwargs["kwargs"]
+        input_doc = task_kwargs["input_doc"]
+        assert input_doc.original_file.name == "stranded.pdf"
@@ -30,6 +30,7 @@ from documents.signals.handlers import update_llm_suggestions_cache
 from documents.tests.utils import DirectoriesMixin
 from documents.tests.utils import read_streaming_response
 from paperless.models import ApplicationConfiguration
+from paperless_ai.exceptions import LLMTimeoutError


 class TestViews(DirectoriesMixin, TestCase):
@@ -476,6 +477,33 @@ class TestAISuggestions(DirectoriesMixin, TestCase):
            get_llm_suggestion_cache(self.document.pk, backend="openai-like"),
        )

+    @patch("documents.views.get_ai_document_classification")
+    @override_settings(
+        AI_ENABLED=True,
+        LLM_BACKEND="openai-like",
+    )
+    def test_ai_suggestions_with_llm_timeout(
+        self,
+        mock_get_ai_classification,
+    ) -> None:
+        """An LLM timeout yields a 503 with an error body and caches nothing."""
+        mock_get_ai_classification.side_effect = LLMTimeoutError()
+        suggestions_url = f"/api/documents/{self.document.pk}/ai_suggestions/"
+        expected_body = {"ai": ["AI backend request timed out."]}
+
+        self.client.force_login(user=self.user)
+        response = self.client.get(suggestions_url)
+
+        self.assertEqual(response.status_code, status.HTTP_503_SERVICE_UNAVAILABLE)
+        self.assertEqual(response.json(), expected_body)
+
+        # A failed request must not leave a cached suggestion behind.
+        cached = get_llm_suggestion_cache(self.document.pk, backend="openai-like")
+        self.assertIsNone(cached)
+
    def test_invalidate_suggestions_cache(self) -> None:
        self.client.force_login(user=self.user)
        suggestions = {
@@ -241,6 +241,7 @@ from paperless.serialisers import UserSerializer
 from paperless.views import StandardPagination
 from paperless_ai.ai_classifier import get_ai_document_classification
 from paperless_ai.chat import stream_chat_with_documents
+from paperless_ai.exceptions import LLMTimeoutError
 from paperless_ai.matching import extract_unmatched_names
 from paperless_ai.matching import match_correspondents_by_name
 from paperless_ai.matching import match_document_types_by_name
@@ -1510,6 +1511,17 @@ class DocumentViewSet(
                exc_info=True,
            )
            raise ValidationError({"ai": [_("Invalid AI configuration.")]}) from exc
+        except LLMTimeoutError as exc:
+            logger.exception(
+                "AI backend timed out while generating suggestions for document %s: %s",
+                doc.pk,
+                exc,
+                exc_info=True,
+            )
+            return Response(
+                {"ai": [_("AI backend request timed out.")]},
+                status=status.HTTP_503_SERVICE_UNAVAILABLE,
+            )

        matched_tags = match_tags_by_name(
            llm_suggestions.get("tags", []),
@@ -197,6 +197,7 @@ class AIConfig(BaseConfig):
    llm_embedding_endpoint: str = dataclasses.field(init=False)
    llm_embedding_chunk_size: int = dataclasses.field(init=False)
    llm_context_size: int = dataclasses.field(init=False)
+    llm_request_timeout: int = dataclasses.field(init=False)
    llm_backend: str = dataclasses.field(init=False)
    llm_model: str = dataclasses.field(init=False)
    llm_api_key: str = dataclasses.field(init=False)
@@ -221,6 +222,9 @@ class AIConfig(BaseConfig):
            app_config.llm_embedding_chunk_size or settings.LLM_EMBEDDING_CHUNK_SIZE
        )
        self.llm_context_size = app_config.llm_context_size or settings.LLM_CONTEXT_SIZE
+        self.llm_request_timeout = (
+            app_config.llm_request_timeout or settings.LLM_REQUEST_TIMEOUT
+        )
        self.llm_backend = app_config.llm_backend or settings.LLM_BACKEND
        self.llm_model = app_config.llm_model or settings.LLM_MODEL
        self.llm_api_key = app_config.llm_api_key or settings.LLM_API_KEY
@@ -0,0 +1,365 @@
+# Generated by Django 5.2.14 on 2026-06-04 15:30
+
+import django.core.validators
+from django.db import migrations
+from django.db import models
+
+
+def _create_singleton(apps, schema_editor):
+    """Insert one ``ApplicationConfiguration`` row using only field defaults."""
+    apps.get_model("paperless", "ApplicationConfiguration").objects.create()
+
+
+class Migration(migrations.Migration):
+    """
+    Squashed initial migration for the ``paperless`` app.
+
+    Stands in for the migrations listed in ``replaces`` (0001 through 0006).
+    It creates the ``ApplicationConfiguration`` model with its OCR, branding,
+    barcode and AI/LLM fields, then inserts one row via ``_create_singleton``.
+    """
+
+    # Migrations this squash replaces.
+    replaces = [
+        ("paperless", "0001_initial"),
+        ("paperless", "0002_applicationconfiguration_app_logo_and_more"),
+        ("paperless", "0003_alter_applicationconfiguration_max_image_pixels"),
+        ("paperless", "0004_applicationconfiguration_barcode_asn_prefix_and_more"),
+        ("paperless", "0005_applicationconfiguration_ai_enabled_and_more"),
+        ("paperless", "0006_applicationconfiguration_barcode_tag_split"),
+    ]
+
+    # First migration of the app: nothing to depend on.
+    dependencies = []
+
+    operations = [
+        migrations.CreateModel(
+            name="ApplicationConfiguration",
+            fields=[
+                (
+                    "id",
+                    models.AutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                # --- OCR / output settings ---
+                (
+                    "output_type",
+                    models.CharField(
+                        blank=True,
+                        choices=[
+                            ("pdf", "pdf"),
+                            ("pdfa", "pdfa"),
+                            ("pdfa-1", "pdfa-1"),
+                            ("pdfa-2", "pdfa-2"),
+                            ("pdfa-3", "pdfa-3"),
+                        ],
+                        max_length=8,
+                        null=True,
+                        verbose_name="Sets the output PDF type",
+                    ),
+                ),
+                (
+                    "pages",
+                    models.PositiveIntegerField(
+                        null=True,
+                        validators=[django.core.validators.MinValueValidator(1)],
+                        verbose_name="Do OCR from page 1 to this value",
+                    ),
+                ),
+                (
+                    "language",
+                    models.CharField(
+                        blank=True,
+                        max_length=32,
+                        null=True,
+                        verbose_name="Do OCR using these languages",
+                    ),
+                ),
+                (
+                    "mode",
+                    models.CharField(
+                        blank=True,
+                        choices=[
+                            ("skip", "skip"),
+                            ("redo", "redo"),
+                            ("force", "force"),
+                            ("skip_noarchive", "skip_noarchive"),
+                        ],
+                        max_length=16,
+                        null=True,
+                        verbose_name="Sets the OCR mode",
+                    ),
+                ),
+                (
+                    "skip_archive_file",
+                    models.CharField(
+                        blank=True,
+                        choices=[
+                            ("never", "never"),
+                            ("with_text", "with_text"),
+                            ("always", "always"),
+                        ],
+                        max_length=16,
+                        null=True,
+                        verbose_name="Controls the generation of an archive file",
+                    ),
+                ),
+                (
+                    "image_dpi",
+                    models.PositiveIntegerField(
+                        null=True,
+                        validators=[django.core.validators.MinValueValidator(1)],
+                        verbose_name="Sets image DPI fallback value",
+                    ),
+                ),
+                (
+                    "unpaper_clean",
+                    models.CharField(
+                        blank=True,
+                        choices=[
+                            ("clean", "clean"),
+                            ("clean-final", "clean-final"),
+                            ("none", "none"),
+                        ],
+                        max_length=16,
+                        null=True,
+                        verbose_name="Controls the unpaper cleaning",
+                    ),
+                ),
+                (
+                    "deskew",
+                    models.BooleanField(null=True, verbose_name="Enables deskew"),
+                ),
+                (
+                    "rotate_pages",
+                    models.BooleanField(
+                        null=True,
+                        verbose_name="Enables page rotation",
+                    ),
+                ),
+                (
+                    "rotate_pages_threshold",
+                    models.FloatField(
+                        null=True,
+                        validators=[django.core.validators.MinValueValidator(0.0)],
+                        verbose_name="Sets the threshold for rotation of pages",
+                    ),
+                ),
+                (
+                    "max_image_pixels",
+                    models.FloatField(
+                        null=True,
+                        validators=[django.core.validators.MinValueValidator(0.0)],
+                        verbose_name="Sets the maximum image size for decompression",
+                    ),
+                ),
+                (
+                    "color_conversion_strategy",
+                    models.CharField(
+                        blank=True,
+                        choices=[
+                            ("LeaveColorUnchanged", "LeaveColorUnchanged"),
+                            ("RGB", "RGB"),
+                            ("UseDeviceIndependentColor", "UseDeviceIndependentColor"),
+                            ("Gray", "Gray"),
+                            ("CMYK", "CMYK"),
+                        ],
+                        max_length=32,
+                        null=True,
+                        verbose_name="Sets the Ghostscript color conversion strategy",
+                    ),
+                ),
+                (
+                    "user_args",
+                    models.JSONField(
+                        null=True,
+                        verbose_name="Adds additional user arguments for OCRMyPDF",
+                    ),
+                ),
+                # --- Application branding ---
+                (
+                    "app_logo",
+                    models.FileField(
+                        blank=True,
+                        null=True,
+                        upload_to="logo/",
+                        validators=[
+                            django.core.validators.FileExtensionValidator(
+                                allowed_extensions=["jpg", "png", "gif", "svg"],
+                            ),
+                        ],
+                        verbose_name="Application logo",
+                    ),
+                ),
+                (
+                    "app_title",
+                    models.CharField(
+                        blank=True,
+                        max_length=48,
+                        null=True,
+                        verbose_name="Application title",
+                    ),
+                ),
+                # --- Barcode settings ---
+                (
+                    "barcode_asn_prefix",
+                    models.CharField(
+                        blank=True,
+                        max_length=32,
+                        null=True,
+                        verbose_name="Sets the ASN barcode prefix",
+                    ),
+                ),
+                (
+                    "barcode_dpi",
+                    models.PositiveIntegerField(
+                        null=True,
+                        validators=[django.core.validators.MinValueValidator(1)],
+                        verbose_name="Sets the barcode DPI",
+                    ),
+                ),
+                (
+                    "barcode_enable_asn",
+                    models.BooleanField(
+                        null=True,
+                        verbose_name="Enables ASN barcode",
+                    ),
+                ),
+                (
+                    "barcode_enable_tag",
+                    models.BooleanField(
+                        null=True,
+                        verbose_name="Enables tag barcode",
+                    ),
+                ),
+                (
+                    "barcode_enable_tiff_support",
+                    models.BooleanField(
+                        null=True,
+                        verbose_name="Enables barcode TIFF support",
+                    ),
+                ),
+                (
+                    "barcode_max_pages",
+                    models.PositiveIntegerField(
+                        null=True,
+                        validators=[django.core.validators.MinValueValidator(1)],
+                        verbose_name="Sets the maximum pages for barcode",
+                    ),
+                ),
+                (
+                    "barcode_retain_split_pages",
+                    models.BooleanField(
+                        null=True,
+                        verbose_name="Retains split pages",
+                    ),
+                ),
+                (
+                    "barcode_string",
+                    models.CharField(
+                        blank=True,
+                        max_length=32,
+                        null=True,
+                        verbose_name="Sets the barcode string",
+                    ),
+                ),
+                (
+                    "barcode_tag_mapping",
+                    models.JSONField(
+                        null=True,
+                        verbose_name="Sets the tag barcode mapping",
+                    ),
+                ),
+                (
+                    "barcode_upscale",
+                    models.FloatField(
+                        null=True,
+                        validators=[django.core.validators.MinValueValidator(1.0)],
+                        verbose_name="Sets the barcode upscale factor",
+                    ),
+                ),
+                (
+                    "barcodes_enabled",
+                    models.BooleanField(
+                        null=True,
+                        verbose_name="Enables barcode scanning",
+                    ),
+                ),
+                # --- AI / LLM settings ---
+                (
+                    "ai_enabled",
+                    models.BooleanField(
+                        default=False,
+                        null=True,
+                        verbose_name="Enables AI features",
+                    ),
+                ),
+                (
+                    "llm_api_key",
+                    models.CharField(
+                        blank=True,
+                        max_length=1024,
+                        null=True,
+                        verbose_name="Sets the LLM API key",
+                    ),
+                ),
+                (
+                    "llm_backend",
+                    models.CharField(
+                        blank=True,
+                        choices=[
+                            ("openai-like", "OpenAI-compatible"),
+                            ("ollama", "Ollama"),
+                        ],
+                        max_length=128,
+                        null=True,
+                        verbose_name="Sets the LLM backend",
+                    ),
+                ),
+                (
+                    "llm_embedding_backend",
+                    models.CharField(
+                        blank=True,
+                        choices=[
+                            ("openai-like", "OpenAI-compatible"),
+                            ("huggingface", "Huggingface"),
+                        ],
+                        max_length=128,
+                        null=True,
+                        verbose_name="Sets the LLM embedding backend",
+                    ),
+                ),
+                (
+                    "llm_embedding_model",
+                    models.CharField(
+                        blank=True,
+                        max_length=128,
+                        null=True,
+                        verbose_name="Sets the LLM embedding model",
+                    ),
+                ),
+                (
+                    "llm_endpoint",
+                    models.CharField(
+                        blank=True,
+                        max_length=256,
+                        null=True,
+                        verbose_name="Sets the LLM endpoint, optional",
+                    ),
+                ),
+                (
+                    "llm_model",
+                    models.CharField(
+                        blank=True,
+                        max_length=128,
+                        null=True,
+                        verbose_name="Sets the LLM model",
+                    ),
+                ),
+                (
+                    "barcode_tag_split",
+                    models.BooleanField(
+                        null=True,
+                        verbose_name="Enables splitting on tag barcodes",
+                    ),
+                ),
+            ],
+            options={
+                "verbose_name": "paperless application settings",
+            },
+        ),
+        # Insert the configuration row once the table exists; the reverse
+        # direction is a no-op.
+        migrations.RunPython(
+            code=_create_singleton,
+            reverse_code=migrations.RunPython.noop,
+        ),
+    ]
@@ -0,0 +1,94 @@
+# Generated by Django 5.2.14 on 2026-06-04 15:19
+
+import django.core.validators
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+    """
+    Squashed migration standing in for ``paperless`` migrations 0009 to 0013.
+
+    Sets the model's permissions and verbose name, redefines the
+    ``llm_embedding_backend`` choices, and adds the nullable fields
+    ``llm_embedding_endpoint``, ``llm_embedding_chunk_size``,
+    ``llm_context_size``, ``llm_output_language`` and ``llm_request_timeout``.
+    """
+
+    # Migrations this squash replaces.
+    replaces = [
+        ("paperless", "0009_alter_applicationconfiguration_options"),
+        ("paperless", "0010_alter_applicationconfiguration_llm_embedding_backend"),
+        ("paperless", "0011_applicationconfiguration_llm_embedding_chunk_size"),
+        ("paperless", "0012_applicationconfiguration_llm_output_language"),
+        ("paperless", "0013_applicationconfiguration_llm_request_timeout"),
+    ]
+
+    # Applies on top of the last migration that is not part of this squash.
+    dependencies = [
+        ("paperless", "0008_replace_skip_archive_file"),
+    ]
+
+    operations = [
+        # Custom permissions attached to the configuration model.
+        migrations.AlterModelOptions(
+            name="applicationconfiguration",
+            options={
+                "permissions": [
+                    ("view_global_statistics", "Can view global object counts"),
+                    ("view_system_monitoring", "Can view system status information"),
+                ],
+                "verbose_name": "paperless application settings",
+            },
+        ),
+        # Embedding backend choices: openai-like, huggingface and ollama.
+        migrations.AlterField(
+            model_name="applicationconfiguration",
+            name="llm_embedding_backend",
+            field=models.CharField(
+                blank=True,
+                choices=[
+                    ("openai-like", "OpenAI-compatible"),
+                    ("huggingface", "Huggingface"),
+                    ("ollama", "Ollama"),
+                ],
+                max_length=128,
+                null=True,
+                verbose_name="Sets the LLM embedding backend",
+            ),
+        ),
+        migrations.AddField(
+            model_name="applicationconfiguration",
+            name="llm_embedding_endpoint",
+            field=models.CharField(
+                blank=True,
+                max_length=256,
+                null=True,
+                verbose_name="Sets the LLM embedding endpoint, optional",
+            ),
+        ),
+        migrations.AddField(
+            model_name="applicationconfiguration",
+            name="llm_embedding_chunk_size",
+            field=models.PositiveSmallIntegerField(
+                null=True,
+                validators=[django.core.validators.MinValueValidator(1)],
+                verbose_name="Sets the LLM embedding chunk size",
+            ),
+        ),
+        migrations.AddField(
+            model_name="applicationconfiguration",
+            name="llm_context_size",
+            field=models.PositiveIntegerField(
+                null=True,
+                validators=[django.core.validators.MinValueValidator(1)],
+                verbose_name="Sets the LLM context size",
+            ),
+        ),
+        migrations.AddField(
+            model_name="applicationconfiguration",
+            name="llm_output_language",
+            field=models.CharField(
+                blank=True,
+                max_length=32,
+                null=True,
+                verbose_name="Sets the LLM output language",
+            ),
+        ),
+        # Same field definition as the standalone 0013 migration in `replaces`.
+        migrations.AddField(
+            model_name="applicationconfiguration",
+            name="llm_request_timeout",
+            field=models.PositiveSmallIntegerField(
+                null=True,
+                validators=[django.core.validators.MinValueValidator(1)],
+                verbose_name="Sets the LLM request timeout in seconds",
+            ),
+        ),
+    ]
@@ -0,0 +1,23 @@
+# Generated by Django 5.2.14 on 2026-06-14 14:22
+
+import django.core.validators
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+    """
+    Adds the nullable ``llm_request_timeout`` field to
+    ``ApplicationConfiguration``: a positive small integer, validated to be
+    at least 1, labelled as the LLM request timeout in seconds.
+    """
+
+    dependencies = [
+        ("paperless", "0012_applicationconfiguration_llm_output_language"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="applicationconfiguration",
+            name="llm_request_timeout",
+            field=models.PositiveSmallIntegerField(
+                null=True,
+                validators=[django.core.validators.MinValueValidator(1)],
+                verbose_name="Sets the LLM request timeout in seconds",
+            ),
+        ),
+    ]
@@ -366,6 +366,12 @@ class ApplicationConfiguration(AbstractSingletonModel):
        max_length=32,
    )

+    # Request timeout in seconds for the AI backend. The verbose name must
+    # match the one recorded in the paperless migrations, otherwise
+    # makemigrations reports a pending AlterField for this field.
+    llm_request_timeout = models.PositiveSmallIntegerField(
+        verbose_name=_("Sets the LLM request timeout in seconds"),
+        null=True,
+        validators=[MinValueValidator(1)],
+    )
+
    class Meta:
        verbose_name = _("paperless application settings")
        permissions = [
@@ -1206,6 +1206,9 @@ if LLM_EMBEDDING_CHUNK_SIZE < 1:
 LLM_CONTEXT_SIZE = get_int_from_env("PAPERLESS_AI_LLM_CONTEXT_SIZE", 8192)
 if LLM_CONTEXT_SIZE < 1:
    raise ImproperlyConfigured("PAPERLESS_AI_LLM_CONTEXT_SIZE must be >= 1")
+LLM_REQUEST_TIMEOUT = get_int_from_env("PAPERLESS_AI_LLM_REQUEST_TIMEOUT", 120)
+if LLM_REQUEST_TIMEOUT < 1:
+    raise ImproperlyConfigured("PAPERLESS_AI_LLM_REQUEST_TIMEOUT must be >= 1")
 LLM_BACKEND = get_choice_from_env(
    "PAPERLESS_AI_LLM_BACKEND",
    {"ollama", "openai-like"},
@@ -1,11 +1,14 @@
 import json
 import logging
+from collections.abc import Iterator
+from contextlib import contextmanager
 from typing import TYPE_CHECKING

+import httpx
+
 from paperless.models import LLMBackend

 if TYPE_CHECKING:
-    from llama_index.core.llms import ChatMessage
    from llama_index.llms.ollama import Ollama
    from llama_index.llms.openai_like import OpenAILike

@@ -16,6 +19,7 @@ from paperless.network import create_pinned_async_httpx_client
 from paperless.network import create_pinned_httpx_client
 from paperless.network import validate_outbound_http_url
 from paperless_ai.base_model import DocumentClassifierSchema
+from paperless_ai.exceptions import LLMTimeoutError

 logger = logging.getLogger("paperless_ai.client")

@@ -61,16 +65,16 @@ class AIClient:
                model=self.settings.llm_model or "llama3.1",
                base_url=endpoint,
                context_window=self.settings.llm_context_size,
-                request_timeout=120,
+                request_timeout=self.settings.llm_request_timeout,
                system_prompt=LLM_SYSTEM_PROMPT,
                client=Client(
                    host=endpoint,
-                    timeout=120,
+                    timeout=self.settings.llm_request_timeout,
                    transport=transport,
                ),
                async_client=AsyncClient(
                    host=endpoint,
-                    timeout=120,
+                    timeout=self.settings.llm_request_timeout,
                    transport=async_transport,
                ),
            )
@@ -84,15 +88,18 @@ class AIClient:
                http_client = create_pinned_httpx_client(
                    endpoint,
                    allow_internal=self.settings.llm_allow_internal_endpoints,
+                    timeout=self.settings.llm_request_timeout,
                )
                async_http_client = create_pinned_async_httpx_client(
                    endpoint,
                    allow_internal=self.settings.llm_allow_internal_endpoints,
+                    timeout=self.settings.llm_request_timeout,
                )
            return OpenAILike(
                model=self.settings.llm_model or "gpt-3.5-turbo",
                api_base=endpoint,
                api_key=self.settings.llm_api_key,
+                timeout=self.settings.llm_request_timeout,
                is_chat_model=True,
                is_function_calling_model=True,
                system_prompt=LLM_SYSTEM_PROMPT,
@@ -113,11 +120,12 @@ class AIClient:

        user_msg = ChatMessage(role="user", content=prompt)
        if self.settings.llm_backend == LLMBackend.OLLAMA:
-            result = self.llm.chat(
-                [user_msg],
-                format=DocumentClassifierSchema.model_json_schema(),
-                think=False,
-            )
+            with self._normalize_timeouts():
+                result = self.llm.chat(
+                    [user_msg],
+                    format=DocumentClassifierSchema.model_json_schema(),
+                    think=False,
+                )
            logger.debug("LLM query result: %s", result)
            parsed = DocumentClassifierSchema(**json.loads(result.message.content))
            return parsed.model_dump()
@@ -125,26 +133,39 @@ class AIClient:
        from llama_index.core.program.function_program import get_function_tool

        tool = get_function_tool(DocumentClassifierSchema)
-        result = self.llm.chat_with_tools(
-            tools=[tool],
-            user_msg=user_msg,
-            chat_history=[],
-            allow_parallel_tool_calls=True,
-        )
-        tool_calls = self.llm.get_tool_calls_from_response(
-            result,
-            error_on_no_tool_call=True,
-        )
+        with self._normalize_timeouts():
+            result = self.llm.chat_with_tools(
+                tools=[tool],
+                user_msg=user_msg,
+                chat_history=[],
+                allow_parallel_tool_calls=True,
+                tool_required=True,
+            )
+            tool_calls = self.llm.get_tool_calls_from_response(
+                result,
+                error_on_no_tool_call=True,
+            )
        logger.debug("LLM query result: %s", tool_calls)
        parsed = DocumentClassifierSchema(**tool_calls[0].tool_kwargs)
        return parsed.model_dump()

-    def run_chat(self, messages: list["ChatMessage"]) -> str:
-        logger.debug(
-            "Running chat query against %s with model %s",
-            self.settings.llm_backend,
-            self.settings.llm_model,
-        )
-        result = self.llm.chat(messages)
-        logger.debug("Chat result: %s", result)
-        return result
+    @contextmanager
+    def _normalize_timeouts(self) -> Iterator[None]:
+        """
+        Translate backend-specific timeout exceptions into LLMTimeoutError.
+
+        Wrap an LLM call in this context manager so callers only need to
+        handle a single timeout exception type.
+
+        Raises:
+            LLMTimeoutError: chained from the original exception when the
+                wrapped code raises httpx.TimeoutException, or any exception
+                for which _is_openai_timeout() returns True.
+
+        Every other exception propagates unchanged.
+        """
+        try:
+            yield
+        except httpx.TimeoutException as exc:
+            raise LLMTimeoutError from exc
+        except Exception as exc:
+            if self._is_openai_timeout(exc):
+                raise LLMTimeoutError from exc
+            # Not a recognised timeout: re-raise the original exception as-is.
+            raise
+
+    def _is_openai_timeout(self, exc: Exception) -> bool:
+        """
+        Report whether ``exc`` is an ``openai.APITimeoutError``.
+
+        Returns False without importing ``openai`` when the configured
+        backend is not LLMBackend.OPENAI_LIKE.
+        """
+        if self.settings.llm_backend != LLMBackend.OPENAI_LIKE:
+            return False
+
+        # Keep OpenAI imports out of module import paths and only load the SDK
+        # when translating an error from an OpenAI-backed request.
+        from openai import APITimeoutError
+
+        return isinstance(exc, APITimeoutError)
@@ -32,15 +32,18 @@ def get_embedding_model(config: AIConfig) -> "BaseEmbedding":
                http_client = create_pinned_httpx_client(
                    endpoint,
                    allow_internal=config.llm_allow_internal_endpoints,
+                    timeout=config.llm_request_timeout,
                )
                async_http_client = create_pinned_async_httpx_client(
                    endpoint,
                    allow_internal=config.llm_allow_internal_endpoints,
+                    timeout=config.llm_request_timeout,
                )
            return OpenAILikeEmbedding(
                model_name=config.llm_embedding_model or "text-embedding-3-small",
                api_key=config.llm_api_key,
                api_base=endpoint,
+                timeout=config.llm_request_timeout,
                http_client=http_client,
                async_http_client=async_http_client,
            )
@@ -73,12 +76,14 @@ def get_embedding_model(config: AIConfig) -> "BaseEmbedding":
            )
            embedding._client = Client(
                host=endpoint,
+                timeout=config.llm_request_timeout,
                transport=PinnedHostHTTPTransport(
                    allow_internal=config.llm_allow_internal_endpoints,
                ),
            )
            embedding._async_client = AsyncClient(
                host=endpoint,
+                timeout=config.llm_request_timeout,
                transport=PinnedHostAsyncHTTPTransport(
                    allow_internal=config.llm_allow_internal_endpoints,
                ),
@@ -0,0 +1,2 @@
+class LLMTimeoutError(Exception):
+    """Raised when a request to the configured LLM backend times out."""
+
+    pass
@@ -3,12 +3,14 @@ from unittest.mock import ANY
 from unittest.mock import MagicMock
 from unittest.mock import patch

+import httpx
+import openai
 import pytest
-from llama_index.core.llms import ChatMessage
 from llama_index.core.llms.llm import ToolSelection

 from paperless_ai.client import LLM_SYSTEM_PROMPT
 from paperless_ai.client import AIClient
+from paperless_ai.exceptions import LLMTimeoutError


@pytest.fixture
@@ -17,6 +19,7 @@ def mock_ai_config():
        mock_config = MagicMock()
        mock_config.llm_allow_internal_endpoints = True
        mock_config.llm_context_size = 8192
+        mock_config.llm_request_timeout = 120
        MockAIConfig.return_value = mock_config
        yield mock_config

@@ -64,6 +67,7 @@ def test_get_llm_openai(mock_ai_config, mock_openai_llm):
        model="test_model",
        api_base="http://test-url",
        api_key="test_api_key",
+        timeout=120,
        is_chat_model=True,
        is_function_calling_model=True,
        system_prompt=LLM_SYSTEM_PROMPT,
@@ -151,17 +155,38 @@ def test_run_llm_query_openai_uses_tools(mock_ai_config, mock_openai_llm):
    mock_llm_instance.chat_with_tools.assert_called_once()


-def test_run_chat(mock_ai_config, mock_ollama_llm):
+def test_run_llm_query_openai_timeout_raises_local_error(
+    mock_ai_config,
+    mock_openai_llm,
+):
+    """
+    An openai.APITimeoutError raised by chat_with_tools on the openai-like
+    backend must surface from run_llm_query as LLMTimeoutError.
+    """
+    mock_ai_config.llm_backend = "openai-like"
+    mock_ai_config.llm_model = "test_model"
+    mock_ai_config.llm_api_key = "test_api_key"
+    mock_ai_config.llm_endpoint = "http://test-url"
+
+    # APITimeoutError is constructed from the request that timed out.
+    request = httpx.Request("POST", "http://test-url/v1/chat/completions")
+    mock_openai_llm.return_value.chat_with_tools.side_effect = openai.APITimeoutError(
+        request,
+    )
+
+    client = AIClient()
+
+    with pytest.raises(LLMTimeoutError):
+        client.run_llm_query("test_prompt")
+
+
+def test_run_llm_query_httpx_timeout_raises_local_error(
+    mock_ai_config,
+    mock_ollama_llm,
+):
    mock_ai_config.llm_backend = "ollama"
    mock_ai_config.llm_model = "test_model"
    mock_ai_config.llm_endpoint = "http://test-url"

    mock_llm_instance = mock_ollama_llm.return_value
-    mock_llm_instance.chat.return_value = "test_chat_result"
+    mock_llm_instance.chat.side_effect = httpx.ReadTimeout("timed out")

    client = AIClient()
-    messages = [ChatMessage(role="user", content="Hello")]
-    result = client.run_chat(messages)

-    mock_llm_instance.chat.assert_called_once_with(messages)
-    assert result == "test_chat_result"
+    with pytest.raises(LLMTimeoutError):
+        client.run_llm_query("test_prompt")
@@ -19,6 +19,7 @@ def mock_ai_config():
        MockAIConfig.return_value.llm_embedding_endpoint = None
        MockAIConfig.return_value.llm_allow_internal_endpoints = True
        MockAIConfig.return_value.llm_context_size = 8192
+        MockAIConfig.return_value.llm_request_timeout = 120
        yield MockAIConfig


@@ -71,6 +72,7 @@ def test_get_embedding_model_openai(mock_ai_config):
            model_name="text-embedding-3-small",
            api_key="test_api_key",
            api_base="http://test-url",
+            timeout=120,
            http_client=ANY,
            async_http_client=ANY,
        )
@@ -92,6 +94,7 @@ def test_get_embedding_model_openai_prefers_embedding_endpoint(mock_ai_config):
            model_name="text-embedding-3-small",
            api_key="test_api_key",
            api_base="http://embedding-url",
+            timeout=120,
            http_client=ANY,
            async_http_client=ANY,
        )
@@ -0,0 +1,158 @@
+# Generated by Django 5.2.14 on 2026-06-04 15:10
+
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+    """
+    Squashed replacement for paperless_mail migrations 0002 and 0003.
+
+    Applies on top of ``0001_squashed``. The AlterField operations redefine
+    integer columns on ``mailaccount`` and ``mailrule`` (presumably the
+    content of 0002_optimize_integer_field_sizes; confirm against that
+    migration). The final AddField adds ``mailrule.stop_processing``
+    (presumably from 0003_mailrule_stop_processing).
+    """
+
+    # The migrations this file stands in for.
+    replaces = [
+        ("paperless_mail", "0002_optimize_integer_field_sizes"),
+        ("paperless_mail", "0003_mailrule_stop_processing"),
+    ]
+
+    dependencies = [
+        ("paperless_mail", "0001_squashed"),
+    ]
+
+    operations = [
+        # --- mailaccount fields ---
+        migrations.AlterField(
+            model_name="mailaccount",
+            name="account_type",
+            field=models.PositiveSmallIntegerField(
+                choices=[(1, "IMAP"), (2, "Gmail OAuth"), (3, "Outlook OAuth")],
+                default=1,
+                verbose_name="account type",
+            ),
+        ),
+        # Unlike the other columns here, imap_port uses PositiveIntegerField
+        # rather than the Small variant.
+        migrations.AlterField(
+            model_name="mailaccount",
+            name="imap_port",
+            field=models.PositiveIntegerField(
+                blank=True,
+                help_text="This is usually 143 for unencrypted and STARTTLS connections, and 993 for SSL connections.",
+                null=True,
+                verbose_name="IMAP port",
+            ),
+        ),
+        migrations.AlterField(
+            model_name="mailaccount",
+            name="imap_security",
+            field=models.PositiveSmallIntegerField(
+                choices=[(1, "No encryption"), (2, "Use SSL"), (3, "Use STARTTLS")],
+                default=2,
+                verbose_name="IMAP security",
+            ),
+        ),
+        # --- mailrule fields ---
+        migrations.AlterField(
+            model_name="mailrule",
+            name="action",
+            field=models.PositiveSmallIntegerField(
+                choices=[
+                    (1, "Delete"),
+                    (2, "Move to specified folder"),
+                    (3, "Mark as read, don't process read mails"),
+                    (4, "Flag the mail, don't process flagged mails"),
+                    (5, "Tag the mail with specified tag, don't process tagged mails"),
+                ],
+                default=3,
+                verbose_name="action",
+            ),
+        ),
+        migrations.AlterField(
+            model_name="mailrule",
+            name="assign_correspondent_from",
+            field=models.PositiveSmallIntegerField(
+                choices=[
+                    (1, "Do not assign a correspondent"),
+                    (2, "Use mail address"),
+                    (3, "Use name (or mail address if not available)"),
+                    (4, "Use correspondent selected below"),
+                ],
+                default=1,
+                verbose_name="assign correspondent from",
+            ),
+        ),
+        migrations.AlterField(
+            model_name="mailrule",
+            name="assign_title_from",
+            field=models.PositiveSmallIntegerField(
+                choices=[
+                    (1, "Use subject as title"),
+                    (2, "Use attachment filename as title"),
+                    (3, "Do not assign title from rule"),
+                ],
+                default=1,
+                verbose_name="assign title from",
+            ),
+        ),
+        migrations.AlterField(
+            model_name="mailrule",
+            name="attachment_type",
+            field=models.PositiveSmallIntegerField(
+                choices=[
+                    (1, "Only process attachments."),
+                    (2, "Process all files, including 'inline' attachments."),
+                ],
+                default=1,
+                help_text="Inline attachments include embedded images, so it's best to combine this option with a filename filter.",
+                verbose_name="attachment type",
+            ),
+        ),
+        migrations.AlterField(
+            model_name="mailrule",
+            name="consumption_scope",
+            field=models.PositiveSmallIntegerField(
+                choices=[
+                    (1, "Only process attachments."),
+                    (
+                        2,
+                        "Process full Mail (with embedded attachments in file) as .eml",
+                    ),
+                    (
+                        3,
+                        "Process full Mail (with embedded attachments in file) as .eml + process attachments as separate documents",
+                    ),
+                ],
+                default=1,
+                verbose_name="consumption scope",
+            ),
+        ),
+        migrations.AlterField(
+            model_name="mailrule",
+            name="maximum_age",
+            field=models.PositiveSmallIntegerField(
+                default=30,
+                help_text="Specified in days.",
+                verbose_name="maximum age",
+            ),
+        ),
+        # "order" is a signed SmallIntegerField, not a Positive* variant.
+        migrations.AlterField(
+            model_name="mailrule",
+            name="order",
+            field=models.SmallIntegerField(default=0, verbose_name="order"),
+        ),
+        migrations.AlterField(
+            model_name="mailrule",
+            name="pdf_layout",
+            field=models.PositiveSmallIntegerField(
+                choices=[
+                    (0, "System default"),
+                    (1, "Text, then HTML"),
+                    (2, "HTML, then text"),
+                    (3, "HTML only"),
+                    (4, "Text only"),
+                ],
+                default=0,
+                verbose_name="pdf layout",
+            ),
+        ),
+        # The only schema addition in this squash: a new boolean column.
+        migrations.AddField(
+            model_name="mailrule",
+            name="stop_processing",
+            field=models.BooleanField(
+                default=False,
+                help_text="If True, no further rules will be processed after this one if any document is queued.",
+                verbose_name="Stop processing further rules",
+            ),
+        ),
+    ]
Author	SHA1	Message	Date
stumpylog	096e0de473	Accounts for the 0013 migration	2026-06-18 08:51:15 -07:00
stumpylog	ac3eded573	Squashes migrations in prep for final release	2026-06-18 08:48:53 -07:00
shamoon	262183e848	Enhancement (beta): support LLM timeout config (#13002 )	2026-06-18 08:35:11 -07:00
Trenton H	b8f10269a7	Fix(beta): Workaround a Tantivy panic in more like this searching (#13026 )	2026-06-17 15:43:48 +00:00
shamoon	bcf5d2cffc	Chore: set tool_required to opena-like llm calls (#13025 )	2026-06-17 06:24:38 -07:00
shamoon	8bd620d8ab	Enhancement: ignore diacritics, support multiple substring matching for UI filtering (#13021 )	2026-06-17 05:58:55 -07:00
Trenton H	ad1b54ce88	Fix (beta): Catch consumer files created during watcher re-creations (#13013 )	2026-06-15 19:23:54 -07:00