Compare commits

...

4 Commits

Author SHA1 Message Date
Trenton H
49d1e773d9 Feature: Improve exporter memory efficiency
Phase 1 -- Eliminate JSON round-trip in document exporter

Replace json.loads(serializers.serialize("json", qs)) with
serializers.serialize("python", qs) to skip the intermediate
JSON string allocation and parse step. Use DjangoJSONEncoder
in check_and_write_json() to handle native Python types
(datetime, Decimal, UUID) the Python serializer returns.

Measured on 200 documents + 200 CustomFieldInstances:
  - Memory delta:  1,410 KiB → 527 KiB  (−63%)
  - Peak memory:   1,500 KiB → 530 KiB  (−65%)
  - Wall time:       0.54s  → 0.36s    (−34%)
  - JSON output: identical (byte-for-byte, 345 KB)

Phase 2 -- Batched QuerySet serialization in document exporter

Add serialize_queryset_batched() helper that uses QuerySet.iterator()
and itertools.islice to stream records in configurable chunks, bounding
peak memory during serialization to batch_size * avg_record_size rather
than loading the entire QuerySet at once.

Replace the single-call serializers.serialize("python", qs) in dump()
with list(chain.from_iterable(serialize_queryset_batched(qs, batch_size))).
Add --batch-size CLI argument (default 500).

Measured on 2,000 documents + 2,000 CustomFieldInstances:
  Phase 1 baseline (full queryset, no iterator):
    Peak: 9,293 KiB  |  Time: 4.33s

  Phase 2 batch=2000 (iterator, 1 batch):
    Peak: 7,716 KiB  |  Time: 4.20s  (−17% peak vs Phase 1)

  Phase 2 batch=500 (iterator, 4 batches -- default):
    Peak: 6,980 KiB  |  Time: 4.28s  (−25% peak vs Phase 1)

  Phase 2 batch=100 (iterator, 20 batches):
    Peak: 6,776 KiB  |  Time: 4.30s  (−27% peak vs Phase 1)

Peak memory falls as batch_size decreases. Wall time is within noise
(batching overhead negligible). Output is byte-for-byte identical across
all batch sizes and approaches.

The primary gain is that Django's queryset cache is bypassed (iterator()),
preventing the ORM from holding all model instances in memory after fetch.
Smaller batches reduce the per-batch model-instance peak further.
2026-03-03 19:29:29 -08:00
Trenton H
5498503d60 Chore: Improve user migration path (#12232)
Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
2026-03-03 15:51:48 -08:00
GitHub Actions
16b58c2de5 Auto translate strings 2026-03-03 19:25:03 +00:00
shamoon
c724fbb5d9 Clarify bulk edit wording with versions 2026-03-03 11:22:22 -08:00
15 changed files with 855 additions and 159 deletions

View File

@@ -305,6 +305,7 @@ markers = [
"greenmail: Tests requiring Greenmail service",
"date_parsing: Tests which cover date parsing from content or filename",
"management: Tests which cover management commands/functionality",
"profiling: Benchmarks that profile and compare implementation performance",
]
[tool.pytest_env]

View File

@@ -1238,8 +1238,8 @@
<context context-type="linenumber">82</context>
</context-group>
</trans-unit>
<trans-unit id="8035757452478567832" datatype="html">
<source>Update existing document</source>
<trans-unit id="7860582931776068318" datatype="html">
<source>Add document version</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
<context context-type="linenumber">280</context>
@@ -8411,8 +8411,8 @@
<context context-type="linenumber">832</context>
</context-group>
</trans-unit>
<trans-unit id="6390006284731990222" datatype="html">
<source>This operation will permanently rotate the original version of <x id="PH" equiv-text="this.list.selected.size"/> document(s).</source>
<trans-unit id="5203024009814367559" datatype="html">
<source>This operation will add rotated versions of the <x id="PH" equiv-text="this.list.selected.size"/> document(s).</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
<context context-type="linenumber">833</context>

View File

@@ -277,7 +277,7 @@
<div class="col">
<select class="form-select" formControlName="pdfEditorDefaultEditMode">
<option [ngValue]="PdfEditorEditMode.Create" i18n>Create new document(s)</option>
<option [ngValue]="PdfEditorEditMode.Update" i18n>Update existing document</option>
<option [ngValue]="PdfEditorEditMode.Update" i18n>Add document version</option>
</select>
</div>
</div>

View File

@@ -84,7 +84,7 @@
<input type="radio" class="btn-check" [(ngModel)]="editMode" [value]="PdfEditorEditMode.Update" id="editModeUpdate" name="editmode" [disabled]="hasSplit()">
<label for="editModeUpdate" class="btn btn-outline-primary btn-sm">
<i-bs name="pencil"></i-bs>
<span class="form-check-label ms-2" i18n>Update existing document</span>
<span class="form-check-label ms-2" i18n>Add document version</span>
</label>
</div>
@if (editMode === PdfEditorEditMode.Create) {

View File

@@ -830,7 +830,7 @@ export class BulkEditorComponent
})
const rotateDialog = modal.componentInstance as RotateConfirmDialogComponent
rotateDialog.title = $localize`Rotate confirm`
rotateDialog.messageBold = $localize`This operation will permanently rotate the original version of ${this.list.selected.size} document(s).`
rotateDialog.messageBold = $localize`This operation will add rotated versions of the ${this.list.selected.size} document(s).`
rotateDialog.btnClass = 'btn-danger'
rotateDialog.btnCaption = $localize`Proceed`
rotateDialog.documentID = Array.from(this.list.selected)[0]

View File

@@ -3,6 +3,8 @@ import json
import os
import shutil
import tempfile
from itertools import chain
from itertools import islice
from pathlib import Path
from typing import TYPE_CHECKING
@@ -19,6 +21,7 @@ from django.contrib.contenttypes.models import ContentType
from django.core import serializers
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.core.serializers.json import DjangoJSONEncoder
from django.db import transaction
from django.utils import timezone
from filelock import FileLock
@@ -26,6 +29,8 @@ from guardian.models import GroupObjectPermission
from guardian.models import UserObjectPermission
if TYPE_CHECKING:
from collections.abc import Generator
from django.db.models import QuerySet
if settings.AUDIT_LOG_ENABLED:
@@ -60,6 +65,22 @@ from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
def serialize_queryset_batched(
    queryset: "QuerySet",
    *,
    batch_size: int = 500,
) -> "Generator[list[dict], None, None]":
    """Stream a QuerySet as successive lists of serialized records.

    Rows are pulled through ``QuerySet.iterator()`` and grouped with
    ``islice`` into groups of at most ``batch_size``; each group is handed to
    Django's "python" serializer in a single call and the resulting list of
    dicts is yielded. Nothing is yielded for an empty queryset.

    Args:
        queryset: The QuerySet whose rows should be serialized.
        batch_size: Maximum number of rows per yielded batch; also passed to
            ``iterator()`` as its ``chunk_size``.

    Yields:
        One list of serialized records per batch, in iteration order.
    """
    rows = queryset.iterator(chunk_size=batch_size)
    while True:
        batch = list(islice(rows, batch_size))
        if not batch:
            # Iterator exhausted: an empty slice is the termination signal.
            return
        yield serializers.serialize("python", batch)
class Command(CryptMixin, BaseCommand):
help = (
"Decrypt and rename all files in our collection into a given target "
@@ -186,6 +207,17 @@ class Command(CryptMixin, BaseCommand):
help="If provided, is used to encrypt sensitive data in the export",
)
parser.add_argument(
"--batch-size",
type=int,
default=500,
help=(
"Number of records to process per batch during serialization. "
"Lower values reduce peak memory usage; higher values improve "
"throughput. Default: 500."
),
)
def handle(self, *args, **options) -> None:
self.target = Path(options["target"]).resolve()
self.split_manifest: bool = options["split_manifest"]
@@ -200,6 +232,7 @@ class Command(CryptMixin, BaseCommand):
self.data_only: bool = options["data_only"]
self.no_progress_bar: bool = options["no_progress_bar"]
self.passphrase: str | None = options.get("passphrase")
self.batch_size: int = options["batch_size"]
self.files_in_export_dir: set[Path] = set()
self.exported_files: set[str] = set()
@@ -294,8 +327,13 @@ class Command(CryptMixin, BaseCommand):
# Build an overall manifest
for key, object_query in manifest_key_to_object_query.items():
manifest_dict[key] = json.loads(
serializers.serialize("json", object_query),
manifest_dict[key] = list(
chain.from_iterable(
serialize_queryset_batched(
object_query,
batch_size=self.batch_size,
),
),
)
self.encrypt_secret_fields(manifest_dict)
@@ -512,14 +550,24 @@ class Command(CryptMixin, BaseCommand):
self.files_in_export_dir.remove(target)
if self.compare_json:
target_checksum = hashlib.md5(target.read_bytes()).hexdigest()
src_str = json.dumps(content, indent=2, ensure_ascii=False)
src_str = json.dumps(
content,
cls=DjangoJSONEncoder,
indent=2,
ensure_ascii=False,
)
src_checksum = hashlib.md5(src_str.encode("utf-8")).hexdigest()
if src_checksum == target_checksum:
perform_write = False
if perform_write:
target.write_text(
json.dumps(content, indent=2, ensure_ascii=False),
json.dumps(
content,
cls=DjangoJSONEncoder,
indent=2,
ensure_ascii=False,
),
encoding="utf-8",
)

View File

@@ -1,4 +1,4 @@
# Generated by Django 5.2.7 on 2026-01-15 22:08
# Generated by Django 5.2.11 on 2026-03-03 16:27
import datetime
@@ -21,6 +21,207 @@ class Migration(migrations.Migration):
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
replaces = [
("documents", "0001_initial"),
("documents", "0002_auto_20151226_1316"),
("documents", "0003_sender"),
("documents", "0004_auto_20160114_1844"),
(
"documents",
"0004_auto_20160114_1844_squashed_0011_auto_20160303_1929",
),
("documents", "0005_auto_20160123_0313"),
("documents", "0006_auto_20160123_0430"),
("documents", "0007_auto_20160126_2114"),
("documents", "0008_document_file_type"),
("documents", "0009_auto_20160214_0040"),
("documents", "0010_log"),
("documents", "0011_auto_20160303_1929"),
("documents", "0012_auto_20160305_0040"),
("documents", "0013_auto_20160325_2111"),
("documents", "0014_document_checksum"),
("documents", "0015_add_insensitive_to_match"),
(
"documents",
"0015_add_insensitive_to_match_squashed_0018_auto_20170715_1712",
),
("documents", "0016_auto_20170325_1558"),
("documents", "0017_auto_20170512_0507"),
("documents", "0018_auto_20170715_1712"),
("documents", "0019_add_consumer_user"),
("documents", "0020_document_added"),
("documents", "0021_document_storage_type"),
("documents", "0022_auto_20181007_1420"),
("documents", "0023_document_current_filename"),
("documents", "1000_update_paperless_all"),
("documents", "1001_auto_20201109_1636"),
("documents", "1002_auto_20201111_1105"),
("documents", "1003_mime_types"),
("documents", "1004_sanity_check_schedule"),
("documents", "1005_checksums"),
("documents", "1006_auto_20201208_2209"),
(
"documents",
"1006_auto_20201208_2209_squashed_1011_auto_20210101_2340",
),
("documents", "1007_savedview_savedviewfilterrule"),
("documents", "1008_auto_20201216_1736"),
("documents", "1009_auto_20201216_2005"),
("documents", "1010_auto_20210101_2159"),
("documents", "1011_auto_20210101_2340"),
("documents", "1012_fix_archive_files"),
("documents", "1013_migrate_tag_colour"),
("documents", "1014_auto_20210228_1614"),
("documents", "1015_remove_null_characters"),
("documents", "1016_auto_20210317_1351"),
(
"documents",
"1016_auto_20210317_1351_squashed_1020_merge_20220518_1839",
),
("documents", "1017_alter_savedviewfilterrule_rule_type"),
("documents", "1018_alter_savedviewfilterrule_value"),
("documents", "1019_storagepath_document_storage_path"),
("documents", "1019_uisettings"),
("documents", "1020_merge_20220518_1839"),
("documents", "1021_webp_thumbnail_conversion"),
("documents", "1022_paperlesstask"),
(
"documents",
"1022_paperlesstask_squashed_1036_alter_savedviewfilterrule_rule_type",
),
("documents", "1023_add_comments"),
("documents", "1024_document_original_filename"),
("documents", "1025_alter_savedviewfilterrule_rule_type"),
("documents", "1026_transition_to_celery"),
(
"documents",
"1027_remove_paperlesstask_attempted_task_and_more",
),
(
"documents",
"1028_remove_paperlesstask_task_args_and_more",
),
("documents", "1029_alter_document_archive_serial_number"),
("documents", "1030_alter_paperlesstask_task_file_name"),
(
"documents",
"1031_remove_savedview_user_correspondent_owner_and_more",
),
(
"documents",
"1032_alter_correspondent_matching_algorithm_and_more",
),
(
"documents",
"1033_alter_documenttype_options_alter_tag_options_and_more",
),
("documents", "1034_alter_savedviewfilterrule_rule_type"),
("documents", "1035_rename_comment_note"),
("documents", "1036_alter_savedviewfilterrule_rule_type"),
("documents", "1037_webp_encrypted_thumbnail_conversion"),
("documents", "1038_sharelink"),
("documents", "1039_consumptiontemplate"),
(
"documents",
"1040_customfield_customfieldinstance_and_more",
),
("documents", "1041_alter_consumptiontemplate_sources"),
(
"documents",
"1042_consumptiontemplate_assign_custom_fields_and_more",
),
("documents", "1043_alter_savedviewfilterrule_rule_type"),
(
"documents",
"1044_workflow_workflowaction_workflowtrigger_and_more",
),
(
"documents",
"1045_alter_customfieldinstance_value_monetary",
),
(
"documents",
"1045_alter_customfieldinstance_value_monetary_squashed_1049_document_deleted_at_document_restored_at",
),
(
"documents",
"1046_workflowaction_remove_all_correspondents_and_more",
),
("documents", "1047_savedview_display_mode_and_more"),
("documents", "1048_alter_savedviewfilterrule_rule_type"),
(
"documents",
"1049_document_deleted_at_document_restored_at",
),
("documents", "1050_customfield_extra_data_and_more"),
(
"documents",
"1051_alter_correspondent_owner_alter_document_owner_and_more",
),
("documents", "1052_document_transaction_id"),
("documents", "1053_document_page_count"),
(
"documents",
"1054_customfieldinstance_value_monetary_amount_and_more",
),
("documents", "1055_alter_storagepath_path"),
(
"documents",
"1056_customfieldinstance_deleted_at_and_more",
),
("documents", "1057_paperlesstask_owner"),
(
"documents",
"1058_workflowtrigger_schedule_date_custom_field_and_more",
),
(
"documents",
"1059_workflowactionemail_workflowactionwebhook_and_more",
),
(
"documents",
"1060_alter_customfieldinstance_value_select",
),
("documents", "1061_workflowactionwebhook_as_json"),
("documents", "1062_alter_savedviewfilterrule_rule_type"),
(
"documents",
"1063_paperlesstask_type_alter_paperlesstask_task_name_and_more",
),
("documents", "1064_delete_log"),
(
"documents",
"1065_workflowaction_assign_custom_fields_values",
),
(
"documents",
"1066_alter_workflowtrigger_schedule_offset_days",
),
("documents", "1067_alter_document_created"),
("documents", "1068_alter_document_created"),
(
"documents",
"1069_workflowtrigger_filter_has_storage_path_and_more",
),
(
"documents",
"1070_customfieldinstance_value_long_text_and_more",
),
(
"documents",
"1071_tag_tn_ancestors_count_tag_tn_ancestors_pks_and_more",
),
(
"documents",
"1072_workflowtrigger_filter_custom_field_query_and_more",
),
("documents", "1073_migrate_workflow_title_jinja"),
(
"documents",
"1074_workflowrun_deleted_at_workflowrun_restored_at_and_more",
),
]
operations = [
migrations.CreateModel(
name="WorkflowActionEmail",
@@ -185,70 +386,6 @@ class Migration(migrations.Migration):
"abstract": False,
},
),
migrations.CreateModel(
name="CustomField",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"created",
models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
editable=False,
verbose_name="created",
),
),
("name", models.CharField(max_length=128)),
(
"data_type",
models.CharField(
choices=[
("string", "String"),
("url", "URL"),
("date", "Date"),
("boolean", "Boolean"),
("integer", "Integer"),
("float", "Float"),
("monetary", "Monetary"),
("documentlink", "Document Link"),
("select", "Select"),
("longtext", "Long Text"),
],
editable=False,
max_length=50,
verbose_name="data type",
),
),
(
"extra_data",
models.JSONField(
blank=True,
help_text="Extra data for the custom field, such as select options",
null=True,
verbose_name="extra data",
),
),
],
options={
"verbose_name": "custom field",
"verbose_name_plural": "custom fields",
"ordering": ("created",),
"constraints": [
models.UniqueConstraint(
fields=("name",),
name="documents_customfield_unique_name",
),
],
},
),
migrations.CreateModel(
name="DocumentType",
fields=[
@@ -733,17 +870,6 @@ class Migration(migrations.Migration):
verbose_name="correspondent",
),
),
(
"owner",
models.ForeignKey(
blank=True,
default=None,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
(
"document_type",
models.ForeignKey(
@@ -767,12 +893,14 @@ class Migration(migrations.Migration):
),
),
(
"tags",
models.ManyToManyField(
"owner",
models.ForeignKey(
blank=True,
related_name="documents",
to="documents.tag",
verbose_name="tags",
default=None,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
],
@@ -782,6 +910,140 @@ class Migration(migrations.Migration):
"ordering": ("-created",),
},
),
migrations.AddField(
model_name="document",
name="tags",
field=models.ManyToManyField(
blank=True,
related_name="documents",
to="documents.tag",
verbose_name="tags",
),
),
migrations.CreateModel(
name="Note",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("deleted_at", models.DateTimeField(blank=True, null=True)),
("restored_at", models.DateTimeField(blank=True, null=True)),
("transaction_id", models.UUIDField(blank=True, null=True)),
(
"note",
models.TextField(
blank=True,
help_text="Note for the document",
verbose_name="content",
),
),
(
"created",
models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
verbose_name="created",
),
),
(
"document",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="notes",
to="documents.document",
verbose_name="document",
),
),
(
"user",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="notes",
to=settings.AUTH_USER_MODEL,
verbose_name="user",
),
),
],
options={
"verbose_name": "note",
"verbose_name_plural": "notes",
"ordering": ("created",),
},
),
migrations.CreateModel(
name="CustomField",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"created",
models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
editable=False,
verbose_name="created",
),
),
("name", models.CharField(max_length=128)),
(
"data_type",
models.CharField(
choices=[
("string", "String"),
("url", "URL"),
("date", "Date"),
("boolean", "Boolean"),
("integer", "Integer"),
("float", "Float"),
("monetary", "Monetary"),
("documentlink", "Document Link"),
("select", "Select"),
("longtext", "Long Text"),
],
editable=False,
max_length=50,
verbose_name="data type",
),
),
(
"extra_data",
models.JSONField(
blank=True,
help_text="Extra data for the custom field, such as select options",
null=True,
verbose_name="extra data",
),
),
],
options={
"verbose_name": "custom field",
"verbose_name_plural": "custom fields",
"ordering": ("created",),
"constraints": [
models.UniqueConstraint(
fields=("name",),
name="documents_customfield_unique_name",
),
],
},
),
migrations.CreateModel(
name="CustomFieldInstance",
fields=[
@@ -880,66 +1142,6 @@ class Migration(migrations.Migration):
"ordering": ("created",),
},
),
migrations.CreateModel(
name="Note",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("deleted_at", models.DateTimeField(blank=True, null=True)),
("restored_at", models.DateTimeField(blank=True, null=True)),
("transaction_id", models.UUIDField(blank=True, null=True)),
(
"note",
models.TextField(
blank=True,
help_text="Note for the document",
verbose_name="content",
),
),
(
"created",
models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
verbose_name="created",
),
),
(
"document",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="notes",
to="documents.document",
verbose_name="document",
),
),
(
"user",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="notes",
to=settings.AUTH_USER_MODEL,
verbose_name="user",
),
),
],
options={
"verbose_name": "note",
"verbose_name_plural": "notes",
"ordering": ("created",),
},
),
migrations.CreateModel(
name="PaperlessTask",
fields=[
@@ -986,7 +1188,6 @@ class Migration(migrations.Migration):
("train_classifier", "Train Classifier"),
("check_sanity", "Check Sanity"),
("index_optimize", "Index Optimize"),
("llmindex_update", "LLM Index Update"),
],
help_text="Name of the task that was run",
max_length=255,
@@ -1380,6 +1581,7 @@ class Migration(migrations.Migration):
verbose_name="Workflow Action Type",
),
),
("order", models.PositiveIntegerField(default=0, verbose_name="order")),
(
"assign_title",
models.TextField(

View File

@@ -1,4 +1,4 @@
# Generated by Django 5.2.9 on 2026-01-20 18:46
# Generated by Django 5.2.11 on 2026-03-03 16:27
import django.db.models.deletion
from django.db import migrations
@@ -9,8 +9,14 @@ class Migration(migrations.Migration):
initial = True
dependencies = [
("documents", "0001_initial"),
("paperless_mail", "0001_initial"),
("documents", "0001_squashed"),
("paperless_mail", "0001_squashed"),
]
# This migration needs a "replaces", but it doesn't matter which.
# Chose the last 2.20.x migration
replaces = [
("documents", "1075_workflowaction_order"),
]
operations = [

View File

@@ -6,7 +6,7 @@ from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0002_initial"),
("documents", "0002_squashed"),
]
operations = [

View File

@@ -0,0 +1,30 @@
# Generated by Django 5.2.11 on 2026-03-03 16:42
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    # Schema migration for the "documents" app. It must run after
    # 0013_document_root_document.
    dependencies = [
        ("documents", "0013_document_root_document"),
    ]

    operations = [
        # Redefine PaperlessTask.task_name so that its allowed choices are
        # exactly the five task names listed below.
        # NOTE(review): presumably this re-introduces "llmindex_update" after
        # it was dropped from the squashed initial migration -- confirm.
        migrations.AlterField(
            model_name="paperlesstask",
            name="task_name",
            field=models.CharField(
                choices=[
                    ("consume_file", "Consume File"),
                    ("train_classifier", "Train Classifier"),
                    ("check_sanity", "Check Sanity"),
                    ("index_optimize", "Index Optimize"),
                    ("llmindex_update", "LLM Index Update"),
                ],
                help_text="Name of the task that was run",
                max_length=255,
                null=True,
                verbose_name="Task Name",
            ),
        ),
    ]

View File

@@ -0,0 +1,71 @@
"""
Temporary profiling utilities for comparing implementations.
Usage in a management command or shell::
from documents.profiling import profile_block
with profile_block("new check_sanity"):
messages = check_sanity()
with profile_block("old check_sanity"):
messages = check_sanity_old()
Drop this file when done.
"""
from __future__ import annotations
import tracemalloc
from contextlib import contextmanager
from time import perf_counter
from typing import TYPE_CHECKING
from django.db import connection
from django.db import reset_queries
from django.test.utils import override_settings
if TYPE_CHECKING:
from collections.abc import Generator
@contextmanager
def profile_block(label: str = "block") -> Generator[None, None, None]:
    """Profile memory, wall time, and DB queries for a code block.

    Prints a summary to stdout on exit. Requires no external packages.
    Enables DEBUG temporarily to capture Django's query log.

    Args:
        label: Name shown in the printed summary header.

    Yields:
        None. The body of the ``with`` statement is the code being profiled.

    If the profiled block raises, the exception propagates unchanged and no
    summary is printed, but tracemalloc tracing is still stopped so that a
    failing block does not leave tracing enabled for the rest of the process.
    """
    tracemalloc.start()
    try:
        snapshot_before = tracemalloc.take_snapshot()

        with override_settings(DEBUG=True):
            reset_queries()
            start = perf_counter()

            yield

            elapsed = perf_counter() - start
            # Copy the query log while DEBUG is still overridden.
            queries = list(connection.queries)

        snapshot_after = tracemalloc.take_snapshot()
        _, peak = tracemalloc.get_traced_memory()
    finally:
        # Always pair start() with stop(), including when the block raised.
        tracemalloc.stop()

    # Compare snapshots for top allocations
    stats = snapshot_after.compare_to(snapshot_before, "lineno")

    query_time = sum(float(q["time"]) for q in queries)
    mem_diff = sum(s.size_diff for s in stats)

    print(f"\n{'=' * 60}")  # noqa: T201
    print(f" Profile: {label}")  # noqa: T201
    print(f"{'=' * 60}")  # noqa: T201
    print(f" Wall time: {elapsed:.4f}s")  # noqa: T201
    print(f" Queries: {len(queries)} ({query_time:.4f}s)")  # noqa: T201
    print(f" Memory delta: {mem_diff / 1024:.1f} KiB")  # noqa: T201
    print(f" Peak memory: {peak / 1024:.1f} KiB")  # noqa: T201
    print("\n Top 5 allocations:")  # noqa: T201
    for stat in stats[:5]:
        print(f" {stat}")  # noqa: T201
    print(f"{'=' * 60}\n")  # noqa: T201

View File

@@ -204,6 +204,61 @@ def audit_log_check(app_configs, **kwargs):
return result
@register()
def check_v3_minimum_upgrade_version(
    app_configs: object,
    **kwargs: object,
) -> list[Error]:
    """Enforce that upgrades to v3 must start from v2.20.9.

    v3 squashes all prior migrations into 0001_squashed and 0002_squashed.
    If a user skips v2.20.9, the data migration in 1075_workflowaction_order
    never runs and the squash may apply schema changes against an incomplete
    database state.

    The check reads the names of the applied ``documents`` migrations from
    the ``django_migrations`` table of the default connection.

    Args:
        app_configs: Unused; accepted as part of the check signature.
        **kwargs: Unused; accepted as part of the check signature.

    Returns:
        An empty list when there is no ``django_migrations`` table, no
        ``documents`` migrations are recorded, a squashed v3 migration is
        already recorded, ``1075_workflowaction_order`` is recorded, or the
        database raises an error while being queried. Otherwise a single
        ``Error`` with id ``paperless.E002``.
    """
    from django.db import DatabaseError
    from django.db import OperationalError

    try:
        all_tables = connections["default"].introspection.table_names()

        if "django_migrations" not in all_tables:
            return []

        with connections["default"].cursor() as cursor:
            cursor.execute(
                "SELECT name FROM django_migrations WHERE app = %s",
                ["documents"],
            )
            applied: set[str] = {row[0] for row in cursor.fetchall()}

        if not applied:
            return []

        # Already in a valid v3 state
        if {"0001_squashed", "0002_squashed"} & applied:
            return []

        # On v2.20.9 exactly — squash will pick up cleanly from here
        if "1075_workflowaction_order" in applied:
            return []

    except (DatabaseError, OperationalError):
        # Best effort: database errors are deliberately not reported by this
        # check.
        return []

    return [
        Error(
            "Cannot upgrade to Paperless-ngx v3 from this version.",
            # Adjacent literals are concatenated verbatim, so each sentence
            # carries its own trailing space.
            hint=(
                "Upgrading to v3 can only be performed from v2.20.9. "
                "Please upgrade to v2.20.9, run migrations, then upgrade to v3. "
                "See https://docs.paperless-ngx.com/setup/#upgrading for details."
            ),
            id="paperless.E002",
        ),
    ]
@register()
def check_deprecated_db_settings(
app_configs: object,

View File

@@ -3,6 +3,7 @@ from pathlib import Path
from unittest import mock
import pytest
from django.core.checks import Error
from django.core.checks import Warning
from django.test import TestCase
from django.test import override_settings
@@ -13,6 +14,7 @@ from documents.tests.utils import FileSystemAssertsMixin
from paperless.checks import audit_log_check
from paperless.checks import binaries_check
from paperless.checks import check_deprecated_db_settings
from paperless.checks import check_v3_minimum_upgrade_version
from paperless.checks import debug_mode_check
from paperless.checks import paths_check
from paperless.checks import settings_values_check
@@ -395,3 +397,240 @@ class TestDeprecatedDbSettings:
assert len(result) == 1
assert "PAPERLESS_DBSSLCERT" in result[0].msg
class TestV3MinimumUpgradeVersionCheck:
    """Exercise check_v3_minimum_upgrade_version against mocked DB states."""

    @pytest.fixture
    def build_conn_mock(self, mocker: MockerFixture):
        """Provide a factory that fabricates a connections['default'] stand-in.

        Usage::

            conn = build_conn_mock(tables=["django_migrations"], applied=["1075_..."])
        """

        def _build(tables: list[str], applied: list[str]) -> mock.MagicMock:
            fake_conn = mocker.MagicMock()
            fake_conn.introspection.table_names.return_value = tables
            # The check opens the cursor as a context manager, so the rows
            # are configured on the object returned from __enter__.
            fake_cursor = fake_conn.cursor.return_value.__enter__.return_value
            fake_cursor.fetchall.return_value = [(name,) for name in applied]
            return fake_conn

        return _build

    def test_no_migrations_table_fresh_install(
        self,
        mocker: MockerFixture,
        build_conn_mock,
    ) -> None:
        """
        GIVEN:
            - The database has no django_migrations table
        WHEN:
            - The v3 upgrade check runs
        THEN:
            - The result is empty (fresh install, nothing to enforce)
        """
        fake_conn = build_conn_mock([], [])
        mocker.patch.dict("paperless.checks.connections", {"default": fake_conn})

        errors = check_v3_minimum_upgrade_version(None)

        assert errors == []

    def test_no_documents_migrations_fresh_install(
        self,
        mocker: MockerFixture,
        build_conn_mock,
    ) -> None:
        """
        GIVEN:
            - django_migrations exists but holds no rows for the documents app
        WHEN:
            - The v3 upgrade check runs
        THEN:
            - The result is empty (fresh install, nothing to enforce)
        """
        fake_conn = build_conn_mock(["django_migrations"], [])
        mocker.patch.dict("paperless.checks.connections", {"default": fake_conn})

        errors = check_v3_minimum_upgrade_version(None)

        assert errors == []

    def test_v3_state_with_0001_squashed(
        self,
        mocker: MockerFixture,
        build_conn_mock,
    ) -> None:
        """
        GIVEN:
            - 0001_squashed is among the recorded migrations
        WHEN:
            - The v3 upgrade check runs
        THEN:
            - The result is empty (DB is already in a valid v3 state)
        """
        applied_names = [
            "0001_squashed",
            "0002_squashed",
            "0003_workflowaction_order",
        ]
        fake_conn = build_conn_mock(["django_migrations"], applied_names)
        mocker.patch.dict("paperless.checks.connections", {"default": fake_conn})

        errors = check_v3_minimum_upgrade_version(None)

        assert errors == []

    def test_v3_state_with_0002_squashed_only(
        self,
        mocker: MockerFixture,
        build_conn_mock,
    ) -> None:
        """
        GIVEN:
            - 0002_squashed is the only recorded migration
        WHEN:
            - The v3 upgrade check runs
        THEN:
            - The result is empty (0002_squashed alone confirms a valid v3 state)
        """
        fake_conn = build_conn_mock(["django_migrations"], ["0002_squashed"])
        mocker.patch.dict("paperless.checks.connections", {"default": fake_conn})

        errors = check_v3_minimum_upgrade_version(None)

        assert errors == []

    def test_v2_20_9_state_ready_to_upgrade(
        self,
        mocker: MockerFixture,
        build_conn_mock,
    ) -> None:
        """
        GIVEN:
            - 1075_workflowaction_order (the last v2.20.9 migration) is recorded
        WHEN:
            - The v3 upgrade check runs
        THEN:
            - The result is empty (squash will pick up cleanly from this state)
        """
        applied_names = [
            "1074_workflowrun_deleted_at_workflowrun_restored_at_and_more",
            "1075_workflowaction_order",
        ]
        fake_conn = build_conn_mock(["django_migrations"], applied_names)
        mocker.patch.dict("paperless.checks.connections", {"default": fake_conn})

        errors = check_v3_minimum_upgrade_version(None)

        assert errors == []

    def test_v2_20_8_raises_error(
        self,
        mocker: MockerFixture,
        build_conn_mock,
    ) -> None:
        """
        GIVEN:
            - 1074 (last v2.20.8 migration) is recorded but 1075 is missing
        WHEN:
            - The v3 upgrade check runs
        THEN:
            - A single Error with id paperless.E002 comes back
        """
        applied_names = [
            "1074_workflowrun_deleted_at_workflowrun_restored_at_and_more",
        ]
        fake_conn = build_conn_mock(["django_migrations"], applied_names)
        mocker.patch.dict("paperless.checks.connections", {"default": fake_conn})

        result = check_v3_minimum_upgrade_version(None)

        assert len(result) == 1
        only_error = result[0]
        assert isinstance(only_error, Error)
        assert only_error.id == "paperless.E002"

    def test_very_old_version_raises_error(
        self,
        mocker: MockerFixture,
        build_conn_mock,
    ) -> None:
        """
        GIVEN:
            - Only old migrations (well below v2.20.9) are recorded
        WHEN:
            - The v3 upgrade check runs
        THEN:
            - A single Error with id paperless.E002 comes back
        """
        applied_names = ["1000_update_paperless_all", "1022_paperlesstask"]
        fake_conn = build_conn_mock(["django_migrations"], applied_names)
        mocker.patch.dict("paperless.checks.connections", {"default": fake_conn})

        result = check_v3_minimum_upgrade_version(None)

        assert len(result) == 1
        only_error = result[0]
        assert isinstance(only_error, Error)
        assert only_error.id == "paperless.E002"

    def test_error_hint_mentions_v2_20_9(
        self,
        mocker: MockerFixture,
        build_conn_mock,
    ) -> None:
        """
        GIVEN:
            - The DB is on an old v2 version (pre-v2.20.9)
        WHEN:
            - The v3 upgrade check runs
        THEN:
            - The hint names v2.20.9 explicitly so users know what to do
        """
        fake_conn = build_conn_mock(["django_migrations"], ["1022_paperlesstask"])
        mocker.patch.dict("paperless.checks.connections", {"default": fake_conn})

        result = check_v3_minimum_upgrade_version(None)

        assert len(result) == 1
        assert "v2.20.9" in result[0].hint

    def test_db_error_is_swallowed(self, mocker: MockerFixture) -> None:
        """
        GIVEN:
            - Querying the DB raises a DatabaseError
        WHEN:
            - The v3 upgrade check runs
        THEN:
            - Nothing propagates and the result is an empty list
        """
        from django.db import DatabaseError

        failing_conn = mocker.MagicMock()
        failing_conn.introspection.table_names.side_effect = DatabaseError(
            "connection refused",
        )
        mocker.patch.dict(
            "paperless.checks.connections",
            {"default": failing_conn},
        )

        errors = check_v3_minimum_upgrade_version(None)

        assert errors == []

    def test_operational_error_is_swallowed(self, mocker: MockerFixture) -> None:
        """
        GIVEN:
            - Querying the DB raises an OperationalError
        WHEN:
            - The v3 upgrade check runs
        THEN:
            - Nothing propagates and the result is an empty list
        """
        from django.db import OperationalError

        failing_conn = mocker.MagicMock()
        failing_conn.introspection.table_names.side_effect = OperationalError(
            "DB unavailable",
        )
        mocker.patch.dict(
            "paperless.checks.connections",
            {"default": failing_conn},
        )

        errors = check_v3_minimum_upgrade_version(None)

        assert errors == []

View File

@@ -1,4 +1,4 @@
# Generated by Django 5.2.9 on 2026-01-20 18:46
# Generated by Django 5.2.11 on 2026-03-03 16:27
import django.db.models.deletion
import django.utils.timezone
@@ -15,6 +15,50 @@ class Migration(migrations.Migration):
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
replaces = [
("paperless_mail", "0001_initial"),
("paperless_mail", "0001_initial_squashed_0009_mailrule_assign_tags"),
("paperless_mail", "0002_auto_20201117_1334"),
("paperless_mail", "0003_auto_20201118_1940"),
("paperless_mail", "0004_mailrule_order"),
("paperless_mail", "0005_help_texts"),
("paperless_mail", "0006_auto_20210101_2340"),
("paperless_mail", "0007_auto_20210106_0138"),
("paperless_mail", "0008_auto_20210516_0940"),
("paperless_mail", "0009_alter_mailrule_action_alter_mailrule_folder"),
("paperless_mail", "0009_mailrule_assign_tags"),
("paperless_mail", "0010_auto_20220311_1602"),
("paperless_mail", "0011_remove_mailrule_assign_tag"),
(
"paperless_mail",
"0011_remove_mailrule_assign_tag_squashed_0024_alter_mailrule_name_and_more",
),
("paperless_mail", "0012_alter_mailrule_assign_tags"),
("paperless_mail", "0013_merge_20220412_1051"),
("paperless_mail", "0014_alter_mailrule_action"),
("paperless_mail", "0015_alter_mailrule_action"),
("paperless_mail", "0016_mailrule_consumption_scope"),
("paperless_mail", "0017_mailaccount_owner_mailrule_owner"),
("paperless_mail", "0018_processedmail"),
("paperless_mail", "0019_mailrule_filter_to"),
("paperless_mail", "0020_mailaccount_is_token"),
("paperless_mail", "0021_alter_mailaccount_password"),
("paperless_mail", "0022_mailrule_assign_owner_from_rule_and_more"),
("paperless_mail", "0023_remove_mailrule_filter_attachment_filename_and_more"),
("paperless_mail", "0024_alter_mailrule_name_and_more"),
(
"paperless_mail",
"0025_alter_mailaccount_owner_alter_mailrule_owner_and_more",
),
("paperless_mail", "0026_mailrule_enabled"),
(
"paperless_mail",
"0027_mailaccount_expiration_mailaccount_account_type_and_more",
),
("paperless_mail", "0028_alter_mailaccount_password_and_more"),
("paperless_mail", "0029_mailrule_pdf_layout"),
]
operations = [
migrations.CreateModel(
name="MailAccount",

View File

@@ -6,7 +6,7 @@ from django.db import models
class Migration(migrations.Migration):
dependencies = [
("paperless_mail", "0001_initial"),
("paperless_mail", "0001_squashed"),
]
operations = [