Merge branch 'dev' into feature-migrate-export-import-rich

Refactor: add explicit supports_progress_bar and supports_multiprocessing to all PaperlessCommand subclasses

Each management command now explicitly declares both class attributes rather than relying on defaults, making intent unambiguous at a glance.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-08 02:01:22 +00:00 · 2026-03-06 09:52:35 -08:00 · 2026-03-05 15:05:15 -08:00 · 2026-03-05 08:56:55 -08:00
24 changed files with 79 additions and 231 deletions
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -1,4 +1,5 @@
 import datetime
+import hashlib
 import os
 import tempfile
 from enum import StrEnum
@@ -47,7 +48,6 @@ from documents.signals import document_consumption_started
 from documents.signals import document_updated
 from documents.signals.handlers import run_workflows
 from documents.templating.workflows import parse_w_workflow_placeholders
-from documents.utils import compute_checksum
 from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
 from documents.utils import run_subprocess
@@ -196,7 +196,9 @@ class ConsumerPlugin(
        version_doc = Document(
            root_document=root_doc_frozen,
            version_index=next_version_index + 1,
-            checksum=compute_checksum(file_for_checksum),
+            checksum=hashlib.md5(
+                file_for_checksum.read_bytes(),
+            ).hexdigest(),
            content=text or "",
            page_count=page_count,
            mime_type=mime_type,
@@ -654,9 +656,10 @@ class ConsumerPlugin(
                            document.archive_path,
                        )

-                        document.archive_checksum = compute_checksum(
-                            Path(archive_path),
-                        )
+                        with Path(archive_path).open("rb") as f:
+                            document.archive_checksum = hashlib.md5(
+                                f.read(),
+                            ).hexdigest()

                # Don't save with the lock active. Saving will cause the file
                # renaming logic to acquire the lock as well.
@@ -797,7 +800,7 @@ class ConsumerPlugin(
            title=title[:127],
            content=text,
            mime_type=mime_type,
-            checksum=compute_checksum(file_for_checksum),
+            checksum=hashlib.md5(file_for_checksum.read_bytes()).hexdigest(),
            created=create_date,
            modified=create_date,
            page_count=page_count,
@@ -914,9 +917,10 @@ class ConsumerPreflightPlugin(

    def pre_check_duplicate(self) -> None:
        """
-        Using the SHA256 of the file, check this exact file doesn't already exist
+        Using the MD5 of the file, check this exact file doesn't already exist
        """
-        checksum = compute_checksum(Path(self.input_doc.original_file))
+        with Path(self.input_doc.original_file).open("rb") as f:
+            checksum = hashlib.md5(f.read()).hexdigest()
        existing_doc = Document.global_objects.filter(
            Q(checksum=checksum) | Q(archive_checksum=checksum),
        )
--- a/src/documents/management/commands/base.py
+++ b/src/documents/management/commands/base.py
@@ -304,7 +304,7 @@ class PaperlessCommand(RichCommand):

        Progress output is directed to stderr to match the convention that
        progress bars are transient UI feedback, not command output. This
-        mirrors tqdm's default behavior and prevents progress bar rendering
+        keeps stdout free of transient output and prevents progress bar rendering
        from interfering with stdout-based assertions in tests or piped
        command output.

--- a/src/documents/management/commands/document_archiver.py
+++ b/src/documents/management/commands/document_archiver.py
@@ -17,6 +17,7 @@ class Command(PaperlessCommand):
        "modified) after their initial import."
    )

+    supports_progress_bar = True
    supports_multiprocessing = True

    def add_arguments(self, parser):
--- a/src/documents/management/commands/document_exporter.py
+++ b/src/documents/management/commands/document_exporter.py
@@ -8,7 +8,6 @@ from itertools import islice
 from pathlib import Path
 from typing import TYPE_CHECKING

-import tqdm
 from allauth.mfa.models import Authenticator
 from allauth.socialaccount.models import SocialAccount
 from allauth.socialaccount.models import SocialApp
@@ -19,7 +18,6 @@ from django.contrib.auth.models import Permission
 from django.contrib.auth.models import User
 from django.contrib.contenttypes.models import ContentType
 from django.core import serializers
-from django.core.management.base import BaseCommand
 from django.core.management.base import CommandError
 from django.core.serializers.json import DjangoJSONEncoder
 from django.db import transaction
@@ -38,6 +36,7 @@ if settings.AUDIT_LOG_ENABLED:

 from documents.file_handling import delete_empty_directories
 from documents.file_handling import generate_filename
+from documents.management.commands.base import PaperlessCommand
 from documents.management.commands.mixins import CryptMixin
 from documents.models import Correspondent
 from documents.models import CustomField
@@ -58,7 +57,6 @@ from documents.models import WorkflowTrigger
 from documents.settings import EXPORTER_ARCHIVE_NAME
 from documents.settings import EXPORTER_FILE_NAME
 from documents.settings import EXPORTER_THUMBNAIL_NAME
-from documents.utils import compute_checksum
 from documents.utils import copy_file_with_basic_stats
 from paperless import version
 from paperless.models import ApplicationConfiguration
@@ -82,14 +80,18 @@ def serialize_queryset_batched(
        yield serializers.serialize("python", chunk)


-class Command(CryptMixin, BaseCommand):
+class Command(CryptMixin, PaperlessCommand):
    help = (
        "Decrypt and rename all files in our collection into a given target "
        "directory.  And include a manifest file containing document data for "
        "easy import."
    )

+    supports_progress_bar = True
+    supports_multiprocessing = False
+
    def add_arguments(self, parser) -> None:
+        super().add_arguments(parser)
        parser.add_argument("target")

        parser.add_argument(
@@ -196,13 +198,6 @@ class Command(CryptMixin, BaseCommand):
            help="If set, only the database will be imported, not files",
        )

-        parser.add_argument(
-            "--no-progress-bar",
-            default=False,
-            action="store_true",
-            help="If set, the progress bar will not be shown",
-        )
-
        parser.add_argument(
            "--passphrase",
            help="If provided, is used to encrypt sensitive data in the export",
@@ -231,7 +226,6 @@ class Command(CryptMixin, BaseCommand):
        self.no_thumbnail: bool = options["no_thumbnail"]
        self.zip_export: bool = options["zip"]
        self.data_only: bool = options["data_only"]
-        self.no_progress_bar: bool = options["no_progress_bar"]
        self.passphrase: str | None = options.get("passphrase")
        self.batch_size: int = options["batch_size"]

@@ -348,10 +342,12 @@ class Command(CryptMixin, BaseCommand):
            document_manifest = manifest_dict["documents"]

        # 3. Export files from each document
-        for index, document_dict in tqdm.tqdm(
-            enumerate(document_manifest),
-            total=len(document_manifest),
-            disable=self.no_progress_bar,
+        for index, document_dict in enumerate(
+            self.track(
+                document_manifest,
+                description="Exporting documents...",
+                total=len(document_manifest),
+            ),
        ):
            document = document_map[document_dict["pk"]]

@@ -550,14 +546,14 @@ class Command(CryptMixin, BaseCommand):
        if target in self.files_in_export_dir:
            self.files_in_export_dir.remove(target)
            if self.compare_json:
-                target_checksum = compute_checksum(target)
+                target_checksum = hashlib.md5(target.read_bytes()).hexdigest()
                src_str = json.dumps(
                    content,
                    cls=DjangoJSONEncoder,
                    indent=2,
                    ensure_ascii=False,
                )
-                src_checksum = hashlib.sha256(src_str.encode("utf-8")).hexdigest()
+                src_checksum = hashlib.md5(src_str.encode("utf-8")).hexdigest()
                if src_checksum == target_checksum:
                    perform_write = False

@@ -593,7 +589,7 @@ class Command(CryptMixin, BaseCommand):
            source_stat = source.stat()
            target_stat = target.stat()
            if self.compare_checksums and source_checksum:
-                target_checksum = compute_checksum(target)
+                target_checksum = hashlib.md5(target.read_bytes()).hexdigest()
                perform_copy = target_checksum != source_checksum
            elif (
                source_stat.st_mtime != target_stat.st_mtime
--- a/src/documents/management/commands/document_fuzzy_match.py
+++ b/src/documents/management/commands/document_fuzzy_match.py
@@ -40,6 +40,7 @@ def _process_and_match(work: _WorkPackage) -> _WorkResult:
 class Command(PaperlessCommand):
    help = "Searches for documents where the content almost matches"

+    supports_progress_bar = True
    supports_multiprocessing = True

    def add_arguments(self, parser):
--- a/src/documents/management/commands/document_importer.py
+++ b/src/documents/management/commands/document_importer.py
@@ -8,14 +8,12 @@ from pathlib import Path
 from zipfile import ZipFile
 from zipfile import is_zipfile

-import tqdm
 from django.conf import settings
 from django.contrib.auth.models import Permission
 from django.contrib.auth.models import User
 from django.contrib.contenttypes.models import ContentType
 from django.core.exceptions import FieldDoesNotExist
 from django.core.management import call_command
-from django.core.management.base import BaseCommand
 from django.core.management.base import CommandError
 from django.core.serializers.base import DeserializationError
 from django.db import IntegrityError
@@ -25,6 +23,7 @@ from django.db.models.signals import post_save
 from filelock import FileLock

 from documents.file_handling import create_source_path_directory
+from documents.management.commands.base import PaperlessCommand
 from documents.management.commands.mixins import CryptMixin
 from documents.models import Correspondent
 from documents.models import CustomField
@@ -57,21 +56,18 @@ def disable_signal(sig, receiver, sender, *, weak: bool | None = None) -> Genera
        sig.connect(receiver=receiver, sender=sender, **kwargs)


-class Command(CryptMixin, BaseCommand):
+class Command(CryptMixin, PaperlessCommand):
    help = (
        "Using a manifest.json file, load the data from there, and import the "
        "documents it refers to."
    )

-    def add_arguments(self, parser) -> None:
-        parser.add_argument("source")
+    supports_progress_bar = True
+    supports_multiprocessing = False

-        parser.add_argument(
-            "--no-progress-bar",
-            default=False,
-            action="store_true",
-            help="If set, the progress bar will not be shown",
-        )
+    def add_arguments(self, parser) -> None:
+        super().add_arguments(parser)
+        parser.add_argument("source")

        parser.add_argument(
            "--data-only",
@@ -231,7 +227,6 @@ class Command(CryptMixin, BaseCommand):

        self.source = Path(options["source"]).resolve()
        self.data_only: bool = options["data_only"]
-        self.no_progress_bar: bool = options["no_progress_bar"]
        self.passphrase: str | None = options.get("passphrase")
        self.version: str | None = None
        self.salt: str | None = None
@@ -365,7 +360,7 @@ class Command(CryptMixin, BaseCommand):
            filter(lambda r: r["model"] == "documents.document", self.manifest),
        )

-        for record in tqdm.tqdm(manifest_documents, disable=self.no_progress_bar):
+        for record in self.track(manifest_documents, description="Copying files..."):
            document = Document.objects.get(pk=record["pk"])

            doc_file = record[EXPORTER_FILE_NAME]
--- a/src/documents/management/commands/document_index.py
+++ b/src/documents/management/commands/document_index.py
@@ -8,6 +8,9 @@ from documents.tasks import index_reindex
 class Command(PaperlessCommand):
    help = "Manages the document index."

+    supports_progress_bar = True
+    supports_multiprocessing = False
+
    def add_arguments(self, parser):
        super().add_arguments(parser)
        parser.add_argument("command", choices=["reindex", "optimize"])
--- a/src/documents/management/commands/document_llmindex.py
+++ b/src/documents/management/commands/document_llmindex.py
@@ -7,6 +7,9 @@ from documents.tasks import llmindex_index
 class Command(PaperlessCommand):
    help = "Manages the LLM-based vector index for Paperless."

+    supports_progress_bar = True
+    supports_multiprocessing = False
+
    def add_arguments(self, parser: Any) -> None:
        super().add_arguments(parser)
        parser.add_argument("command", choices=["rebuild", "update"])
--- a/src/documents/management/commands/document_renamer.py
+++ b/src/documents/management/commands/document_renamer.py
@@ -7,6 +7,9 @@ from documents.models import Document
 class Command(PaperlessCommand):
    help = "Rename all documents"

+    supports_progress_bar = True
+    supports_multiprocessing = False
+
    def handle(self, *args, **options):
        for document in self.track(Document.objects.all(), description="Renaming..."):
            post_save.send(Document, instance=document, created=False)
--- a/src/documents/management/commands/document_retagger.py
+++ b/src/documents/management/commands/document_retagger.py
@@ -180,6 +180,9 @@ class Command(PaperlessCommand):
        "modified) after their initial import."
    )

+    supports_progress_bar = True
+    supports_multiprocessing = False
+
    def add_arguments(self, parser) -> None:
        super().add_arguments(parser)
        parser.add_argument("-c", "--correspondent", default=False, action="store_true")
--- a/src/documents/management/commands/document_sanity_checker.py
+++ b/src/documents/management/commands/document_sanity_checker.py
@@ -24,6 +24,9 @@ _LEVEL_STYLE: dict[int, tuple[str, str]] = {
 class Command(PaperlessCommand):
    help = "This command checks your document archive for issues."

+    supports_progress_bar = True
+    supports_multiprocessing = False
+
    def _render_results(self, messages: SanityCheckMessages) -> None:
        """Render sanity check results as a Rich table."""

--- a/src/documents/management/commands/document_thumbnails.py
+++ b/src/documents/management/commands/document_thumbnails.py
@@ -36,6 +36,7 @@ def _process_document(doc_id: int) -> None:
 class Command(PaperlessCommand):
    help = "This will regenerate the thumbnails for all documents."

+    supports_progress_bar = True
    supports_multiprocessing = True

    def add_arguments(self, parser) -> None:
--- a/src/documents/management/commands/mixins.py
+++ b/src/documents/management/commands/mixins.py
@@ -1,6 +1,5 @@
 import base64
 import os
-from argparse import ArgumentParser
 from typing import TypedDict

 from cryptography.fernet import Fernet
@@ -21,25 +20,6 @@ class CryptFields(TypedDict):
    fields: list[str]


-class ProgressBarMixin:
-    """
-    Many commands use a progress bar, which can be disabled
-    via this class
-    """
-
-    def add_argument_progress_bar_mixin(self, parser: ArgumentParser) -> None:
-        parser.add_argument(
-            "--no-progress-bar",
-            default=False,
-            action="store_true",
-            help="If set, the progress bar will not be shown",
-        )
-
-    def handle_progress_bar_mixin(self, *args, **options) -> None:
-        self.no_progress_bar = options["no_progress_bar"]
-        self.use_progress_bar = not self.no_progress_bar
-
-
 class CryptMixin:
    """
    Fully based on:
--- a/src/documents/management/commands/prune_audit_logs.py
+++ b/src/documents/management/commands/prune_audit_logs.py
@@ -9,6 +9,9 @@ class Command(PaperlessCommand):

    help = "Prunes the audit logs of objects that no longer exist."

+    supports_progress_bar = True
+    supports_multiprocessing = False
+
    def handle(self, *args, **options):
        with transaction.atomic():
            for log_entry in self.track(
--- a/src/documents/migrations/0017_sha256_checksums.py
+++ b/src/documents/migrations/0017_sha256_checksums.py
@@ -1,130 +0,0 @@
-import hashlib
-import logging
-from pathlib import Path
-
-from django.conf import settings
-from django.db import migrations
-from django.db import models
-
-logger = logging.getLogger("paperless.migrations")
-
-_CHUNK_SIZE = 65536  # 64 KiB — avoids loading entire files into memory
-_BATCH_SIZE = 500  # documents per bulk_update call
-_PROGRESS_INTERVAL = 500  # log a progress line every N documents
-
-
-def _sha256(path: Path) -> str:
-    h = hashlib.sha256()
-    with path.open("rb") as fh:
-        while chunk := fh.read(_CHUNK_SIZE):
-            h.update(chunk)
-    return h.hexdigest()
-
-
-def recompute_checksums(apps, schema_editor):
-    """Recompute all document checksums from MD5 to SHA256."""
-    Document = apps.get_model("documents", "Document")
-
-    total = Document.objects.count()
-    if total == 0:
-        return
-
-    logger.info("Recomputing SHA-256 checksums for %d document(s)...", total)
-
-    batch: list = []
-    processed = 0
-
-    for doc in Document.objects.only(
-        "pk",
-        "filename",
-        "checksum",
-        "archive_filename",
-        "archive_checksum",
-    ).iterator(chunk_size=_BATCH_SIZE):
-        updated_fields: list[str] = []
-
-        # Reconstruct source path the same way Document.source_path does
-        fname = str(doc.filename) if doc.filename else f"{doc.pk:07}.pdf"
-        source_path = (settings.ORIGINALS_DIR / Path(fname)).resolve()
-
-        if source_path.exists():
-            doc.checksum = _sha256(source_path)
-            updated_fields.append("checksum")
-        else:
-            logger.warning(
-                "Document %s: original file %s not found, checksum not updated.",
-                doc.pk,
-                source_path,
-            )
-
-        # Mirror Document.has_archive_version: archive_filename is not None
-        if doc.archive_filename is not None:
-            archive_path = (
-                settings.ARCHIVE_DIR / Path(str(doc.archive_filename))
-            ).resolve()
-            if archive_path.exists():
-                doc.archive_checksum = _sha256(archive_path)
-                updated_fields.append("archive_checksum")
-            else:
-                logger.warning(
-                    "Document %s: archive file %s not found, checksum not updated.",
-                    doc.pk,
-                    archive_path,
-                )
-
-        if updated_fields:
-            batch.append(doc)
-
-        processed += 1
-
-        if len(batch) >= _BATCH_SIZE:
-            Document.objects.bulk_update(batch, ["checksum", "archive_checksum"])
-            batch.clear()
-
-        if processed % _PROGRESS_INTERVAL == 0:
-            logger.info(
-                "SHA-256 checksum progress: %d/%d (%d%%)",
-                processed,
-                total,
-                processed * 100 // total,
-            )
-
-    if batch:
-        Document.objects.bulk_update(batch, ["checksum", "archive_checksum"])
-
-    logger.info(
-        "SHA-256 checksum recomputation complete: %d document(s) processed.",
-        total,
-    )
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("documents", "0016_document_version_index_and_more"),
-    ]
-
-    operations = [
-        migrations.AlterField(
-            model_name="document",
-            name="checksum",
-            field=models.CharField(
-                editable=False,
-                help_text="The checksum of the original document.",
-                max_length=64,
-                verbose_name="checksum",
-            ),
-        ),
-        migrations.AlterField(
-            model_name="document",
-            name="archive_checksum",
-            field=models.CharField(
-                blank=True,
-                editable=False,
-                help_text="The checksum of the archived document.",
-                max_length=64,
-                null=True,
-                verbose_name="archive checksum",
-            ),
-        ),
-        migrations.RunPython(recompute_checksums, migrations.RunPython.noop),
-    ]
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -216,14 +216,14 @@ class Document(SoftDeleteModel, ModelWithOwner):  # type: ignore[django-manager-

    checksum = models.CharField(
        _("checksum"),
-        max_length=64,
+        max_length=32,
        editable=False,
        help_text=_("The checksum of the original document."),
    )

    archive_checksum = models.CharField(
        _("archive checksum"),
-        max_length=64,
+        max_length=32,
        editable=False,
        blank=True,
        null=True,
--- a/src/documents/sanity_checker.py
+++ b/src/documents/sanity_checker.py
@@ -11,6 +11,7 @@ is an identity function that adds no overhead.

 from __future__ import annotations

+import hashlib
 import logging
 import uuid
 from collections import defaultdict
@@ -29,7 +30,6 @@ from django.utils import timezone

 from documents.models import Document
 from documents.models import PaperlessTask
-from documents.utils import compute_checksum
 from paperless.config import GeneralConfig

 logger = logging.getLogger("paperless.sanity_checker")
@@ -218,7 +218,7 @@ def _check_original(

    present_files.discard(source_path)
    try:
-        checksum = compute_checksum(source_path)
+        checksum = hashlib.md5(source_path.read_bytes()).hexdigest()
    except OSError as e:
        messages.error(doc.pk, f"Cannot read original file of document: {e}")
    else:
@@ -255,7 +255,7 @@ def _check_archive(

        present_files.discard(archive_path)
        try:
-            checksum = compute_checksum(archive_path)
+            checksum = hashlib.md5(archive_path.read_bytes()).hexdigest()
        except OSError as e:
            messages.error(
                doc.pk,
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -1,4 +1,5 @@
 import datetime
+import hashlib
 import logging
 import shutil
 import uuid
@@ -62,7 +63,6 @@ from documents.signals import document_updated
 from documents.signals.handlers import cleanup_document_deletion
 from documents.signals.handlers import run_workflows
 from documents.signals.handlers import send_websocket_document_updated
-from documents.utils import compute_checksum
 from documents.workflows.utils import get_workflows_for_trigger
 from paperless.config import AIConfig
 from paperless_ai.indexing import llm_index_add_or_update_document
@@ -323,7 +323,8 @@ def update_document_content_maybe_archive_file(document_id) -> None:
        with transaction.atomic():
            oldDocument = Document.objects.get(pk=document.pk)
            if parser.get_archive_path():
-                checksum = compute_checksum(Path(parser.get_archive_path()))
+                with Path(parser.get_archive_path()).open("rb") as f:
+                    checksum = hashlib.md5(f.read()).hexdigest()
                # I'm going to save first so that in case the file move
                # fails, the database is rolled back.
                # We also don't use save() since that triggers the filehandling
--- a/src/documents/tests/conftest.py
+++ b/src/documents/tests/conftest.py
@@ -82,8 +82,8 @@ def sample_doc(

    return DocumentFactory(
        title="test",
-        checksum="1093cf6e32adbd16b06969df09215d42c4a3a8938cc18b39455953f08d1ff2ab",
-        archive_checksum="706124ecde3c31616992fa979caed17a726b1c9ccdba70e82a4ff796cea97ccf",
+        checksum="42995833e01aea9b3edee44bbfdd7ce1",
+        archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
        content="test content",
        pk=1,
        filename="0000001.pdf",
--- a/src/documents/tests/factories.py
+++ b/src/documents/tests/factories.py
@@ -60,7 +60,7 @@ class DocumentFactory(DjangoModelFactory):
        model = Document

    title = factory.Faker("sentence", nb_words=4)
-    checksum = factory.Faker("sha256")
+    checksum = factory.Faker("md5")
    content = factory.Faker("paragraph")
    correspondent = None
    document_type = None
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -245,14 +245,8 @@ class TestConsumer(

        self.assertIsFile(document.archive_path)

-        self.assertEqual(
-            document.checksum,
-            "1093cf6e32adbd16b06969df09215d42c4a3a8938cc18b39455953f08d1ff2ab",
-        )
-        self.assertEqual(
-            document.archive_checksum,
-            "706124ecde3c31616992fa979caed17a726b1c9ccdba70e82a4ff796cea97ccf",
-        )
+        self.assertEqual(document.checksum, "42995833e01aea9b3edee44bbfdd7ce1")
+        self.assertEqual(document.archive_checksum, "62acb0bcbfbcaa62ca6ad3668e4e404b")

        self.assertIsNotFile(filename)

--- a/src/documents/tests/test_management_exporter.py
+++ b/src/documents/tests/test_management_exporter.py
@@ -63,8 +63,8 @@ class TestExportImport(

        self.d1 = Document.objects.create(
            content="Content",
-            checksum="1093cf6e32adbd16b06969df09215d42c4a3a8938cc18b39455953f08d1ff2ab",
-            archive_checksum="706124ecde3c31616992fa979caed17a726b1c9ccdba70e82a4ff796cea97ccf",
+            checksum="42995833e01aea9b3edee44bbfdd7ce1",
+            archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
            title="wow1",
            filename="0000001.pdf",
            mime_type="application/pdf",
@@ -72,21 +72,21 @@ class TestExportImport(
        )
        self.d2 = Document.objects.create(
            content="Content",
-            checksum="550d1bae0f746d4f7c6be07054eb20cc2f11988a58ef64ceae45e98f85e92a5b",
+            checksum="9c9691e51741c1f4f41a20896af31770",
            title="wow2",
            filename="0000002.pdf",
            mime_type="application/pdf",
        )
        self.d3 = Document.objects.create(
            content="Content",
-            checksum="f1ba6b7ff8548214a75adec228f5468a14fe187f445bc0b9485cbf1c35b15915",
+            checksum="d38d7ed02e988e072caf924e0f3fcb76",
            title="wow2",
            filename="0000003.pdf",
            mime_type="application/pdf",
        )
        self.d4 = Document.objects.create(
            content="Content",
-            checksum="a81b16b6b313cfd7e60eb7b12598d1343b58622b4030cfa19a2724a02e98db1b",
+            checksum="82186aaa94f0b98697d704b90fd1c072",
            title="wow_dec",
            filename="0000004.pdf",
            mime_type="application/pdf",
@@ -240,7 +240,7 @@ class TestExportImport(
                )

                with Path(fname).open("rb") as f:
-                    checksum = hashlib.sha256(f.read()).hexdigest()
+                    checksum = hashlib.md5(f.read()).hexdigest()
                self.assertEqual(checksum, element["fields"]["checksum"])

                # Generated field "content_length" should not be exported,
@@ -254,7 +254,7 @@ class TestExportImport(
                    self.assertIsFile(fname)

                    with Path(fname).open("rb") as f:
-                        checksum = hashlib.sha256(f.read()).hexdigest()
+                        checksum = hashlib.md5(f.read()).hexdigest()
                    self.assertEqual(checksum, element["fields"]["archive_checksum"])

            elif element["model"] == "documents.note":
--- a/src/documents/tests/test_management_importer.py
+++ b/src/documents/tests/test_management_importer.py
@@ -260,8 +260,8 @@ class TestCommandImport(

        Document.objects.create(
            content="Content",
-            checksum="1093cf6e32adbd16b06969df09215d42c4a3a8938cc18b39455953f08d1ff2ab",
-            archive_checksum="706124ecde3c31616992fa979caed17a726b1c9ccdba70e82a4ff796cea97ccf",
+            checksum="42995833e01aea9b3edee44bbfdd7ce1",
+            archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
            title="wow1",
            filename="0000001.pdf",
            mime_type="application/pdf",
--- a/src/documents/utils.py
+++ b/src/documents/utils.py
@@ -1,4 +1,3 @@
-import hashlib
 import logging
 import shutil
 from os import utime
@@ -129,15 +128,3 @@ def get_boolean(boolstr: str) -> bool:
    Return a boolean value from a string representation.
    """
    return bool(boolstr.lower() in ("yes", "y", "1", "t", "true"))
-
-
-def compute_checksum(path: Path, chunk_size: int = 65536) -> str:
-    """
-    Return the SHA256 hex digest of the file at *path*, reading in chunks
-    of *chunk_size* bytes to avoid loading the entire file into memory.
-    """
-    h = hashlib.sha256()
-    with path.open("rb") as f:
-        while chunk := f.read(chunk_size):
-            h.update(chunk)
-    return h.hexdigest()