Compare commits

...

3 Commits

4 changed files with 300 additions and 10 deletions

View File

@@ -45,6 +45,8 @@ from documents.models import DocumentType
from documents.models import Note
from documents.models import SavedView
from documents.models import SavedViewFilterRule
from documents.models import ShareLink
from documents.models import ShareLinkBundle
from documents.models import StoragePath
from documents.models import Tag
from documents.models import UiSettings
@@ -55,6 +57,7 @@ from documents.models import WorkflowActionWebhook
from documents.models import WorkflowTrigger
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.utils import compute_checksum
from documents.utils import copy_file_with_basic_stats
@@ -385,10 +388,12 @@ class Command(CryptMixin, PaperlessCommand):
"workflow_webhook_actions": WorkflowActionWebhook.objects.all(),
"workflows": Workflow.objects.all(),
"custom_fields": CustomField.objects.all(),
"custom_field_instances": CustomFieldInstance.objects.all(),
"custom_field_instances": CustomFieldInstance.global_objects.all(),
"app_configs": ApplicationConfiguration.objects.all(),
"notes": Note.objects.all(),
"documents": Document.objects.order_by("id").all(),
"notes": Note.global_objects.all(),
"documents": Document.global_objects.order_by("id").all(),
"share_links": ShareLink.global_objects.all(),
"share_link_bundles": ShareLinkBundle.objects.order_by("id").all(),
"social_accounts": SocialAccount.objects.all(),
"social_apps": SocialApp.objects.all(),
"social_tokens": SocialToken.objects.all(),
@@ -409,6 +414,7 @@ class Command(CryptMixin, PaperlessCommand):
)
document_manifest: list[dict] = []
share_link_bundle_manifest: list[dict] = []
manifest_path = (self.target / "manifest.json").resolve()
with StreamingManifestWriter(
@@ -427,6 +433,15 @@ class Command(CryptMixin, PaperlessCommand):
for record in batch:
self._encrypt_record_inline(record)
document_manifest.extend(batch)
elif key == "share_link_bundles":
# Accumulate for file-copy loop; written to manifest after
for batch in serialize_queryset_batched(
qs,
batch_size=self.batch_size,
):
for record in batch:
self._encrypt_record_inline(record)
share_link_bundle_manifest.extend(batch)
elif self.split_manifest and key in (
"notes",
"custom_field_instances",
@@ -443,7 +458,13 @@ class Command(CryptMixin, PaperlessCommand):
writer.write_batch(batch)
document_map: dict[int, Document] = {
d.pk: d for d in Document.objects.order_by("id")
d.pk: d for d in Document.global_objects.order_by("id")
}
share_link_bundle_map: dict[int, ShareLinkBundle] = {
b.pk: b
for b in ShareLinkBundle.objects.order_by("id").prefetch_related(
"documents",
)
}
# 3. Export files from each document
@@ -478,6 +499,19 @@ class Command(CryptMixin, PaperlessCommand):
else:
writer.write_record(document_dict)
for bundle_dict in share_link_bundle_manifest:
bundle = share_link_bundle_map[bundle_dict["pk"]]
bundle_target = self.generate_share_link_bundle_target(
bundle,
bundle_dict,
)
if not self.data_only and bundle_target is not None:
self.copy_share_link_bundle_file(bundle, bundle_target)
writer.write_record(bundle_dict)
# 4.2 write version information to target folder
extra_metadata_path = (self.target / "metadata.json").resolve()
metadata: dict[str, str | int | dict[str, str | int]] = {
@@ -598,6 +632,47 @@ class Command(CryptMixin, PaperlessCommand):
archive_target,
)
def generate_share_link_bundle_target(
    self,
    bundle: ShareLinkBundle,
    bundle_dict: dict,
) -> Path | None:
    """
    Work out where a share link bundle's file belongs inside the export.

    Returns None, leaving bundle_dict untouched, when the bundle has no
    stored file path. Otherwise bundle_dict is updated in place:
    its "fields" -> "file_path" entry is set to the path relative to the
    "share_link_bundles" folder, and the EXPORTER_SHARE_LINK_BUNDLE_NAME
    entry is set to the path including that folder. The returned value is
    that path joined onto self.target and resolved.
    """
    stored_path = bundle.file_path
    if not stored_path:
        return None

    export_root = Path("share_link_bundles")

    # An absolute stored path is reduced to its final component
    relative_path = Path(stored_path)
    if relative_path.is_absolute():
        relative_path = Path(relative_path.name)

    export_name = export_root / relative_path
    resolved_target = (self.target / export_name).resolve()

    # The manifest is only touched once the target has been computed
    manifest_file_path = export_name.relative_to("share_link_bundles")
    bundle_dict["fields"]["file_path"] = str(manifest_file_path)
    bundle_dict[EXPORTER_SHARE_LINK_BUNDLE_NAME] = str(export_name)

    return resolved_target
def copy_share_link_bundle_file(
    self,
    bundle: ShareLinkBundle,
    bundle_target: Path,
) -> None:
    """
    Copy the file backing a share link bundle to bundle_target.

    The source is the bundle's absolute_file_path; the copy itself is
    delegated to self.check_and_copy.

    Raises FileNotFoundError when absolute_file_path is None.
    """
    source_path = bundle.absolute_file_path
    if source_path is None:
        raise FileNotFoundError(f"Share link bundle {bundle.pk} has no file path")

    # NOTE(review): the second argument is passed as None — presumably
    # "no known source checksum"; confirm against check_and_copy.
    self.check_and_copy(source_path, None, bundle_target)
def _encrypt_record_inline(self, record: dict) -> None:
"""Encrypt sensitive fields in a single record, if passphrase is set."""
if not self.passphrase:
@@ -619,12 +694,15 @@ class Command(CryptMixin, PaperlessCommand):
"""Write per-document manifest file for --split-manifest mode."""
content = [document_dict]
content.extend(
serializers.serialize("python", Note.objects.filter(document=document)),
serializers.serialize(
"python",
Note.global_objects.filter(document=document),
),
)
content.extend(
serializers.serialize(
"python",
CustomFieldInstance.objects.filter(document=document),
CustomFieldInstance.global_objects.filter(document=document),
),
)
manifest_name = base_name.with_name(f"{base_name.stem}-manifest.json")

View File

@@ -32,10 +32,12 @@ from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import DocumentType
from documents.models import Note
from documents.models import ShareLinkBundle
from documents.models import Tag
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.signals.handlers import check_paths_and_prune_custom_fields
from documents.signals.handlers import update_filename_and_move_files
@@ -125,7 +127,7 @@ class Command(CryptMixin, PaperlessCommand):
"Found existing user(s), this might indicate a non-empty installation",
),
)
if Document.objects.count() != 0:
if Document.global_objects.count() != 0:
self.stdout.write(
self.style.WARNING(
"Found existing documents(s), this might indicate a non-empty installation",
@@ -348,18 +350,42 @@ class Command(CryptMixin, PaperlessCommand):
f"Failed to read from archive file {doc_archive_path}",
) from e
def check_share_link_bundle_validity(bundle_record: dict) -> None:
    """
    Ensure the bundle file a manifest record points at can be read.

    Records that lack the EXPORTER_SHARE_LINK_BUNDLE_NAME key are accepted
    without any check. The referenced name is looked up relative to
    self.source of the enclosing command.

    Raises CommandError when the file does not exist or cannot be opened
    for reading.
    """
    try:
        bundle_file = bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME]
    except KeyError:
        # No exported file is recorded on this record
        return

    bundle_path: Path = self.source / bundle_file
    if not bundle_path.exists():
        raise CommandError(
            f'The manifest file refers to "{bundle_file}" which does not '
            "appear to be in the source directory.",
        )

    try:
        # Open and close right away: only readability matters here
        bundle_path.open(mode="rb").close()
    except Exception as e:
        raise CommandError(
            f"Failed to read from share link bundle file {bundle_path}",
        ) from e
self.stdout.write("Checking the manifest")
for manifest_path in self.manifest_paths:
for record in iter_manifest_records(manifest_path):
# Only check if the document files exist if this is not data only
# We don't care about documents for a data only import
if not self.data_only and record["model"] == "documents.document":
if self.data_only:
continue
if record["model"] == "documents.document":
check_document_validity(record)
elif record["model"] == "documents.sharelinkbundle":
check_share_link_bundle_validity(record)
def _import_files_from_manifest(self) -> None:
settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
settings.THUMBNAIL_DIR.mkdir(parents=True, exist_ok=True)
settings.ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
settings.SHARE_LINK_BUNDLE_DIR.mkdir(parents=True, exist_ok=True)
self.stdout.write("Copy files into paperless...")
@@ -374,9 +400,21 @@ class Command(CryptMixin, PaperlessCommand):
for record in iter_manifest_records(manifest_path)
if record["model"] == "documents.document"
]
share_link_bundle_records = [
{
"pk": record["pk"],
EXPORTER_SHARE_LINK_BUNDLE_NAME: record.get(
EXPORTER_SHARE_LINK_BUNDLE_NAME,
),
}
for manifest_path in self.manifest_paths
for record in iter_manifest_records(manifest_path)
if record["model"] == "documents.sharelinkbundle"
and record.get(EXPORTER_SHARE_LINK_BUNDLE_NAME)
]
for record in self.track(document_records, description="Copying files..."):
document = Document.objects.get(pk=record["pk"])
document = Document.global_objects.get(pk=record["pk"])
doc_file = record[EXPORTER_FILE_NAME]
document_path = self.source / doc_file
@@ -416,6 +454,26 @@ class Command(CryptMixin, PaperlessCommand):
document.save()
for record in self.track(
share_link_bundle_records,
description="Copying share link bundles...",
):
bundle = ShareLinkBundle.objects.get(pk=record["pk"])
bundle_file = record[EXPORTER_SHARE_LINK_BUNDLE_NAME]
bundle_source_path = (self.source / bundle_file).resolve()
bundle_target_path = bundle.absolute_file_path
if bundle_target_path is None:
raise CommandError(
f"Share link bundle {bundle.pk} does not have a valid file path.",
)
with FileLock(settings.MEDIA_LOCK):
bundle_target_path.parent.mkdir(parents=True, exist_ok=True)
copy_file_with_basic_stats(
bundle_source_path,
bundle_target_path,
)
def _decrypt_record_if_needed(self, record: dict) -> dict:
fields = self.CRYPT_FIELDS_BY_MODEL.get(record.get("model", ""))
if fields:

View File

@@ -3,6 +3,7 @@
EXPORTER_FILE_NAME = "__exported_file_name__"
EXPORTER_THUMBNAIL_NAME = "__exported_thumbnail_name__"
EXPORTER_ARCHIVE_NAME = "__exported_archive_name__"
EXPORTER_SHARE_LINK_BUNDLE_NAME = "__exported_share_link_bundle_name__"
EXPORTER_CRYPTO_SETTINGS_NAME = "__crypto__"
EXPORTER_CRYPTO_SALT_NAME = "__salt_hex__"

View File

@@ -2,6 +2,7 @@ import hashlib
import json
import shutil
import tempfile
from datetime import timedelta
from io import StringIO
from pathlib import Path
from unittest import mock
@@ -11,6 +12,7 @@ import pytest
from allauth.socialaccount.models import SocialAccount
from allauth.socialaccount.models import SocialApp
from allauth.socialaccount.models import SocialToken
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import Permission
from django.contrib.contenttypes.models import ContentType
@@ -31,6 +33,8 @@ from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import DocumentType
from documents.models import Note
from documents.models import ShareLink
from documents.models import ShareLinkBundle
from documents.models import StoragePath
from documents.models import Tag
from documents.models import User
@@ -39,6 +43,7 @@ from documents.models import WorkflowAction
from documents.models import WorkflowTrigger
from documents.sanity_checker import check_sanity
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from documents.tests.utils import SampleDirMixin
@@ -306,6 +311,108 @@ class TestExportImport(
):
self.test_exporter(use_filename_format=True)
def test_exporter_includes_share_links_and_bundles(self) -> None:
"""
GIVEN:
- A share link for one document
- A ready share link bundle covering two documents, with a file on disk
WHEN:
- Export and re-import are performed
THEN:
- The manifest contains one record for each, and the bundle file is
present in the export target
- After import the share link, the bundle, its documents and the bundle
file contents are all restored
"""
# Replace the media documents directory with the sample documents
shutil.rmtree(Path(self.dirs.media_dir) / "documents")
shutil.copytree(
Path(__file__).parent / "samples" / "documents",
Path(self.dirs.media_dir) / "documents",
)
# Share link on d1, expiring seven days from now
share_link = ShareLink.objects.create(
slug="share-link-slug",
document=self.d1,
owner=self.user,
file_version=ShareLink.FileVersion.ORIGINAL,
expiration=timezone.now() + timedelta(days=7),
)
# Write a small file under SHARE_LINK_BUNDLE_DIR for the bundle to reference;
# the nested sub-directory exercises a multi-component relative path
bundle_relative_path = Path("nested") / "share-bundle.zip"
bundle_source_path = settings.SHARE_LINK_BUNDLE_DIR / bundle_relative_path
bundle_source_path.parent.mkdir(parents=True, exist_ok=True)
bundle_source_path.write_bytes(b"share-bundle-contents")
# file_path is stored as the relative path, not the absolute one
bundle = ShareLinkBundle.objects.create(
slug="share-bundle-slug",
owner=self.user,
file_version=ShareLink.FileVersion.ARCHIVE,
expiration=timezone.now() + timedelta(days=7),
status=ShareLinkBundle.Status.READY,
size_bytes=bundle_source_path.stat().st_size,
file_path=str(bundle_relative_path),
built_at=timezone.now(),
)
bundle.documents.set([self.d1, self.d2])
manifest = self._do_export()
# Exactly one share link record, pointing at d1 and the owning user
share_link_records = [
record for record in manifest if record["model"] == "documents.sharelink"
]
self.assertEqual(len(share_link_records), 1)
self.assertEqual(share_link_records[0]["pk"], share_link.pk)
self.assertEqual(share_link_records[0]["fields"]["document"], self.d1.pk)
self.assertEqual(share_link_records[0]["fields"]["owner"], self.user.pk)
# Exactly one share link bundle record
share_link_bundle_records = [
record
for record in manifest
if record["model"] == "documents.sharelinkbundle"
]
self.assertEqual(len(share_link_bundle_records), 1)
bundle_record = share_link_bundle_records[0]
self.assertEqual(bundle_record["pk"], bundle.pk)
self.assertEqual(
bundle_record["fields"]["documents"],
[self.d1.pk, self.d2.pk],
)
# The exporter key holds the location relative to the export target ...
self.assertEqual(
bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME],
"share_link_bundles/nested/share-bundle.zip",
)
# ... while the serialized file_path field stays relative to the bundle folder
self.assertEqual(
bundle_record["fields"]["file_path"],
"nested/share-bundle.zip",
)
self.assertIsFile(self.target / bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME])
with paperless_environment():
# Remove the share links, bundles and the bundle directory; the assertions
# below then rely on the import having restored them
ShareLink.objects.all().delete()
ShareLinkBundle.objects.all().delete()
shutil.rmtree(settings.SHARE_LINK_BUNDLE_DIR, ignore_errors=True)
call_command(
"document_importer",
"--no-progress-bar",
self.target,
skip_checks=True,
)
# The share link comes back with the same pk, document, owner and file version
imported_share_link = ShareLink.objects.get(pk=share_link.pk)
self.assertEqual(imported_share_link.document_id, self.d1.pk)
self.assertEqual(imported_share_link.owner_id, self.user.pk)
self.assertEqual(
imported_share_link.file_version,
ShareLink.FileVersion.ORIGINAL,
)
imported_bundle = ShareLinkBundle.objects.get(pk=bundle.pk)
imported_bundle_path = imported_bundle.absolute_file_path
self.assertEqual(imported_bundle.owner_id, self.user.pk)
# Explicit order_by("pk") keeps this comparison independent of any default ordering
self.assertEqual(
list(
imported_bundle.documents.order_by("pk").values_list(
"pk",
flat=True,
),
),
[self.d1.pk, self.d2.pk],
)
self.assertEqual(imported_bundle.file_path, "nested/share-bundle.zip")
# The bundle file itself must have been copied back with identical contents
self.assertIsNotNone(imported_bundle_path)
self.assertEqual(
imported_bundle_path.read_bytes(),
b"share-bundle-contents",
)
def test_update_export_changed_time(self) -> None:
shutil.rmtree(Path(self.dirs.media_dir) / "documents")
shutil.copytree(
@@ -389,7 +496,7 @@ class TestExportImport(
self.assertIsFile(
str(self.target / doc_from_manifest[EXPORTER_FILE_NAME]),
)
self.d3.delete()
self.d3.hard_delete()
manifest = self._do_export()
self.assertRaises(
@@ -868,6 +975,52 @@ class TestExportImport(
for obj in manifest:
self.assertNotEqual(obj["model"], "auditlog.logentry")
def test_export_import_soft_deleted_document(self) -> None:
"""
GIVEN:
- A document with a note and custom field instance has been soft-deleted
WHEN:
- Export and re-import are performed
THEN:
- The soft-deleted document, note, and custom field instance
survive the round-trip with deleted_at preserved
"""
# Replace the media documents directory with the sample documents
shutil.rmtree(Path(self.dirs.media_dir) / "documents")
shutil.copytree(
Path(__file__).parent / "samples" / "documents",
Path(self.dirs.media_dir) / "documents",
)
# d1 has self.note and self.cfi1 attached via setUp
# delete() here is the soft delete: the assertions below expect deleted_at
# to be set on the re-imported document, note and custom field instance
self.d1.delete()
self._do_export()
with paperless_environment():
# Clear documents (through global_objects / hard_delete), correspondents,
# document types and tags before importing the export again
Document.global_objects.all().hard_delete()
Correspondent.objects.all().delete()
DocumentType.objects.all().delete()
Tag.objects.all().delete()
call_command(
"document_importer",
"--no-progress-bar",
self.target,
skip_checks=True,
)
# Counting through global_objects: four documents are expected in total
self.assertEqual(Document.global_objects.count(), 4)
reimported_doc = Document.global_objects.get(pk=self.d1.pk)
self.assertIsNotNone(reimported_doc.deleted_at)
# The note attached to d1 is restored and still marked as deleted
self.assertEqual(Note.global_objects.count(), 1)
reimported_note = Note.global_objects.get(pk=self.note.pk)
self.assertIsNotNone(reimported_note.deleted_at)
# Same expectation for the custom field instance attached to d1
self.assertEqual(CustomFieldInstance.global_objects.count(), 1)
reimported_cfi = CustomFieldInstance.global_objects.get(pk=self.cfi1.pk)
self.assertIsNotNone(reimported_cfi.deleted_at)
def test_export_data_only(self) -> None:
"""
GIVEN: