diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py index ee3b44e0c..30f37b924 100644 --- a/src/documents/management/commands/document_exporter.py +++ b/src/documents/management/commands/document_exporter.py @@ -45,6 +45,8 @@ from documents.models import DocumentType from documents.models import Note from documents.models import SavedView from documents.models import SavedViewFilterRule +from documents.models import ShareLink +from documents.models import ShareLinkBundle from documents.models import StoragePath from documents.models import Tag from documents.models import UiSettings @@ -55,6 +57,7 @@ from documents.models import WorkflowActionWebhook from documents.models import WorkflowTrigger from documents.settings import EXPORTER_ARCHIVE_NAME from documents.settings import EXPORTER_FILE_NAME +from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME from documents.settings import EXPORTER_THUMBNAIL_NAME from documents.utils import compute_checksum from documents.utils import copy_file_with_basic_stats @@ -389,6 +392,8 @@ class Command(CryptMixin, PaperlessCommand): "app_configs": ApplicationConfiguration.objects.all(), "notes": Note.global_objects.all(), "documents": Document.global_objects.order_by("id").all(), + "share_links": ShareLink.global_objects.all(), + "share_link_bundles": ShareLinkBundle.objects.order_by("id").all(), "social_accounts": SocialAccount.objects.all(), "social_apps": SocialApp.objects.all(), "social_tokens": SocialToken.objects.all(), @@ -409,6 +414,7 @@ class Command(CryptMixin, PaperlessCommand): ) document_manifest: list[dict] = [] + share_link_bundle_manifest: list[dict] = [] manifest_path = (self.target / "manifest.json").resolve() with StreamingManifestWriter( @@ -427,6 +433,15 @@ class Command(CryptMixin, PaperlessCommand): for record in batch: self._encrypt_record_inline(record) document_manifest.extend(batch) + elif key == "share_link_bundles": + # 
def generate_share_link_bundle_target(
    self,
    bundle: ShareLinkBundle,
    bundle_dict: dict,
) -> Path | None:
    """
    Generates the export target for a share link bundle file, when present.

    Side effects on ``bundle_dict`` (only when the bundle has a file path):
      * ``bundle_dict["fields"]["file_path"]`` is rewritten to the relative
        path used below the ``share_link_bundles`` export folder.
      * ``bundle_dict[EXPORTER_SHARE_LINK_BUNDLE_NAME]`` is set to the path of
        the exported file relative to the export target.

    Returns the resolved location below ``self.target`` the bundle file
    belongs at, or ``None`` (leaving ``bundle_dict`` untouched) when
    ``bundle.file_path`` is empty.
    """
    if not bundle.file_path:
        return None

    relative_name = Path(bundle.file_path)
    # An absolute path cannot be mirrored below the export folder, and ".."
    # segments would make the resolved target leave "share_link_bundles"
    # (possibly the export directory altogether). In both cases only the
    # final path component is kept, which always stays inside self.target.
    if relative_name.is_absolute() or ".." in relative_name.parts:
        relative_name = Path(relative_name.name)

    export_name = Path("share_link_bundles") / relative_name
    bundle_dict["fields"]["file_path"] = str(relative_name)
    bundle_dict[EXPORTER_SHARE_LINK_BUNDLE_NAME] = str(export_name)
    return (self.target / export_name).resolve()


def copy_share_link_bundle_file(
    self,
    bundle: ShareLinkBundle,
    bundle_target: Path,
) -> None:
    """
    Copies a share link bundle ZIP into the export directory.

    The source is ``bundle.absolute_file_path``; the copy itself is delegated
    to ``self.check_and_copy``.

    Raises FileNotFoundError when the bundle reports no absolute file path.
    """
    bundle_source_path = bundle.absolute_file_path
    if bundle_source_path is None:
        raise FileNotFoundError(f"Share link bundle {bundle.pk} has no file path")

    # NOTE(review): the second argument is presumably the source checksum,
    # which bundles do not carry - confirm against check_and_copy
    self.check_and_copy(
        bundle_source_path,
        None,
        bundle_target,
    )
def check_share_link_bundle_validity(bundle_record: dict) -> None:
    """
    Ensure the bundle file referenced by a manifest record can be read.

    Records without an ``EXPORTER_SHARE_LINK_BUNDLE_NAME`` entry are accepted
    as-is. This is a nested helper: ``self`` is not a parameter and is taken
    from the enclosing scope; the referenced file is looked up relative to
    ``self.source``.

    Raises CommandError when the referenced file does not exist or cannot be
    opened for reading.
    """
    try:
        exported_name = bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME]
    except KeyError:
        # No exported bundle file is attached to this record
        return

    candidate: Path = self.source / exported_name
    if not candidate.exists():
        raise CommandError(
            f'The manifest file refers to "{exported_name}" which does not '
            "appear to be in the source directory.",
        )

    # Existence alone does not prove readability, so open the file once
    try:
        candidate.open(mode="rb").close()
    except Exception as e:
        raise CommandError(
            f"Failed to read from share link bundle file {candidate}",
        ) from e
iter_manifest_records(manifest_path) + if record["model"] == "documents.sharelinkbundle" + and record.get(EXPORTER_SHARE_LINK_BUNDLE_NAME) + ] for record in self.track(document_records, description="Copying files..."): document = Document.global_objects.get(pk=record["pk"]) @@ -416,6 +454,26 @@ class Command(CryptMixin, PaperlessCommand): document.save() + for record in self.track( + share_link_bundle_records, + description="Copying share link bundles...", + ): + bundle = ShareLinkBundle.objects.get(pk=record["pk"]) + bundle_file = record[EXPORTER_SHARE_LINK_BUNDLE_NAME] + bundle_source_path = (self.source / bundle_file).resolve() + bundle_target_path = bundle.absolute_file_path + if bundle_target_path is None: + raise CommandError( + f"Share link bundle {bundle.pk} does not have a valid file path.", + ) + + with FileLock(settings.MEDIA_LOCK): + bundle_target_path.parent.mkdir(parents=True, exist_ok=True) + copy_file_with_basic_stats( + bundle_source_path, + bundle_target_path, + ) + def _decrypt_record_if_needed(self, record: dict) -> dict: fields = self.CRYPT_FIELDS_BY_MODEL.get(record.get("model", "")) if fields: diff --git a/src/documents/settings.py b/src/documents/settings.py index 9dff44c95..c4c87b8a7 100644 --- a/src/documents/settings.py +++ b/src/documents/settings.py @@ -3,6 +3,7 @@ EXPORTER_FILE_NAME = "__exported_file_name__" EXPORTER_THUMBNAIL_NAME = "__exported_thumbnail_name__" EXPORTER_ARCHIVE_NAME = "__exported_archive_name__" +EXPORTER_SHARE_LINK_BUNDLE_NAME = "__exported_share_link_bundle_name__" EXPORTER_CRYPTO_SETTINGS_NAME = "__crypto__" EXPORTER_CRYPTO_SALT_NAME = "__salt_hex__" diff --git a/src/documents/tests/test_management_exporter.py b/src/documents/tests/test_management_exporter.py index a214ef51d..4ee7677ca 100644 --- a/src/documents/tests/test_management_exporter.py +++ b/src/documents/tests/test_management_exporter.py @@ -2,6 +2,7 @@ import hashlib import json import shutil import tempfile +from datetime import timedelta from io 
def test_exporter_includes_share_links_and_bundles(self) -> None:
    """
    Round-trip a share link and a share link bundle through export/import.

    Creates one share link and one READY bundle with a file on disk, exports,
    checks the manifest records and the exported bundle file, deletes the
    database rows and the bundle directory, imports again and checks that
    rows, relations and file contents are restored.
    """
    shutil.rmtree(Path(self.dirs.media_dir) / "documents")
    shutil.copytree(
        Path(__file__).parent / "samples" / "documents",
        Path(self.dirs.media_dir) / "documents",
    )

    share_link = ShareLink.objects.create(
        slug="share-link-slug",
        document=self.d1,
        owner=self.user,
        file_version=ShareLink.FileVersion.ORIGINAL,
        expiration=timezone.now() + timedelta(days=7),
    )

    # Bundle file lives in a sub-folder to prove nested paths are preserved
    bundle_relative_path = Path("nested") / "share-bundle.zip"
    bundle_source_path = settings.SHARE_LINK_BUNDLE_DIR / bundle_relative_path
    bundle_source_path.parent.mkdir(parents=True, exist_ok=True)
    bundle_source_path.write_bytes(b"share-bundle-contents")
    bundle = ShareLinkBundle.objects.create(
        slug="share-bundle-slug",
        owner=self.user,
        file_version=ShareLink.FileVersion.ARCHIVE,
        expiration=timezone.now() + timedelta(days=7),
        status=ShareLinkBundle.Status.READY,
        size_bytes=bundle_source_path.stat().st_size,
        file_path=str(bundle_relative_path),
        built_at=timezone.now(),
    )
    bundle.documents.set([self.d1, self.d2])

    manifest = self._do_export()

    share_link_records = [
        record for record in manifest if record["model"] == "documents.sharelink"
    ]
    self.assertEqual(len(share_link_records), 1)
    self.assertEqual(share_link_records[0]["pk"], share_link.pk)
    self.assertEqual(share_link_records[0]["fields"]["document"], self.d1.pk)
    self.assertEqual(share_link_records[0]["fields"]["owner"], self.user.pk)

    share_link_bundle_records = [
        record
        for record in manifest
        if record["model"] == "documents.sharelinkbundle"
    ]
    self.assertEqual(len(share_link_bundle_records), 1)
    bundle_record = share_link_bundle_records[0]
    self.assertEqual(bundle_record["pk"], bundle.pk)
    # Nothing in the export pins the order of the serialized many-to-many
    # primary keys, so compare membership rather than sequence.
    self.assertCountEqual(
        bundle_record["fields"]["documents"],
        [self.d1.pk, self.d2.pk],
    )
    self.assertEqual(
        bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME],
        "share_link_bundles/nested/share-bundle.zip",
    )
    self.assertEqual(
        bundle_record["fields"]["file_path"],
        "nested/share-bundle.zip",
    )
    self.assertIsFile(self.target / bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME])

    with paperless_environment():
        # Wipe rows and files so the import has to recreate both
        ShareLink.objects.all().delete()
        ShareLinkBundle.objects.all().delete()
        shutil.rmtree(settings.SHARE_LINK_BUNDLE_DIR, ignore_errors=True)

        call_command(
            "document_importer",
            "--no-progress-bar",
            self.target,
            skip_checks=True,
        )

        imported_share_link = ShareLink.objects.get(pk=share_link.pk)
        self.assertEqual(imported_share_link.document_id, self.d1.pk)
        self.assertEqual(imported_share_link.owner_id, self.user.pk)
        self.assertEqual(
            imported_share_link.file_version,
            ShareLink.FileVersion.ORIGINAL,
        )

        imported_bundle = ShareLinkBundle.objects.get(pk=bundle.pk)
        imported_bundle_path = imported_bundle.absolute_file_path
        self.assertEqual(imported_bundle.owner_id, self.user.pk)
        self.assertEqual(
            list(
                imported_bundle.documents.order_by("pk").values_list(
                    "pk",
                    flat=True,
                ),
            ),
            [self.d1.pk, self.d2.pk],
        )
        self.assertEqual(imported_bundle.file_path, "nested/share-bundle.zip")
        self.assertIsNotNone(imported_bundle_path)
        self.assertEqual(
            imported_bundle_path.read_bytes(),
            b"share-bundle-contents",
        )