mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-29 20:32:44 +00:00
Compare commits
3 Commits
feature-ar
...
feature-sh
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2a2bc2a97e | ||
|
|
914d8de710 | ||
|
|
2bf028d37b |
@@ -45,6 +45,8 @@ from documents.models import DocumentType
|
||||
from documents.models import Note
|
||||
from documents.models import SavedView
|
||||
from documents.models import SavedViewFilterRule
|
||||
from documents.models import ShareLink
|
||||
from documents.models import ShareLinkBundle
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.models import UiSettings
|
||||
@@ -55,6 +57,7 @@ from documents.models import WorkflowActionWebhook
|
||||
from documents.models import WorkflowTrigger
|
||||
from documents.settings import EXPORTER_ARCHIVE_NAME
|
||||
from documents.settings import EXPORTER_FILE_NAME
|
||||
from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME
|
||||
from documents.settings import EXPORTER_THUMBNAIL_NAME
|
||||
from documents.utils import compute_checksum
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
@@ -385,10 +388,12 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
"workflow_webhook_actions": WorkflowActionWebhook.objects.all(),
|
||||
"workflows": Workflow.objects.all(),
|
||||
"custom_fields": CustomField.objects.all(),
|
||||
"custom_field_instances": CustomFieldInstance.objects.all(),
|
||||
"custom_field_instances": CustomFieldInstance.global_objects.all(),
|
||||
"app_configs": ApplicationConfiguration.objects.all(),
|
||||
"notes": Note.objects.all(),
|
||||
"documents": Document.objects.order_by("id").all(),
|
||||
"notes": Note.global_objects.all(),
|
||||
"documents": Document.global_objects.order_by("id").all(),
|
||||
"share_links": ShareLink.global_objects.all(),
|
||||
"share_link_bundles": ShareLinkBundle.objects.order_by("id").all(),
|
||||
"social_accounts": SocialAccount.objects.all(),
|
||||
"social_apps": SocialApp.objects.all(),
|
||||
"social_tokens": SocialToken.objects.all(),
|
||||
@@ -409,6 +414,7 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
)
|
||||
|
||||
document_manifest: list[dict] = []
|
||||
share_link_bundle_manifest: list[dict] = []
|
||||
manifest_path = (self.target / "manifest.json").resolve()
|
||||
|
||||
with StreamingManifestWriter(
|
||||
@@ -427,6 +433,15 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
for record in batch:
|
||||
self._encrypt_record_inline(record)
|
||||
document_manifest.extend(batch)
|
||||
elif key == "share_link_bundles":
|
||||
# Accumulate for file-copy loop; written to manifest after
|
||||
for batch in serialize_queryset_batched(
|
||||
qs,
|
||||
batch_size=self.batch_size,
|
||||
):
|
||||
for record in batch:
|
||||
self._encrypt_record_inline(record)
|
||||
share_link_bundle_manifest.extend(batch)
|
||||
elif self.split_manifest and key in (
|
||||
"notes",
|
||||
"custom_field_instances",
|
||||
@@ -443,7 +458,13 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
writer.write_batch(batch)
|
||||
|
||||
document_map: dict[int, Document] = {
|
||||
d.pk: d for d in Document.objects.order_by("id")
|
||||
d.pk: d for d in Document.global_objects.order_by("id")
|
||||
}
|
||||
share_link_bundle_map: dict[int, ShareLinkBundle] = {
|
||||
b.pk: b
|
||||
for b in ShareLinkBundle.objects.order_by("id").prefetch_related(
|
||||
"documents",
|
||||
)
|
||||
}
|
||||
|
||||
# 3. Export files from each document
|
||||
@@ -478,6 +499,19 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
else:
|
||||
writer.write_record(document_dict)
|
||||
|
||||
for bundle_dict in share_link_bundle_manifest:
|
||||
bundle = share_link_bundle_map[bundle_dict["pk"]]
|
||||
|
||||
bundle_target = self.generate_share_link_bundle_target(
|
||||
bundle,
|
||||
bundle_dict,
|
||||
)
|
||||
|
||||
if not self.data_only and bundle_target is not None:
|
||||
self.copy_share_link_bundle_file(bundle, bundle_target)
|
||||
|
||||
writer.write_record(bundle_dict)
|
||||
|
||||
# 4.2 write version information to target folder
|
||||
extra_metadata_path = (self.target / "metadata.json").resolve()
|
||||
metadata: dict[str, str | int | dict[str, str | int]] = {
|
||||
@@ -598,6 +632,47 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
archive_target,
|
||||
)
|
||||
|
||||
def generate_share_link_bundle_target(
    self,
    bundle: ShareLinkBundle,
    bundle_dict: dict,
) -> Path | None:
    """
    Work out where a share link bundle's file belongs inside the export.

    The manifest record is updated in place: its ``file_path`` field is
    rewritten to the path relative to the ``share_link_bundles`` export
    folder, and the exported location is stored under
    ``EXPORTER_SHARE_LINK_BUNDLE_NAME``.

    Returns the resolved destination path, or ``None`` (leaving the record
    untouched) when the bundle has no ``file_path``.
    """
    stored_path = bundle.file_path
    if not stored_path:
        return None

    export_folder = Path("share_link_bundles")

    # An absolute stored path is reduced to its file name; a relative one
    # keeps its sub-directories below the export folder.
    relative_path = Path(stored_path)
    if relative_path.is_absolute():
        relative_path = Path(relative_path.name)

    exported_name = export_folder / relative_path
    destination = (self.target / exported_name).resolve()

    record_fields = bundle_dict["fields"]
    record_fields["file_path"] = str(exported_name.relative_to(export_folder))
    bundle_dict[EXPORTER_SHARE_LINK_BUNDLE_NAME] = str(exported_name)

    return destination
|
||||
|
||||
def copy_share_link_bundle_file(
    self,
    bundle: ShareLinkBundle,
    bundle_target: Path,
) -> None:
    """
    Copy the ZIP file of a share link bundle to its export location.

    Raises FileNotFoundError when the bundle does not provide an absolute
    file path to copy from.
    """
    source_path = bundle.absolute_file_path
    if source_path is not None:
        # NOTE(review): the second argument is passed as None - presumably
        # "no known checksum"; confirm against check_and_copy.
        self.check_and_copy(source_path, None, bundle_target)
        return

    raise FileNotFoundError(f"Share link bundle {bundle.pk} has no file path")
|
||||
|
||||
def _encrypt_record_inline(self, record: dict) -> None:
|
||||
"""Encrypt sensitive fields in a single record, if passphrase is set."""
|
||||
if not self.passphrase:
|
||||
@@ -619,12 +694,15 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
"""Write per-document manifest file for --split-manifest mode."""
|
||||
content = [document_dict]
|
||||
content.extend(
|
||||
serializers.serialize("python", Note.objects.filter(document=document)),
|
||||
serializers.serialize(
|
||||
"python",
|
||||
Note.global_objects.filter(document=document),
|
||||
),
|
||||
)
|
||||
content.extend(
|
||||
serializers.serialize(
|
||||
"python",
|
||||
CustomFieldInstance.objects.filter(document=document),
|
||||
CustomFieldInstance.global_objects.filter(document=document),
|
||||
),
|
||||
)
|
||||
manifest_name = base_name.with_name(f"{base_name.stem}-manifest.json")
|
||||
|
||||
@@ -32,10 +32,12 @@ from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import Note
|
||||
from documents.models import ShareLinkBundle
|
||||
from documents.models import Tag
|
||||
from documents.settings import EXPORTER_ARCHIVE_NAME
|
||||
from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME
|
||||
from documents.settings import EXPORTER_FILE_NAME
|
||||
from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME
|
||||
from documents.settings import EXPORTER_THUMBNAIL_NAME
|
||||
from documents.signals.handlers import check_paths_and_prune_custom_fields
|
||||
from documents.signals.handlers import update_filename_and_move_files
|
||||
@@ -125,7 +127,7 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
"Found existing user(s), this might indicate a non-empty installation",
|
||||
),
|
||||
)
|
||||
if Document.objects.count() != 0:
|
||||
if Document.global_objects.count() != 0:
|
||||
self.stdout.write(
|
||||
self.style.WARNING(
|
||||
"Found existing documents(s), this might indicate a non-empty installation",
|
||||
@@ -348,18 +350,42 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
f"Failed to read from archive file {doc_archive_path}",
|
||||
) from e
|
||||
|
||||
def check_share_link_bundle_validity(bundle_record: dict) -> None:
    """
    Ensure the bundle file named by a manifest record exists and is readable.

    Records that carry no exported bundle file name are accepted as-is.
    The file is looked up relative to ``self.source``, which is taken from
    the enclosing scope.

    Raises CommandError when the file is missing from the source directory
    or cannot be opened for reading.
    """
    try:
        exported_name = bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME]
    except KeyError:
        # Nothing was exported for this bundle, so there is nothing to check
        return

    candidate: Path = self.source / exported_name
    if not candidate.exists():
        raise CommandError(
            f'The manifest file refers to "{exported_name}" which does not '
            "appear to be in the source directory.",
        )

    # Opening (and immediately closing) the file proves it can be read
    try:
        candidate.open(mode="rb").close()
    except Exception as e:
        raise CommandError(
            f"Failed to read from share link bundle file {candidate}",
        ) from e
|
||||
|
||||
self.stdout.write("Checking the manifest")
|
||||
for manifest_path in self.manifest_paths:
|
||||
for record in iter_manifest_records(manifest_path):
|
||||
# Only check if the document files exist if this is not data only
|
||||
# We don't care about documents for a data only import
|
||||
if not self.data_only and record["model"] == "documents.document":
|
||||
if self.data_only:
|
||||
continue
|
||||
if record["model"] == "documents.document":
|
||||
check_document_validity(record)
|
||||
elif record["model"] == "documents.sharelinkbundle":
|
||||
check_share_link_bundle_validity(record)
|
||||
|
||||
def _import_files_from_manifest(self) -> None:
|
||||
settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
settings.THUMBNAIL_DIR.mkdir(parents=True, exist_ok=True)
|
||||
settings.ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
settings.SHARE_LINK_BUNDLE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.stdout.write("Copy files into paperless...")
|
||||
|
||||
@@ -374,9 +400,21 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
for record in iter_manifest_records(manifest_path)
|
||||
if record["model"] == "documents.document"
|
||||
]
|
||||
share_link_bundle_records = [
|
||||
{
|
||||
"pk": record["pk"],
|
||||
EXPORTER_SHARE_LINK_BUNDLE_NAME: record.get(
|
||||
EXPORTER_SHARE_LINK_BUNDLE_NAME,
|
||||
),
|
||||
}
|
||||
for manifest_path in self.manifest_paths
|
||||
for record in iter_manifest_records(manifest_path)
|
||||
if record["model"] == "documents.sharelinkbundle"
|
||||
and record.get(EXPORTER_SHARE_LINK_BUNDLE_NAME)
|
||||
]
|
||||
|
||||
for record in self.track(document_records, description="Copying files..."):
|
||||
document = Document.objects.get(pk=record["pk"])
|
||||
document = Document.global_objects.get(pk=record["pk"])
|
||||
|
||||
doc_file = record[EXPORTER_FILE_NAME]
|
||||
document_path = self.source / doc_file
|
||||
@@ -416,6 +454,26 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
|
||||
document.save()
|
||||
|
||||
for record in self.track(
|
||||
share_link_bundle_records,
|
||||
description="Copying share link bundles...",
|
||||
):
|
||||
bundle = ShareLinkBundle.objects.get(pk=record["pk"])
|
||||
bundle_file = record[EXPORTER_SHARE_LINK_BUNDLE_NAME]
|
||||
bundle_source_path = (self.source / bundle_file).resolve()
|
||||
bundle_target_path = bundle.absolute_file_path
|
||||
if bundle_target_path is None:
|
||||
raise CommandError(
|
||||
f"Share link bundle {bundle.pk} does not have a valid file path.",
|
||||
)
|
||||
|
||||
with FileLock(settings.MEDIA_LOCK):
|
||||
bundle_target_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
copy_file_with_basic_stats(
|
||||
bundle_source_path,
|
||||
bundle_target_path,
|
||||
)
|
||||
|
||||
def _decrypt_record_if_needed(self, record: dict) -> dict:
|
||||
fields = self.CRYPT_FIELDS_BY_MODEL.get(record.get("model", ""))
|
||||
if fields:
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
EXPORTER_FILE_NAME = "__exported_file_name__"
|
||||
EXPORTER_THUMBNAIL_NAME = "__exported_thumbnail_name__"
|
||||
EXPORTER_ARCHIVE_NAME = "__exported_archive_name__"
|
||||
EXPORTER_SHARE_LINK_BUNDLE_NAME = "__exported_share_link_bundle_name__"
|
||||
|
||||
EXPORTER_CRYPTO_SETTINGS_NAME = "__crypto__"
|
||||
EXPORTER_CRYPTO_SALT_NAME = "__salt_hex__"
|
||||
|
||||
@@ -2,6 +2,7 @@ import hashlib
|
||||
import json
|
||||
import shutil
|
||||
import tempfile
|
||||
from datetime import timedelta
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
@@ -11,6 +12,7 @@ import pytest
|
||||
from allauth.socialaccount.models import SocialAccount
|
||||
from allauth.socialaccount.models import SocialApp
|
||||
from allauth.socialaccount.models import SocialToken
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import Permission
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
@@ -31,6 +33,8 @@ from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import Note
|
||||
from documents.models import ShareLink
|
||||
from documents.models import ShareLinkBundle
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.models import User
|
||||
@@ -39,6 +43,7 @@ from documents.models import WorkflowAction
|
||||
from documents.models import WorkflowTrigger
|
||||
from documents.sanity_checker import check_sanity
|
||||
from documents.settings import EXPORTER_FILE_NAME
|
||||
from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
from documents.tests.utils import SampleDirMixin
|
||||
@@ -306,6 +311,108 @@ class TestExportImport(
|
||||
):
|
||||
self.test_exporter(use_filename_format=True)
|
||||
|
||||
def test_exporter_includes_share_links_and_bundles(self) -> None:
    """
    GIVEN:
        - A share link for a single document
        - A ready share link bundle covering two documents, whose ZIP is
          stored in a nested folder of the share link bundle directory
    WHEN:
        - An export is performed
        - Share links, bundles and the bundle directory are removed
        - The export is imported again
    THEN:
        - The manifest contains the share link and the bundle, and the
          bundle file is present in the export
        - The share link, the bundle, its documents and the contents of
          its file are restored by the import
    """
    shutil.rmtree(Path(self.dirs.media_dir) / "documents")
    shutil.copytree(
        Path(__file__).parent / "samples" / "documents",
        Path(self.dirs.media_dir) / "documents",
    )

    share_link = ShareLink.objects.create(
        slug="share-link-slug",
        document=self.d1,
        owner=self.user,
        file_version=ShareLink.FileVersion.ORIGINAL,
        expiration=timezone.now() + timedelta(days=7),
    )

    bundle_relative_path = Path("nested") / "share-bundle.zip"
    bundle_source_path = settings.SHARE_LINK_BUNDLE_DIR / bundle_relative_path
    bundle_source_path.parent.mkdir(parents=True, exist_ok=True)
    bundle_source_path.write_bytes(b"share-bundle-contents")
    bundle = ShareLinkBundle.objects.create(
        slug="share-bundle-slug",
        owner=self.user,
        file_version=ShareLink.FileVersion.ARCHIVE,
        expiration=timezone.now() + timedelta(days=7),
        status=ShareLinkBundle.Status.READY,
        size_bytes=bundle_source_path.stat().st_size,
        file_path=str(bundle_relative_path),
        built_at=timezone.now(),
    )
    bundle.documents.set([self.d1, self.d2])

    manifest = self._do_export()

    share_link_records = [
        record for record in manifest if record["model"] == "documents.sharelink"
    ]
    self.assertEqual(len(share_link_records), 1)
    self.assertEqual(share_link_records[0]["pk"], share_link.pk)
    self.assertEqual(share_link_records[0]["fields"]["document"], self.d1.pk)
    self.assertEqual(share_link_records[0]["fields"]["owner"], self.user.pk)

    share_link_bundle_records = [
        record
        for record in manifest
        if record["model"] == "documents.sharelinkbundle"
    ]
    self.assertEqual(len(share_link_bundle_records), 1)
    bundle_record = share_link_bundle_records[0]
    self.assertEqual(bundle_record["pk"], bundle.pk)
    # The order of serialized many-to-many keys follows the related
    # queryset's ordering, which is not guaranteed to be primary-key order,
    # so only the membership is compared here.
    self.assertCountEqual(
        bundle_record["fields"]["documents"],
        [self.d1.pk, self.d2.pk],
    )
    self.assertEqual(
        bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME],
        "share_link_bundles/nested/share-bundle.zip",
    )
    self.assertEqual(
        bundle_record["fields"]["file_path"],
        "nested/share-bundle.zip",
    )
    self.assertIsFile(self.target / bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME])

    with paperless_environment():
        ShareLink.objects.all().delete()
        ShareLinkBundle.objects.all().delete()
        shutil.rmtree(settings.SHARE_LINK_BUNDLE_DIR, ignore_errors=True)

        call_command(
            "document_importer",
            "--no-progress-bar",
            self.target,
            skip_checks=True,
        )

        imported_share_link = ShareLink.objects.get(pk=share_link.pk)
        self.assertEqual(imported_share_link.document_id, self.d1.pk)
        self.assertEqual(imported_share_link.owner_id, self.user.pk)
        self.assertEqual(
            imported_share_link.file_version,
            ShareLink.FileVersion.ORIGINAL,
        )

        imported_bundle = ShareLinkBundle.objects.get(pk=bundle.pk)
        imported_bundle_path = imported_bundle.absolute_file_path
        self.assertEqual(imported_bundle.owner_id, self.user.pk)
        self.assertEqual(
            list(
                imported_bundle.documents.order_by("pk").values_list(
                    "pk",
                    flat=True,
                ),
            ),
            [self.d1.pk, self.d2.pk],
        )
        self.assertEqual(imported_bundle.file_path, "nested/share-bundle.zip")
        self.assertIsNotNone(imported_bundle_path)
        self.assertEqual(
            imported_bundle_path.read_bytes(),
            b"share-bundle-contents",
        )
|
||||
|
||||
def test_update_export_changed_time(self) -> None:
|
||||
shutil.rmtree(Path(self.dirs.media_dir) / "documents")
|
||||
shutil.copytree(
|
||||
@@ -389,7 +496,7 @@ class TestExportImport(
|
||||
self.assertIsFile(
|
||||
str(self.target / doc_from_manifest[EXPORTER_FILE_NAME]),
|
||||
)
|
||||
self.d3.delete()
|
||||
self.d3.hard_delete()
|
||||
|
||||
manifest = self._do_export()
|
||||
self.assertRaises(
|
||||
@@ -868,6 +975,52 @@ class TestExportImport(
|
||||
for obj in manifest:
|
||||
self.assertNotEqual(obj["model"], "auditlog.logentry")
|
||||
|
||||
def test_export_import_soft_deleted_document(self) -> None:
    """
    GIVEN:
        - A soft-deleted document which has a note and a custom field
          instance attached
    WHEN:
        - The data is exported and then imported again
    THEN:
        - The document, the note and the custom field instance are all
          restored, each still carrying its deleted_at value
    """
    media_documents = Path(self.dirs.media_dir) / "documents"
    sample_documents = Path(__file__).parent / "samples" / "documents"
    shutil.rmtree(media_documents)
    shutil.copytree(sample_documents, media_documents)

    # setUp attaches self.note and self.cfi1 to d1
    self.d1.delete()

    self._do_export()

    with paperless_environment():
        Document.global_objects.all().hard_delete()
        for model in (Correspondent, DocumentType, Tag):
            model.objects.all().delete()

        call_command(
            "document_importer",
            "--no-progress-bar",
            self.target,
            skip_checks=True,
        )

        self.assertEqual(Document.global_objects.count(), 4)
        restored_document = Document.global_objects.get(pk=self.d1.pk)
        self.assertIsNotNone(restored_document.deleted_at)

        # The note and the custom field instance must each come back as the
        # only row of their kind, still marked as deleted
        related_originals = (
            (Note, self.note),
            (CustomFieldInstance, self.cfi1),
        )
        for model, original in related_originals:
            self.assertEqual(model.global_objects.count(), 1)
            restored = model.global_objects.get(pk=original.pk)
            self.assertIsNotNone(restored.deleted_at)
|
||||
|
||||
def test_export_data_only(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
|
||||
Reference in New Issue
Block a user