mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-18 15:05:58 +00:00
Compare commits
1 Commits
release/v2
...
fix-12386
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7100ecb026 |
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
@@ -391,6 +392,14 @@ class CannotMoveFilesException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def _path_matches_checksum(path: Path, checksum: str | None) -> bool:
|
||||
if checksum is None or not path.is_file():
|
||||
return False
|
||||
|
||||
with path.open("rb") as f:
|
||||
return hashlib.md5(f.read()).hexdigest() == checksum
|
||||
|
||||
|
||||
def _filename_template_uses_custom_fields(doc: Document) -> bool:
|
||||
template = None
|
||||
if doc.storage_path is not None:
|
||||
@@ -461,10 +470,12 @@ def update_filename_and_move_files(
|
||||
old_filename = instance.filename
|
||||
old_source_path = instance.source_path
|
||||
move_original = False
|
||||
original_already_moved = False
|
||||
|
||||
old_archive_filename = instance.archive_filename
|
||||
old_archive_path = instance.archive_path
|
||||
move_archive = False
|
||||
archive_already_moved = False
|
||||
|
||||
candidate_filename = generate_filename(instance)
|
||||
if len(str(candidate_filename)) > Document.MAX_STORED_FILENAME_LENGTH:
|
||||
@@ -485,14 +496,23 @@ def update_filename_and_move_files(
|
||||
candidate_source_path.exists()
|
||||
and candidate_source_path != old_source_path
|
||||
):
|
||||
# Only fall back to unique search when there is an actual conflict
|
||||
new_filename = generate_unique_filename(instance)
|
||||
if not old_source_path.is_file() and _path_matches_checksum(
|
||||
candidate_source_path,
|
||||
instance.checksum,
|
||||
):
|
||||
new_filename = candidate_filename
|
||||
original_already_moved = True
|
||||
else:
|
||||
# Only fall back to unique search when there is an actual conflict
|
||||
new_filename = generate_unique_filename(instance)
|
||||
else:
|
||||
new_filename = candidate_filename
|
||||
|
||||
# Need to convert to string to be able to save it to the db
|
||||
instance.filename = str(new_filename)
|
||||
move_original = old_filename != instance.filename
|
||||
move_original = (
|
||||
old_filename != instance.filename and not original_already_moved
|
||||
)
|
||||
|
||||
if instance.has_archive_version:
|
||||
archive_candidate = generate_filename(instance, archive_filename=True)
|
||||
@@ -513,24 +533,38 @@ def update_filename_and_move_files(
|
||||
archive_candidate_path.exists()
|
||||
and archive_candidate_path != old_archive_path
|
||||
):
|
||||
new_archive_filename = generate_unique_filename(
|
||||
instance,
|
||||
archive_filename=True,
|
||||
)
|
||||
if not old_archive_path.is_file() and _path_matches_checksum(
|
||||
archive_candidate_path,
|
||||
instance.archive_checksum,
|
||||
):
|
||||
new_archive_filename = archive_candidate
|
||||
archive_already_moved = True
|
||||
else:
|
||||
new_archive_filename = generate_unique_filename(
|
||||
instance,
|
||||
archive_filename=True,
|
||||
)
|
||||
else:
|
||||
new_archive_filename = archive_candidate
|
||||
|
||||
instance.archive_filename = str(new_archive_filename)
|
||||
|
||||
move_archive = old_archive_filename != instance.archive_filename
|
||||
move_archive = (
|
||||
old_archive_filename != instance.archive_filename
|
||||
and not archive_already_moved
|
||||
)
|
||||
else:
|
||||
move_archive = False
|
||||
|
||||
if not move_original and not move_archive:
|
||||
# Just update modified. Also, don't save() here to prevent infinite recursion.
|
||||
Document.objects.filter(pk=instance.pk).update(
|
||||
modified=timezone.now(),
|
||||
)
|
||||
updates = {"modified": timezone.now()}
|
||||
if old_filename != instance.filename:
|
||||
updates["filename"] = instance.filename
|
||||
if old_archive_filename != instance.archive_filename:
|
||||
updates["archive_filename"] = instance.archive_filename
|
||||
|
||||
# Don't save() here to prevent infinite recursion.
|
||||
Document.objects.filter(pk=instance.pk).update(**updates)
|
||||
return
|
||||
|
||||
if move_original:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import datetime
|
||||
import hashlib
|
||||
import logging
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
@@ -166,6 +167,52 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
)
|
||||
self.assertEqual(document.filename, "none/none.pdf")
|
||||
|
||||
@override_settings(FILENAME_FORMAT=None)
|
||||
def test_stale_save_recovers_already_moved_files(self) -> None:
|
||||
old_storage_path = StoragePath.objects.create(
|
||||
name="old-path",
|
||||
path="old/{{title}}",
|
||||
)
|
||||
new_storage_path = StoragePath.objects.create(
|
||||
name="new-path",
|
||||
path="new/{{title}}",
|
||||
)
|
||||
original_bytes = b"original"
|
||||
archive_bytes = b"archive"
|
||||
|
||||
doc = Document.objects.create(
|
||||
title="document",
|
||||
mime_type="application/pdf",
|
||||
checksum=hashlib.md5(original_bytes).hexdigest(),
|
||||
archive_checksum=hashlib.md5(archive_bytes).hexdigest(),
|
||||
filename="old/document.pdf",
|
||||
archive_filename="old/document.pdf",
|
||||
storage_path=old_storage_path,
|
||||
)
|
||||
create_source_path_directory(doc.source_path)
|
||||
doc.source_path.write_bytes(original_bytes)
|
||||
create_source_path_directory(doc.archive_path)
|
||||
doc.archive_path.write_bytes(archive_bytes)
|
||||
|
||||
stale_doc = Document.objects.get(pk=doc.pk)
|
||||
fresh_doc = Document.objects.get(pk=doc.pk)
|
||||
fresh_doc.storage_path = new_storage_path
|
||||
fresh_doc.save()
|
||||
doc.refresh_from_db()
|
||||
self.assertEqual(doc.filename, "new/document.pdf")
|
||||
self.assertEqual(doc.archive_filename, "new/document.pdf")
|
||||
|
||||
stale_doc.storage_path = new_storage_path
|
||||
stale_doc.save()
|
||||
|
||||
doc.refresh_from_db()
|
||||
self.assertEqual(doc.filename, "new/document.pdf")
|
||||
self.assertEqual(doc.archive_filename, "new/document.pdf")
|
||||
self.assertIsFile(doc.source_path)
|
||||
self.assertIsFile(doc.archive_path)
|
||||
self.assertIsNotFile(settings.ORIGINALS_DIR / "old" / "document.pdf")
|
||||
self.assertIsNotFile(settings.ARCHIVE_DIR / "old" / "document.pdf")
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_document_delete(self):
|
||||
document = Document()
|
||||
|
||||
Reference in New Issue
Block a user