Compare commits

...

1 Commits

Author SHA1 Message Date
shamoon
7100ecb026 Fix: avoid moving files if already moved 2026-03-18 07:16:09 -07:00
2 changed files with 93 additions and 12 deletions

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import hashlib
import logging
import shutil
from pathlib import Path
@@ -391,6 +392,14 @@ class CannotMoveFilesException(Exception):
pass
def _path_matches_checksum(path: Path, checksum: str | None) -> bool:
if checksum is None or not path.is_file():
return False
with path.open("rb") as f:
return hashlib.md5(f.read()).hexdigest() == checksum
def _filename_template_uses_custom_fields(doc: Document) -> bool:
template = None
if doc.storage_path is not None:
@@ -461,10 +470,12 @@ def update_filename_and_move_files(
old_filename = instance.filename
old_source_path = instance.source_path
move_original = False
original_already_moved = False
old_archive_filename = instance.archive_filename
old_archive_path = instance.archive_path
move_archive = False
archive_already_moved = False
candidate_filename = generate_filename(instance)
if len(str(candidate_filename)) > Document.MAX_STORED_FILENAME_LENGTH:
@@ -485,14 +496,23 @@ def update_filename_and_move_files(
candidate_source_path.exists()
and candidate_source_path != old_source_path
):
# Only fall back to unique search when there is an actual conflict
new_filename = generate_unique_filename(instance)
if not old_source_path.is_file() and _path_matches_checksum(
candidate_source_path,
instance.checksum,
):
new_filename = candidate_filename
original_already_moved = True
else:
# Only fall back to unique search when there is an actual conflict
new_filename = generate_unique_filename(instance)
else:
new_filename = candidate_filename
# Need to convert to string to be able to save it to the db
instance.filename = str(new_filename)
move_original = old_filename != instance.filename
move_original = (
old_filename != instance.filename and not original_already_moved
)
if instance.has_archive_version:
archive_candidate = generate_filename(instance, archive_filename=True)
@@ -513,24 +533,38 @@ def update_filename_and_move_files(
archive_candidate_path.exists()
and archive_candidate_path != old_archive_path
):
new_archive_filename = generate_unique_filename(
instance,
archive_filename=True,
)
if not old_archive_path.is_file() and _path_matches_checksum(
archive_candidate_path,
instance.archive_checksum,
):
new_archive_filename = archive_candidate
archive_already_moved = True
else:
new_archive_filename = generate_unique_filename(
instance,
archive_filename=True,
)
else:
new_archive_filename = archive_candidate
instance.archive_filename = str(new_archive_filename)
move_archive = old_archive_filename != instance.archive_filename
move_archive = (
old_archive_filename != instance.archive_filename
and not archive_already_moved
)
else:
move_archive = False
if not move_original and not move_archive:
# Just update modified. Also, don't save() here to prevent infinite recursion.
Document.objects.filter(pk=instance.pk).update(
modified=timezone.now(),
)
updates = {"modified": timezone.now()}
if old_filename != instance.filename:
updates["filename"] = instance.filename
if old_archive_filename != instance.archive_filename:
updates["archive_filename"] = instance.archive_filename
# Don't save() here to prevent infinite recursion.
Document.objects.filter(pk=instance.pk).update(**updates)
return
if move_original:

View File

@@ -1,4 +1,5 @@
import datetime
import hashlib
import logging
import tempfile
from pathlib import Path
@@ -166,6 +167,52 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
)
self.assertEqual(document.filename, "none/none.pdf")
@override_settings(FILENAME_FORMAT=None)
def test_stale_save_recovers_already_moved_files(self) -> None:
old_storage_path = StoragePath.objects.create(
name="old-path",
path="old/{{title}}",
)
new_storage_path = StoragePath.objects.create(
name="new-path",
path="new/{{title}}",
)
original_bytes = b"original"
archive_bytes = b"archive"
doc = Document.objects.create(
title="document",
mime_type="application/pdf",
checksum=hashlib.md5(original_bytes).hexdigest(),
archive_checksum=hashlib.md5(archive_bytes).hexdigest(),
filename="old/document.pdf",
archive_filename="old/document.pdf",
storage_path=old_storage_path,
)
create_source_path_directory(doc.source_path)
doc.source_path.write_bytes(original_bytes)
create_source_path_directory(doc.archive_path)
doc.archive_path.write_bytes(archive_bytes)
stale_doc = Document.objects.get(pk=doc.pk)
fresh_doc = Document.objects.get(pk=doc.pk)
fresh_doc.storage_path = new_storage_path
fresh_doc.save()
doc.refresh_from_db()
self.assertEqual(doc.filename, "new/document.pdf")
self.assertEqual(doc.archive_filename, "new/document.pdf")
stale_doc.storage_path = new_storage_path
stale_doc.save()
doc.refresh_from_db()
self.assertEqual(doc.filename, "new/document.pdf")
self.assertEqual(doc.archive_filename, "new/document.pdf")
self.assertIsFile(doc.source_path)
self.assertIsFile(doc.archive_path)
self.assertIsNotFile(settings.ORIGINALS_DIR / "old" / "document.pdf")
self.assertIsNotFile(settings.ARCHIVE_DIR / "old" / "document.pdf")
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_document_delete(self):
document = Document()