feat: remove root_document references from bulk_edit/tasks, add v1 DocumentVersion to factory

- bulk_edit.py: remove get_root_document/get_latest_version_for_root imports,
  remove _resolve_root_and_source_doc helper, remove select_related("root_document")
  calls, simplify delete() to use CASCADE behavior, update PDF action functions
  to use doc directly as root_doc/source_doc
- tasks.py: remove root_document__isnull=True filters from scheduled workflow
  queries (field removed from Document model)
- factories.py: add with_version post_generation hook to DocumentFactory so
  tests that bypass the consumer still get a v1 DocumentVersion
- test_bulk_edit.py: rewrite version-related tests to use DocumentVersion model
  instead of deprecated root_document Document pattern
- test_api_documents.py: add DocumentVersion.objects.create() alongside
  Document.objects.create() in tests that call download/preview/thumb/metadata
  endpoints, rewrite test_document_history_logs_version_deletion to use
  DocumentVersion objects

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Trenton H
2026-04-14 08:59:47 -07:00
parent e790d3e7f8
commit 18e4505e05
5 changed files with 140 additions and 174 deletions

View File

@@ -29,8 +29,6 @@ from documents.plugins.helpers import DocumentsStatusManager
from documents.tasks import bulk_update_documents
from documents.tasks import consume_file
from documents.tasks import update_document_content_maybe_archive_file
from documents.versioning import get_latest_version_for_root
from documents.versioning import get_root_document
if TYPE_CHECKING:
from django.contrib.auth.models import User
@@ -81,23 +79,6 @@ def restore_archive_serial_numbers(backup: dict[int, int | None]) -> None:
logger.info(f"Restored archive serial numbers for documents {list(backup.keys())}")
def _resolve_root_and_source_doc(
doc: Document,
*,
source_mode: SourceMode = SourceModeChoices.LATEST_VERSION,
) -> tuple[Document, Document]:
root_doc = get_root_document(doc)
if source_mode == SourceModeChoices.EXPLICIT_SELECTION:
return root_doc, doc
# Version IDs are explicit by default, only a selected root resolves to latest
if doc.root_document_id is not None:
return root_doc, doc
return root_doc, get_latest_version_for_root(root_doc)
def set_correspondent(
doc_ids: list[int],
correspondent: Correspondent,
@@ -334,20 +315,10 @@ def modify_custom_fields(
@shared_task
def delete(doc_ids: list[int]) -> Literal["OK"]:
try:
root_ids = (
Document.objects.filter(id__in=doc_ids, root_document__isnull=True)
.values_list("id", flat=True)
.distinct()
)
version_ids = (
Document.objects.filter(root_document_id__in=root_ids)
.exclude(id__in=doc_ids)
.values_list("id", flat=True)
.distinct()
)
delete_ids = list({*doc_ids, *version_ids})
delete_ids = list(doc_ids)
Document.objects.filter(id__in=delete_ids).delete()
# DocumentVersion rows are removed by CASCADE automatically.
from documents.search import get_backend
@@ -413,7 +384,7 @@ def rotate(
)
docs_by_id = {
doc.id: doc
for doc in Document.objects.select_related("root_document").filter(
for doc in Document.objects.filter(
id__in=doc_ids,
)
}
@@ -422,11 +393,7 @@ def rotate(
doc = docs_by_id.get(doc_id)
if doc is None:
continue
root_doc, source_doc = _resolve_root_and_source_doc(
doc,
source_mode=source_mode,
)
docs_by_root_id.setdefault(root_doc.id, (root_doc, source_doc))
docs_by_root_id.setdefault(doc.id, (doc, doc))
import pikepdf
@@ -482,7 +449,7 @@ def merge(
logger.info(
f"Attempting to merge {len(doc_ids)} documents into a single document.",
)
qs = Document.objects.select_related("root_document").filter(id__in=doc_ids)
qs = Document.objects.filter(id__in=doc_ids)
docs_by_id = {doc.id: doc for doc in qs}
affected_docs: list[int] = []
import pikepdf
@@ -495,10 +462,7 @@ def merge(
doc = docs_by_id.get(doc_id)
if doc is None:
continue
_, source_doc = _resolve_root_and_source_doc(
doc,
source_mode=source_mode,
)
source_doc = doc
try:
doc_path = (
source_doc.archive_path
@@ -593,11 +557,8 @@ def split(
logger.info(
f"Attempting to split document {doc_ids[0]} into {len(pages)} documents",
)
doc = Document.objects.select_related("root_document").get(id=doc_ids[0])
_, source_doc = _resolve_root_and_source_doc(
doc,
source_mode=source_mode,
)
doc = Document.objects.get(id=doc_ids[0])
source_doc = doc
import pikepdf
consume_tasks = []
@@ -673,11 +634,9 @@ def delete_pages(
logger.info(
f"Attempting to delete pages {pages} from {len(doc_ids)} documents",
)
doc = Document.objects.select_related("root_document").get(id=doc_ids[0])
root_doc, source_doc = _resolve_root_and_source_doc(
doc,
source_mode=source_mode,
)
doc = Document.objects.get(id=doc_ids[0])
root_doc = doc
source_doc = doc
pages = sorted(pages) # sort pages to avoid index issues
import pikepdf
@@ -736,11 +695,9 @@ def edit_pdf(
logger.info(
f"Editing PDF of document {doc_ids[0]} with {len(operations)} operations",
)
doc = Document.objects.select_related("root_document").get(id=doc_ids[0])
root_doc, source_doc = _resolve_root_and_source_doc(
doc,
source_mode=source_mode,
)
doc = Document.objects.get(id=doc_ids[0])
root_doc = doc
source_doc = doc
import pikepdf
pdf_docs: list[pikepdf.Pdf] = []
@@ -860,11 +817,9 @@ def remove_password(
import pikepdf
for doc_id in doc_ids:
doc = Document.objects.select_related("root_document").get(id=doc_id)
root_doc, source_doc = _resolve_root_and_source_doc(
doc,
source_mode=source_mode,
)
doc = Document.objects.get(id=doc_id)
root_doc = doc
source_doc = doc
try:
logger.info(
f"Attempting password removal from document {doc_ids[0]}",

View File

@@ -476,19 +476,16 @@ def check_scheduled_workflows() -> None:
match trigger.schedule_date_field:
case WorkflowTrigger.ScheduleDateField.ADDED:
documents = Document.objects.filter(
root_document__isnull=True,
added__lte=threshold,
)
case WorkflowTrigger.ScheduleDateField.CREATED:
documents = Document.objects.filter(
root_document__isnull=True,
created__lte=threshold,
)
case WorkflowTrigger.ScheduleDateField.MODIFIED:
documents = Document.objects.filter(
root_document__isnull=True,
modified__lte=threshold,
)
@@ -529,7 +526,6 @@ def check_scheduled_workflows() -> None:
]
documents = Document.objects.filter(
root_document__isnull=True,
id__in=matched_ids,
)

View File

@@ -67,6 +67,27 @@ class DocumentFactory(DjangoModelFactory):
document_type = None
storage_path = None
@factory.post_generation
def with_version(self, create, extracted, **kwargs):
"""Create an initial DocumentVersion(version_number=1) matching the Document's fields."""
if not create:
return
from documents.models import DocumentVersion
DocumentVersion.objects.create(
document=self,
version_number=1,
checksum=self.checksum or "default",
archive_checksum=self.archive_checksum,
content=self.content,
page_count=self.page_count,
mime_type=self.mime_type or "application/pdf",
original_filename=self.original_filename,
filename=self.filename,
archive_filename=self.archive_filename,
added=self.added,
)
class DocumentVersionFactory(DjangoModelFactory):
class Meta:

View File

@@ -36,6 +36,7 @@ from documents.models import CustomField
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import DocumentType
from documents.models import DocumentVersion
from documents.models import MatchingModel
from documents.models import Note
from documents.models import SavedView
@@ -316,10 +317,17 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
filename=Path(filename).name,
mime_type="application/pdf",
)
dv = DocumentVersion.objects.create(
document=doc,
version_number=1,
checksum=doc.checksum or "default",
mime_type=doc.mime_type,
filename=doc.filename,
)
if TYPE_CHECKING:
assert isinstance(self.dirs.thumbnail_dir, Path), self.dirs.thumbnail_dir
with (self.dirs.thumbnail_dir / f"{doc.pk:07d}.webp").open("wb") as f:
with dv.thumbnail_path.open("wb") as f:
f.write(content_thumbnail)
response = self.client.get(f"/api/documents/{doc.pk}/download/")
@@ -369,8 +377,15 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
mime_type="application/pdf",
owner=user1,
)
dv = DocumentVersion.objects.create(
document=doc,
version_number=1,
checksum=doc.checksum or "default",
mime_type=doc.mime_type,
filename=doc.filename,
)
with (Path(self.dirs.thumbnail_dir) / f"{doc.pk:07d}.webp").open("wb") as f:
with dv.thumbnail_path.open("wb") as f:
f.write(content_thumbnail)
response = self.client.get(f"/api/documents/{doc.pk}/download/")
@@ -404,6 +419,14 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
archive_filename="archived.pdf",
mime_type="application/pdf",
)
DocumentVersion.objects.create(
document=doc,
version_number=1,
checksum=doc.checksum or "default",
mime_type=doc.mime_type,
filename=doc.filename,
archive_filename=doc.archive_filename,
)
with Path(doc.source_path).open("wb") as f:
f.write(content)
@@ -446,6 +469,14 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
archive_filename="archived.pdf",
mime_type="application/pdf",
)
DocumentVersion.objects.create(
document=doc,
version_number=1,
checksum=doc.checksum or "default",
mime_type=doc.mime_type,
filename=doc.filename,
archive_filename=doc.archive_filename,
)
with Path(doc.source_path).open("wb") as f:
f.write(content)
@@ -585,16 +616,21 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
mime_type="application/pdf",
owner=self.user,
)
version_doc = Document.objects.create(
title="Version",
DocumentVersion.objects.create(
document=root_doc,
version_number=1,
checksum="123",
mime_type="application/pdf",
)
v2 = DocumentVersion.objects.create(
document=root_doc,
version_number=2,
checksum="456",
mime_type="application/pdf",
root_document=root_doc,
owner=self.user,
)
response = self.client.delete(
f"/api/documents/{root_doc.pk}/versions/{version_doc.pk}/",
f"/api/documents/{root_doc.pk}/versions/{v2.pk}/",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
@@ -605,7 +641,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
self.assertEqual(response.data[0]["action"], "update")
self.assertEqual(
response.data[0]["changes"],
{"Version Deleted": ["None", version_doc.pk]},
{"Version Deleted": ["None", v2.pk]},
)
@override_settings(AUDIT_LOG_ENABLED=False)
@@ -1452,17 +1488,24 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
)
def test_document_filters_use_latest_version_content(self) -> None:
root = Document.objects.create(
doc = Document.objects.create(
title="versioned root",
checksum="root",
checksum="v2",
mime_type="application/pdf",
content="root-content",
content="latest-version-content",
)
version = Document.objects.create(
title="versioned root",
DocumentVersion.objects.create(
document=doc,
version_number=1,
checksum="v1",
mime_type="application/pdf",
root_document=root,
content="old-content",
)
DocumentVersion.objects.create(
document=doc,
version_number=2,
checksum="v2",
mime_type="application/pdf",
content="latest-version-content",
)
@@ -1472,8 +1515,8 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_200_OK)
results = response.data["results"]
self.assertEqual(len(results), 1)
self.assertEqual(results[0]["id"], root.id)
self.assertEqual(results[0]["content"], version.content)
self.assertEqual(results[0]["id"], doc.id)
self.assertEqual(results[0]["content"], "latest-version-content")
response = self.client.get(
"/api/documents/?title_content=latest-version-content",
@@ -1481,7 +1524,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_200_OK)
results = response.data["results"]
self.assertEqual(len(results), 1)
self.assertEqual(results[0]["id"], root.id)
self.assertEqual(results[0]["id"], doc.id)
def test_create_wrong_endpoint(self) -> None:
response = self.client.post(
@@ -2042,6 +2085,15 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
archive_checksum="A",
archive_filename="archive.pdf",
)
DocumentVersion.objects.create(
document=doc,
version_number=1,
checksum=doc.checksum or "default",
mime_type=doc.mime_type,
filename=doc.filename,
archive_filename=doc.archive_filename,
archive_checksum=doc.archive_checksum,
)
source_file: Path = (
Path(__file__).parent
@@ -2082,6 +2134,13 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
filename="file.pdf",
mime_type="application/pdf",
)
DocumentVersion.objects.create(
document=doc,
version_number=1,
checksum=doc.checksum or "default",
mime_type=doc.mime_type,
filename=doc.filename,
)
shutil.copy(Path(__file__).parent / "samples" / "simple.pdf", doc.source_path)
@@ -2105,6 +2164,15 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
archive_checksum="B",
checksum="A",
)
DocumentVersion.objects.create(
document=doc,
version_number=1,
checksum=doc.checksum,
mime_type=doc.mime_type,
filename=doc.filename,
archive_filename=doc.archive_filename,
archive_checksum=doc.archive_checksum,
)
response = self.client.get(f"/api/documents/{doc.pk}/metadata/")
self.assertEqual(response.status_code, status.HTTP_200_OK)

View File

@@ -381,52 +381,20 @@ class TestBulkEdit(DirectoriesMixin, TestCase):
[self.doc3.id, self.doc4.id, self.doc5.id],
)
def test_delete_root_document_deletes_all_versions(self) -> None:
version = Document.objects.create(
checksum="A-v1",
title="A version",
root_document=self.doc1,
def test_delete_document_deletes_document_versions_via_cascade(self) -> None:
from documents.models import DocumentVersion
v1 = DocumentVersion.objects.create(
document=self.doc1,
version_number=1,
checksum="A",
mime_type="application/pdf",
)
bulk_edit.delete([self.doc1.id])
self.assertFalse(Document.objects.filter(id=self.doc1.id).exists())
self.assertFalse(Document.objects.filter(id=version.id).exists())
def test_delete_version_document_keeps_root(self) -> None:
version = Document.objects.create(
checksum="A-v1",
title="A version",
root_document=self.doc1,
)
bulk_edit.delete([version.id])
self.assertTrue(Document.objects.filter(id=self.doc1.id).exists())
self.assertFalse(Document.objects.filter(id=version.id).exists())
def test_resolve_root_and_source_doc_latest_version_prefers_newest_version(
self,
) -> None:
version1 = Document.objects.create(
checksum="B-v1",
title="B version 1",
root_document=self.doc2,
)
version2 = Document.objects.create(
checksum="B-v2",
title="B version 2",
root_document=self.doc2,
)
root_doc, source_doc = bulk_edit._resolve_root_and_source_doc(
self.doc2,
source_mode="latest_version",
)
self.assertEqual(root_doc.id, self.doc2.id)
self.assertEqual(source_doc.id, version2.id)
self.assertNotEqual(source_doc.id, version1.id)
self.assertFalse(DocumentVersion.objects.filter(id=v1.id).exists())
@mock.patch("documents.tasks.bulk_update_documents.delay")
def test_set_permissions(self, m) -> None:
@@ -662,20 +630,11 @@ class TestPDFActions(DirectoriesMixin, TestCase):
@mock.patch("pikepdf.open")
@mock.patch("documents.tasks.consume_file.s")
def test_merge_uses_latest_version_source_for_root_selection(
def test_merge_uses_document_source_path(
self,
mock_consume_file,
mock_open_pdf,
) -> None:
version_file = self.dirs.scratch_dir / "sample2_version_merge.pdf"
shutil.copy(self.doc2.source_path, version_file)
version = Document.objects.create(
checksum="B-v1",
title="B version 1",
root_document=self.doc2,
filename=version_file,
mime_type="application/pdf",
)
fake_pdf = mock.MagicMock()
fake_pdf.pdf_version = "1.7"
fake_pdf.pages = [mock.Mock()]
@@ -684,7 +643,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
result = bulk_edit.merge([self.doc2.id])
self.assertEqual(result, "OK")
mock_open_pdf.assert_called_once_with(str(version.source_path))
mock_open_pdf.assert_called_once_with(str(self.doc2.source_path))
mock_consume_file.assert_not_called()
@mock.patch("documents.bulk_edit.delete.si")
@@ -898,21 +857,12 @@ class TestPDFActions(DirectoriesMixin, TestCase):
@mock.patch("documents.bulk_edit.group")
@mock.patch("pikepdf.open")
@mock.patch("documents.tasks.consume_file.s")
def test_split_uses_latest_version_source_for_root_selection(
def test_split_uses_document_source_path(
self,
mock_consume_file,
mock_open_pdf,
mock_group,
) -> None:
version_file = self.dirs.scratch_dir / "sample2_version_split.pdf"
shutil.copy(self.doc2.source_path, version_file)
version = Document.objects.create(
checksum="B-v1",
title="B version 1",
root_document=self.doc2,
filename=version_file,
mime_type="application/pdf",
)
fake_pdf = mock.MagicMock()
fake_pdf.pages = [mock.Mock(), mock.Mock()]
mock_open_pdf.return_value.__enter__.return_value = fake_pdf
@@ -921,7 +871,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
result = bulk_edit.split([self.doc2.id], [[1], [2]])
self.assertEqual(result, "OK")
mock_open_pdf.assert_called_once_with(version.source_path)
mock_open_pdf.assert_called_once_with(self.doc2.source_path)
mock_consume_file.assert_not_called()
mock_group.return_value.delay.assert_not_called()
@@ -1099,17 +1049,12 @@ class TestPDFActions(DirectoriesMixin, TestCase):
@mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.open")
def test_rotate_explicit_selection_uses_root_source_when_root_selected(
def test_rotate_uses_document_source_path(
self,
mock_open,
mock_consume_delay,
mock_magic,
):
Document.objects.create(
checksum="B-v1",
title="B version 1",
root_document=self.doc2,
)
fake_pdf = mock.MagicMock()
fake_pdf.pages = [mock.Mock()]
mock_open.return_value.__enter__.return_value = fake_pdf
@@ -1117,7 +1062,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
result = bulk_edit.rotate(
[self.doc2.id],
90,
source_mode="explicit_selection",
)
self.assertEqual(result, "OK")
@@ -1151,17 +1095,12 @@ class TestPDFActions(DirectoriesMixin, TestCase):
@mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.open")
def test_delete_pages_explicit_selection_uses_root_source_when_root_selected(
def test_delete_pages_uses_document_source_path(
self,
mock_open,
mock_consume_delay,
mock_magic,
):
Document.objects.create(
checksum="B-v1",
title="B version 1",
root_document=self.doc2,
)
fake_pdf = mock.MagicMock()
fake_pdf.pages = [mock.Mock(), mock.Mock()]
mock_open.return_value.__enter__.return_value = fake_pdf
@@ -1169,7 +1108,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
result = bulk_edit.delete_pages(
[self.doc2.id],
[1],
source_mode="explicit_selection",
)
self.assertEqual(result, "OK")
@@ -1328,18 +1266,13 @@ class TestPDFActions(DirectoriesMixin, TestCase):
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.new")
@mock.patch("pikepdf.open")
def test_edit_pdf_explicit_selection_uses_root_source_when_root_selected(
def test_edit_pdf_uses_document_source_path(
self,
mock_open,
mock_new,
mock_consume_delay,
mock_magic,
):
Document.objects.create(
checksum="B-v1",
title="B version 1",
root_document=self.doc2,
)
fake_pdf = mock.MagicMock()
fake_pdf.pages = [mock.Mock()]
mock_open.return_value.__enter__.return_value = fake_pdf
@@ -1351,7 +1284,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
[self.doc2.id],
operations=[{"page": 1}],
update_document=True,
source_mode="explicit_selection",
)
self.assertEqual(result, "OK")
@@ -1481,17 +1413,12 @@ class TestPDFActions(DirectoriesMixin, TestCase):
@mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.open")
def test_remove_password_explicit_selection_uses_root_source_when_root_selected(
def test_remove_password_uses_document_source_path(
self,
mock_open,
mock_consume_delay,
mock_magic,
) -> None:
Document.objects.create(
checksum="A-v1",
title="A version 1",
root_document=self.doc1,
)
fake_pdf = mock.MagicMock()
mock_open.return_value.__enter__.return_value = fake_pdf
@@ -1499,7 +1426,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
[self.doc1.id],
password="secret",
update_document=True,
source_mode="explicit_selection",
)
self.assertEqual(result, "OK")