From 6f8e39c2e0d877d7088a58939ea705530fa7e985 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Sun, 7 Jun 2026 13:30:08 -0700 Subject: [PATCH] Fix: avoid unnecessary creating new PDF with pw removal workflow (#12948) --- src/documents/bulk_edit.py | 13 +++ src/documents/tests/test_bulk_edit.py | 126 ++++++++++++++++++++++++-- 2 files changed, 130 insertions(+), 9 deletions(-) diff --git a/src/documents/bulk_edit.py b/src/documents/bulk_edit.py index 848919cde..0cea9a3a1 100644 --- a/src/documents/bulk_edit.py +++ b/src/documents/bulk_edit.py @@ -904,6 +904,19 @@ def remove_password( doc.id, pair.source_doc.source_path, ) + try: + with pikepdf.open(source_path) as pdf: + if not pdf.is_encrypted: + logger.info( + "Skipping password removal for document %s because the " + "source PDF is not encrypted", + pair.root_doc.id, + ) + continue + except pikepdf.PasswordError: + # Password-protected PDFs need the supplied password below. + pass + with pikepdf.open(source_path, password=password) as pdf: filepath: Path = ( Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR)) diff --git a/src/documents/tests/test_bulk_edit.py b/src/documents/tests/test_bulk_edit.py index 8d0c893eb..010744af1 100644 --- a/src/documents/tests/test_bulk_edit.py +++ b/src/documents/tests/test_bulk_edit.py @@ -3,6 +3,7 @@ from datetime import date from pathlib import Path from unittest import mock +import pikepdf from django.contrib.auth.models import Group from django.contrib.auth.models import User from django.test import TestCase @@ -615,6 +616,18 @@ class TestPDFActions(DirectoriesMixin, TestCase): self.img_doc.archive_filename = img_doc_archive self.img_doc.save() + @staticmethod + def mock_password_required_pdf( + mock_open: mock.Mock, + fake_pdf: mock.Mock, + ) -> None: + password_context = mock.MagicMock() + password_context.__enter__.return_value = fake_pdf + mock_open.side_effect = [ + pikepdf.PasswordError("password required"), + password_context, + ] + @mock.patch("documents.tasks.consume_file.s") def test_merge(self, mock_consume_file) -> None: """ @@ -1466,6 +1479,7 @@ class TestPDFActions(DirectoriesMixin, TestCase): fake_pdf = mock.MagicMock() fake_pdf.pages = [mock.Mock(), mock.Mock(), mock.Mock()] + fake_pdf.is_encrypted = True def save_side_effect(target_path): Path(target_path).write_bytes(b"new pdf content") @@ -1480,7 +1494,13 @@ class TestPDFActions(DirectoriesMixin, TestCase): ) self.assertEqual(result, "OK") - mock_open.assert_called_once_with(doc.source_path, password="secret") + self.assertEqual( + mock_open.call_args_list, + [ + mock.call(doc.source_path), + mock.call(doc.source_path, password="secret"), + ], + ) fake_pdf.remove_unreferenced_resources.assert_called_once() mock_update_document.assert_not_called() mock_consume_delay.assert_called_once() @@ -1494,6 +1514,33 @@ class TestPDFActions(DirectoriesMixin, TestCase): self.assertEqual(task_kwargs["input_doc"].root_document_id, doc.id) self.assertIsNotNone(task_kwargs["overrides"]) + @mock.patch("documents.tasks.consume_file.apply_async") + @mock.patch("documents.bulk_edit.tempfile.mkdtemp") + @mock.patch("pikepdf.open") + def test_remove_password_update_document_skips_unencrypted_pdf( + self, + mock_open, + mock_mkdtemp, + mock_consume_delay, + ) -> None: + doc = self.doc1 + fake_pdf = mock.MagicMock() + fake_pdf.is_encrypted = False + mock_open.return_value.__enter__.return_value = fake_pdf + + result = bulk_edit.remove_password( + [doc.id], + password="secret", + update_document=True, + ) + + self.assertEqual(result, "OK") + mock_open.assert_called_once_with(doc.source_path) + fake_pdf.remove_unreferenced_resources.assert_not_called() + fake_pdf.save.assert_not_called() + mock_mkdtemp.assert_not_called() + mock_consume_delay.assert_not_called() + @mock.patch("documents.bulk_edit.update_document_content_maybe_archive_file.delay") @mock.patch("documents.tasks.consume_file.apply_async") @mock.patch("documents.bulk_edit.tempfile.mkdtemp") @@ -1513,12 +1560,12 @@ class TestPDFActions(DirectoriesMixin, TestCase): mock_mkdtemp.return_value = str(temp_dir) fake_pdf = mock.MagicMock() + self.mock_password_required_pdf(mock_open, fake_pdf) def save_side_effect(target_path): Path(target_path).write_bytes(b"new pdf content") fake_pdf.save.side_effect = save_side_effect - mock_open.return_value.__enter__.return_value = fake_pdf result = bulk_edit.remove_password( [doc.id], @@ -1528,7 +1575,13 @@ class TestPDFActions(DirectoriesMixin, TestCase): ) self.assertEqual(result, "OK") - mock_open.assert_called_once_with(source_file, password="secret") + self.assertEqual( + mock_open.call_args_list, + [ + mock.call(source_file), + mock.call(source_file, password="secret"), + ], + ) mock_update_document.assert_not_called() mock_consume_delay.assert_called_once() @@ -1547,7 +1600,7 @@ class TestPDFActions(DirectoriesMixin, TestCase): root_document=self.doc1, ) fake_pdf = mock.MagicMock() - mock_open.return_value.__enter__.return_value = fake_pdf + self.mock_password_required_pdf(mock_open, fake_pdf) result = bulk_edit.remove_password( [self.doc1.id], @@ -1557,7 +1610,13 @@ class TestPDFActions(DirectoriesMixin, TestCase): ) self.assertEqual(result, "OK") - mock_open.assert_called_once_with(self.doc1.source_path, password="secret") + self.assertEqual( + mock_open.call_args_list, + [ + mock.call(self.doc1.source_path), + mock.call(self.doc1.source_path, password="secret"), + ], + ) mock_consume_delay.assert_called_once() @mock.patch("documents.bulk_edit.chord") @@ -1580,12 +1639,12 @@ class TestPDFActions(DirectoriesMixin, TestCase): fake_pdf = mock.MagicMock() fake_pdf.pages = [mock.Mock(), mock.Mock()] + self.mock_password_required_pdf(mock_open, fake_pdf) def save_side_effect(target_path: Path) -> None: target_path.write_bytes(b"password removed") fake_pdf.save.side_effect = save_side_effect - mock_open.return_value.__enter__.return_value = fake_pdf mock_group.return_value.delay.return_value = None user = User.objects.create(username="owner") @@ -1600,7 +1659,13 @@ class TestPDFActions(DirectoriesMixin, TestCase): ) self.assertEqual(result, "OK") - mock_open.assert_called_once_with(doc.source_path, password="secret") + self.assertEqual( + mock_open.call_args_list, + [ + mock.call(doc.source_path), + mock.call(doc.source_path, password="secret"), + ], + ) mock_consume_file.assert_called_once() call_kwargs = mock_consume_file.call_args.kwargs consumable_document = call_kwargs["input_doc"] @@ -1618,6 +1683,43 @@ class TestPDFActions(DirectoriesMixin, TestCase): mock_group.return_value.delay.assert_called_once() mock_chord.assert_not_called() + @mock.patch("documents.bulk_edit.delete") + @mock.patch("documents.bulk_edit.chord") + @mock.patch("documents.bulk_edit.group") + @mock.patch("documents.tasks.consume_file.s") + @mock.patch("documents.bulk_edit.tempfile.mkdtemp") + @mock.patch("pikepdf.open") + def test_remove_password_skips_unencrypted_pdf_without_queueing( + self, + mock_open: mock.Mock, + mock_mkdtemp: mock.Mock, + mock_consume_file: mock.Mock, + mock_group: mock.Mock, + mock_chord: mock.Mock, + mock_delete: mock.Mock, + ) -> None: + doc = self.doc2 + fake_pdf = mock.MagicMock() + fake_pdf.is_encrypted = False + mock_open.return_value.__enter__.return_value = fake_pdf + + result = bulk_edit.remove_password( + [doc.id], + password="secret", + update_document=False, + delete_original=True, + ) + + self.assertEqual(result, "OK") + mock_open.assert_called_once_with(doc.source_path) + fake_pdf.remove_unreferenced_resources.assert_not_called() + fake_pdf.save.assert_not_called() + mock_mkdtemp.assert_not_called() + mock_consume_file.assert_not_called() + mock_group.assert_not_called() + mock_chord.assert_not_called() + mock_delete.si.assert_not_called() + @mock.patch("documents.bulk_edit.delete") @mock.patch("documents.bulk_edit.chord") @mock.patch("documents.bulk_edit.group") @@ -1640,12 +1742,12 @@ class TestPDFActions(DirectoriesMixin, TestCase): fake_pdf = mock.MagicMock() fake_pdf.pages = [mock.Mock(), mock.Mock()] + self.mock_password_required_pdf(mock_open, fake_pdf) def save_side_effect(target_path: Path) -> None: target_path.write_bytes(b"password removed") fake_pdf.save.side_effect = save_side_effect - mock_open.return_value.__enter__.return_value = fake_pdf mock_chord.return_value.delay.return_value = None result = bulk_edit.remove_password( @@ -1657,7 +1759,13 @@ class TestPDFActions(DirectoriesMixin, TestCase): ) self.assertEqual(result, "OK") - mock_open.assert_called_once_with(doc.source_path, password="secret") + self.assertEqual( + mock_open.call_args_list, + [ + mock.call(doc.source_path), + mock.call(doc.source_path, password="secret"), + ], + ) mock_consume_file.assert_called_once() mock_group.assert_not_called() mock_chord.assert_called_once()