diff --git a/src/documents/bulk_edit.py b/src/documents/bulk_edit.py index 848919cde..ad6088cd1 100644 --- a/src/documents/bulk_edit.py +++ b/src/documents/bulk_edit.py @@ -2,6 +2,7 @@ from __future__ import annotations import logging import tempfile +import warnings from pathlib import Path from typing import TYPE_CHECKING from typing import Literal @@ -904,63 +905,77 @@ def remove_password( doc.id, pair.source_doc.source_path, ) - with pikepdf.open(source_path, password=password) as pdf: - filepath: Path = ( - Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR)) - / f"{pair.root_doc.id}_unprotected.pdf" + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message="A password was provided, but no password was needed to open this PDF.", + category=UserWarning, ) - pdf.remove_unreferenced_resources() - pdf.save(filepath) + with pikepdf.open(source_path, password=password) as pdf: + if not pdf.is_encrypted: + logger.info( + "Skipping password removal for document %s because the " + "source PDF is not encrypted", + pair.root_doc.id, + ) + continue - if update_document: - # Create a new version rather than modifying the root/original in place. - overrides = ( - DocumentMetadataOverrides().from_document(pair.root_doc) - if include_metadata - else DocumentMetadataOverrides() + filepath: Path = ( + Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR)) + / f"{pair.root_doc.id}_unprotected.pdf" ) - if user is not None: - overrides.owner_id = user.id - overrides.actor_id = user.id - consume_file.apply_async( - kwargs={ - "input_doc": ConsumableDocument( - source=DocumentSource.ConsumeFolder, - original_file=filepath, - root_document_id=pair.root_doc.id, - ), - "overrides": overrides, - }, - headers={"trigger_source": trigger_source}, - ) - else: - consume_tasks = [] - overrides = ( - DocumentMetadataOverrides().from_document(pair.root_doc) - if include_metadata - else DocumentMetadataOverrides() - ) - if user is not None: - overrides.owner_id = user.id - overrides.actor_id = user.id + pdf.remove_unreferenced_resources() + pdf.save(filepath) - consume_tasks.append( - consume_file.s( - input_doc=ConsumableDocument( - source=DocumentSource.ConsumeFolder, - original_file=filepath, - ), - overrides=overrides, - ).set(headers={"trigger_source": trigger_source}), - ) - - if delete_original: - chord( - header=consume_tasks, - body=delete.si([doc.id]), - ).delay() + if update_document: + # Create a new version rather than modifying the root/original in place. + overrides = ( + DocumentMetadataOverrides().from_document(pair.root_doc) + if include_metadata + else DocumentMetadataOverrides() + ) + if user is not None: + overrides.owner_id = user.id + overrides.actor_id = user.id + consume_file.apply_async( + kwargs={ + "input_doc": ConsumableDocument( + source=DocumentSource.ConsumeFolder, + original_file=filepath, + root_document_id=pair.root_doc.id, + ), + "overrides": overrides, + }, + headers={"trigger_source": trigger_source}, + ) else: - group(consume_tasks).delay() + consume_tasks = [] + overrides = ( + DocumentMetadataOverrides().from_document(pair.root_doc) + if include_metadata + else DocumentMetadataOverrides() + ) + if user is not None: + overrides.owner_id = user.id + overrides.actor_id = user.id + + consume_tasks.append( + consume_file.s( + input_doc=ConsumableDocument( + source=DocumentSource.ConsumeFolder, + original_file=filepath, + ), + overrides=overrides, + ).set(headers={"trigger_source": trigger_source}), + ) + + if delete_original: + chord( + header=consume_tasks, + body=delete.si([doc.id]), + ).delay() + else: + group(consume_tasks).delay() except Exception as e: logger.exception( diff --git a/src/documents/tests/test_bulk_edit.py b/src/documents/tests/test_bulk_edit.py index 8d0c893eb..27c874579 100644 --- a/src/documents/tests/test_bulk_edit.py +++ b/src/documents/tests/test_bulk_edit.py @@ -1494,6 +1494,33 @@ class TestPDFActions(DirectoriesMixin, TestCase): self.assertEqual(task_kwargs["input_doc"].root_document_id, doc.id) self.assertIsNotNone(task_kwargs["overrides"]) + @mock.patch("documents.tasks.consume_file.apply_async") + @mock.patch("documents.bulk_edit.tempfile.mkdtemp") + @mock.patch("pikepdf.open") + def test_remove_password_update_document_skips_unencrypted_pdf( + self, + mock_open, + mock_mkdtemp, + mock_consume_delay, + ) -> None: + doc = self.doc1 + fake_pdf = mock.MagicMock() + fake_pdf.is_encrypted = False + mock_open.return_value.__enter__.return_value = fake_pdf + + result = bulk_edit.remove_password( + [doc.id], + password="secret", + update_document=True, + ) + + self.assertEqual(result, "OK") + mock_open.assert_called_once_with(doc.source_path, password="secret") + fake_pdf.remove_unreferenced_resources.assert_not_called() + fake_pdf.save.assert_not_called() + mock_mkdtemp.assert_not_called() + mock_consume_delay.assert_not_called() + @mock.patch("documents.bulk_edit.update_document_content_maybe_archive_file.delay") @mock.patch("documents.tasks.consume_file.apply_async") @mock.patch("documents.bulk_edit.tempfile.mkdtemp") @@ -1618,6 +1645,43 @@ class TestPDFActions(DirectoriesMixin, TestCase): mock_group.return_value.delay.assert_called_once() mock_chord.assert_not_called() + @mock.patch("documents.bulk_edit.delete") + @mock.patch("documents.bulk_edit.chord") + @mock.patch("documents.bulk_edit.group") + @mock.patch("documents.tasks.consume_file.s") + @mock.patch("documents.bulk_edit.tempfile.mkdtemp") + @mock.patch("pikepdf.open") + def test_remove_password_skips_unencrypted_pdf_without_queueing( + self, + mock_open: mock.Mock, + mock_mkdtemp: mock.Mock, + mock_consume_file: mock.Mock, + mock_group: mock.Mock, + mock_chord: mock.Mock, + mock_delete: mock.Mock, + ) -> None: + doc = self.doc2 + fake_pdf = mock.MagicMock() + fake_pdf.is_encrypted = False + mock_open.return_value.__enter__.return_value = fake_pdf + + result = bulk_edit.remove_password( + [doc.id], + password="secret", + update_document=False, + delete_original=True, + ) + + self.assertEqual(result, "OK") + mock_open.assert_called_once_with(doc.source_path, password="secret") + fake_pdf.remove_unreferenced_resources.assert_not_called() + fake_pdf.save.assert_not_called() + mock_mkdtemp.assert_not_called() + mock_consume_file.assert_not_called() + mock_group.assert_not_called() + mock_chord.assert_not_called() + mock_delete.si.assert_not_called() + @mock.patch("documents.bulk_edit.delete") @mock.patch("documents.bulk_edit.chord") @mock.patch("documents.bulk_edit.group")