Fix: avoid re-creating new PDF when original is unencrypted

This commit is contained in:
shamoon
2026-06-05 22:35:04 -07:00
parent 449fd97b1f
commit fce0b17694
2 changed files with 131 additions and 52 deletions
+67 -52
View File
@@ -2,6 +2,7 @@ from __future__ import annotations
import logging
import tempfile
import warnings
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Literal
@@ -904,63 +905,77 @@ def remove_password(
doc.id,
pair.source_doc.source_path,
)
with pikepdf.open(source_path, password=password) as pdf:
filepath: Path = (
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
/ f"{pair.root_doc.id}_unprotected.pdf"
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message="A password was provided, but no password was needed to open this PDF.",
category=UserWarning,
)
pdf.remove_unreferenced_resources()
pdf.save(filepath)
with pikepdf.open(source_path, password=password) as pdf:
if not pdf.is_encrypted:
logger.info(
"Skipping password removal for document %s because the "
"source PDF is not encrypted",
pair.root_doc.id,
)
continue
if update_document:
# Create a new version rather than modifying the root/original in place.
overrides = (
DocumentMetadataOverrides().from_document(pair.root_doc)
if include_metadata
else DocumentMetadataOverrides()
filepath: Path = (
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
/ f"{pair.root_doc.id}_unprotected.pdf"
)
if user is not None:
overrides.owner_id = user.id
overrides.actor_id = user.id
consume_file.apply_async(
kwargs={
"input_doc": ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=filepath,
root_document_id=pair.root_doc.id,
),
"overrides": overrides,
},
headers={"trigger_source": trigger_source},
)
else:
consume_tasks = []
overrides = (
DocumentMetadataOverrides().from_document(pair.root_doc)
if include_metadata
else DocumentMetadataOverrides()
)
if user is not None:
overrides.owner_id = user.id
overrides.actor_id = user.id
pdf.remove_unreferenced_resources()
pdf.save(filepath)
consume_tasks.append(
consume_file.s(
input_doc=ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=filepath,
),
overrides=overrides,
).set(headers={"trigger_source": trigger_source}),
)
if delete_original:
chord(
header=consume_tasks,
body=delete.si([doc.id]),
).delay()
if update_document:
# Create a new version rather than modifying the root/original in place.
overrides = (
DocumentMetadataOverrides().from_document(pair.root_doc)
if include_metadata
else DocumentMetadataOverrides()
)
if user is not None:
overrides.owner_id = user.id
overrides.actor_id = user.id
consume_file.apply_async(
kwargs={
"input_doc": ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=filepath,
root_document_id=pair.root_doc.id,
),
"overrides": overrides,
},
headers={"trigger_source": trigger_source},
)
else:
group(consume_tasks).delay()
consume_tasks = []
overrides = (
DocumentMetadataOverrides().from_document(pair.root_doc)
if include_metadata
else DocumentMetadataOverrides()
)
if user is not None:
overrides.owner_id = user.id
overrides.actor_id = user.id
consume_tasks.append(
consume_file.s(
input_doc=ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=filepath,
),
overrides=overrides,
).set(headers={"trigger_source": trigger_source}),
)
if delete_original:
chord(
header=consume_tasks,
body=delete.si([doc.id]),
).delay()
else:
group(consume_tasks).delay()
except Exception as e:
logger.exception(
+64
View File
@@ -1494,6 +1494,33 @@ class TestPDFActions(DirectoriesMixin, TestCase):
self.assertEqual(task_kwargs["input_doc"].root_document_id, doc.id)
self.assertIsNotNone(task_kwargs["overrides"])
@mock.patch("documents.tasks.consume_file.apply_async")
@mock.patch("documents.bulk_edit.tempfile.mkdtemp")
@mock.patch("pikepdf.open")
def test_remove_password_update_document_skips_unencrypted_pdf(
self,
mock_open,
mock_mkdtemp,
mock_consume_delay,
) -> None:
doc = self.doc1
fake_pdf = mock.MagicMock()
fake_pdf.is_encrypted = False
mock_open.return_value.__enter__.return_value = fake_pdf
result = bulk_edit.remove_password(
[doc.id],
password="secret",
update_document=True,
)
self.assertEqual(result, "OK")
mock_open.assert_called_once_with(doc.source_path, password="secret")
fake_pdf.remove_unreferenced_resources.assert_not_called()
fake_pdf.save.assert_not_called()
mock_mkdtemp.assert_not_called()
mock_consume_delay.assert_not_called()
@mock.patch("documents.bulk_edit.update_document_content_maybe_archive_file.delay")
@mock.patch("documents.tasks.consume_file.apply_async")
@mock.patch("documents.bulk_edit.tempfile.mkdtemp")
@@ -1618,6 +1645,43 @@ class TestPDFActions(DirectoriesMixin, TestCase):
mock_group.return_value.delay.assert_called_once()
mock_chord.assert_not_called()
@mock.patch("documents.bulk_edit.delete")
@mock.patch("documents.bulk_edit.chord")
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
@mock.patch("documents.bulk_edit.tempfile.mkdtemp")
@mock.patch("pikepdf.open")
def test_remove_password_skips_unencrypted_pdf_without_queueing(
self,
mock_open: mock.Mock,
mock_mkdtemp: mock.Mock,
mock_consume_file: mock.Mock,
mock_group: mock.Mock,
mock_chord: mock.Mock,
mock_delete: mock.Mock,
) -> None:
doc = self.doc2
fake_pdf = mock.MagicMock()
fake_pdf.is_encrypted = False
mock_open.return_value.__enter__.return_value = fake_pdf
result = bulk_edit.remove_password(
[doc.id],
password="secret",
update_document=False,
delete_original=True,
)
self.assertEqual(result, "OK")
mock_open.assert_called_once_with(doc.source_path, password="secret")
fake_pdf.remove_unreferenced_resources.assert_not_called()
fake_pdf.save.assert_not_called()
mock_mkdtemp.assert_not_called()
mock_consume_file.assert_not_called()
mock_group.assert_not_called()
mock_chord.assert_not_called()
mock_delete.si.assert_not_called()
@mock.patch("documents.bulk_edit.delete")
@mock.patch("documents.bulk_edit.chord")
@mock.patch("documents.bulk_edit.group")