Fix: Password removal source file location (#12830)

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
Trenton H
2026-05-19 13:52:04 -07:00
committed by GitHub
parent 9f45737b94
commit bd86dca57e
5 changed files with 105 additions and 47 deletions
+11 -2
View File
@@ -35,6 +35,8 @@ from documents.versioning import get_latest_version_for_root
from documents.versioning import get_root_document
if TYPE_CHECKING:
from collections.abc import Mapping
from django.contrib.auth.models import User
logger: logging.Logger = logging.getLogger("paperless.bulk_edit")
@@ -882,6 +884,7 @@ def remove_password(
source_mode: SourceMode = SourceModeChoices.LATEST_VERSION,
user: User | None = None,
trigger_source: PaperlessTask.TriggerSource = PaperlessTask.TriggerSource.WEB_UI,
source_paths_by_id: Mapping[int, Path] | None = None,
) -> Literal["OK"]:
"""
Remove password protection from PDF documents.
@@ -893,9 +896,15 @@ def remove_password(
pair = _resolve_root_and_source_doc(doc, source_mode=source_mode)
try:
logger.info(
f"Attempting password removal from document {doc_ids[0]}",
f"Attempting password removal from document {pair.root_doc.id}",
)
with pikepdf.open(pair.source_doc.source_path, password=password) as pdf:
# The caller may supply an explicit source path (e.g. the staged
# file during consumption, before source_path is populated).
source_path = (source_paths_by_id or {}).get(
doc.id,
pair.source_doc.source_path,
)
with pikepdf.open(source_path, password=password) as pdf:
filepath: Path = (
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
/ f"{pair.root_doc.id}_unprotected.pdf"
+13 -1
View File
@@ -879,6 +879,11 @@ def run_workflows(
)
return None
# Track whether the caller supplied original_file. When set explicitly (e.g. by
# run_workflows_added during consumption), it points at the staged file that has
# not yet been moved into its final storage location. This matters for password
# removal, which must read from the staged path rather than document.source_path.
caller_supplied_original_file = original_file is not None
if original_file is None:
original_file = (
document.source_path if not use_overrides else document.original_file
@@ -956,7 +961,14 @@ def run_workflows(
original_file,
)
elif action.type == WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL:
execute_password_removal_action(action, document, logging_group)
execute_password_removal_action(
action,
document,
logging_group,
source_file=(
original_file if caller_supplied_original_file else None
),
)
elif action.type == WorkflowAction.WorkflowActionType.MOVE_TO_TRASH:
has_move_to_trash_action = True
+38
View File
@@ -1480,6 +1480,44 @@ class TestPDFActions(DirectoriesMixin, TestCase):
self.assertEqual(task_kwargs["input_doc"].root_document_id, doc.id)
self.assertIsNotNone(task_kwargs["overrides"])
@mock.patch("documents.bulk_edit.update_document_content_maybe_archive_file.delay")
@mock.patch("documents.tasks.consume_file.apply_async")
@mock.patch("documents.bulk_edit.tempfile.mkdtemp")
@mock.patch("pikepdf.open")
def test_remove_password_update_document_uses_source_paths(
self,
mock_open,
mock_mkdtemp,
mock_consume_delay,
mock_update_document,
) -> None:
doc = self.doc1
source_file = self.dirs.scratch_dir / "consumption-source.pdf"
source_file.write_bytes(b"protected pdf content")
temp_dir = self.dirs.scratch_dir / "remove-password-source-file"
temp_dir.mkdir(parents=True, exist_ok=True)
mock_mkdtemp.return_value = str(temp_dir)
fake_pdf = mock.MagicMock()
def save_side_effect(target_path):
Path(target_path).write_bytes(b"new pdf content")
fake_pdf.save.side_effect = save_side_effect
mock_open.return_value.__enter__.return_value = fake_pdf
result = bulk_edit.remove_password(
[doc.id],
password="secret",
update_document=True,
source_paths_by_id={doc.id: source_file},
)
self.assertEqual(result, "OK")
mock_open.assert_called_once_with(source_file, password="secret")
mock_update_document.assert_not_called()
mock_consume_delay.assert_called_once()
@mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
@mock.patch("documents.tasks.consume_file.apply_async")
@mock.patch("pikepdf.open")
+35 -36
View File
@@ -4185,12 +4185,14 @@ class TestWorkflows(
password="wrong",
update_document=True,
user=doc.owner,
source_paths_by_id=None,
),
mock.call(
[doc.id],
password="right",
update_document=True,
user=doc.owner,
source_paths_by_id=None,
),
],
)
@@ -4301,11 +4303,11 @@ class TestWorkflows(
title="Protected",
)
with self.captureOnCommitCallbacks(execute=True):
document_consumption_finished.send(
sender=self.__class__,
document=doc,
)
document_consumption_finished.send(
sender=self.__class__,
document=doc,
original_file=original_file,
)
assert mock_remove_password.call_count == 2
mock_remove_password.assert_has_calls(
@@ -4315,73 +4317,70 @@ class TestWorkflows(
password="first",
update_document=True,
user=doc.owner,
source_paths_by_id={doc.id: original_file},
),
mock.call(
[doc.id],
password="second",
update_document=True,
user=doc.owner,
source_paths_by_id={doc.id: original_file},
),
],
)
# ensure handler disconnected after first run
with self.captureOnCommitCallbacks(execute=True):
document_consumption_finished.send(
sender=self.__class__,
document=doc,
)
document_consumption_finished.send(
sender=self.__class__,
document=doc,
)
assert mock_remove_password.call_count == 2
@mock.patch("documents.bulk_edit.remove_password")
def test_password_removal_deferred_until_transaction_commit(
def test_password_removal_document_added_uses_original_file(
self,
mock_remove_password,
) -> None:
"""
GIVEN:
- Workflow password removal action triggered during consumption
- document_consumption_finished signal fires inside a transaction
- Workflow password removal action on a DOCUMENT_ADDED trigger
- run_workflows called with an explicit original_file (staged file
from the consumer, before the source path is populated)
WHEN:
- Signal fires but transaction has not yet committed
- The workflow runs
THEN:
- Password removal is not attempted until the transaction commits
- remove_password is called with source_paths_by_id pointing at the
staged file rather than the not-yet-existing source_path
"""
from django.db import transaction
doc = Document.objects.create(
title="Protected",
checksum="pw-checksum-added",
)
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
)
action = WorkflowAction.objects.create(
type=WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL,
passwords=["secret"],
)
workflow = Workflow.objects.create(name="Password workflow added")
workflow.triggers.add(trigger)
workflow.actions.add(action)
mock_remove_password.return_value = "OK"
temp_dir = Path(tempfile.mkdtemp())
original_file = temp_dir / "file.pdf"
original_file = temp_dir / "staged.pdf"
original_file.write_bytes(b"pdf content")
consumable = ConsumableDocument(
source=DocumentSource.ApiUpload,
original_file=original_file,
)
execute_password_removal_action(action, consumable, logging_group=None)
doc = Document.objects.create(
checksum="pw-checksum-on-commit",
title="Protected",
)
with self.captureOnCommitCallbacks(execute=True):
with transaction.atomic():
document_consumption_finished.send(
sender=self.__class__,
document=doc,
)
mock_remove_password.assert_not_called()
run_workflows(trigger.type, doc, original_file=original_file)
mock_remove_password.assert_called_once_with(
[doc.id],
password="secret",
update_document=True,
user=doc.owner,
source_paths_by_id={doc.id: original_file},
)
def test_workflow_trash_action_soft_delete(self) -> None:
+8 -8
View File
@@ -4,7 +4,6 @@ from pathlib import Path
from django.conf import settings
from django.contrib.auth.models import User
from django.db import transaction
from django.utils import timezone
from documents.data_models import ConsumableDocument
@@ -277,6 +276,7 @@ def execute_password_removal_action(
action: WorkflowAction,
document: Document | ConsumableDocument,
logging_group,
source_file: Path | None = None,
) -> None:
"""
Try to remove a password from a document using the configured list.
@@ -296,15 +296,14 @@ def execute_password_removal_action(
# hook the consumption-finished signal to attempt password removal later
def handler(sender, **kwargs):
consumed_document: Document = kwargs.get("document")
document_consumption_finished.disconnect(handler)
if consumed_document is not None:
transaction.on_commit(
lambda: execute_password_removal_action(
action,
consumed_document,
logging_group,
),
execute_password_removal_action(
action,
consumed_document,
logging_group,
source_file=kwargs.get("original_file"),
)
document_consumption_finished.disconnect(handler)
document_consumption_finished.connect(handler, weak=False)
return
@@ -319,6 +318,7 @@ def execute_password_removal_action(
password=password,
update_document=True,
user=document.owner,
source_paths_by_id={document.id: source_file} if source_file else None,
)
logger.info(
"Unlocked document %s using workflow action %s",