mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-05-27 00:45:23 +00:00
Fix: Password removal source file location (#12830)
Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
@@ -35,6 +35,8 @@ from documents.versioning import get_latest_version_for_root
|
||||
from documents.versioning import get_root_document
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Mapping
|
||||
|
||||
from django.contrib.auth.models import User
|
||||
|
||||
logger: logging.Logger = logging.getLogger("paperless.bulk_edit")
|
||||
@@ -882,6 +884,7 @@ def remove_password(
|
||||
source_mode: SourceMode = SourceModeChoices.LATEST_VERSION,
|
||||
user: User | None = None,
|
||||
trigger_source: PaperlessTask.TriggerSource = PaperlessTask.TriggerSource.WEB_UI,
|
||||
source_paths_by_id: Mapping[int, Path] | None = None,
|
||||
) -> Literal["OK"]:
|
||||
"""
|
||||
Remove password protection from PDF documents.
|
||||
@@ -893,9 +896,15 @@ def remove_password(
|
||||
pair = _resolve_root_and_source_doc(doc, source_mode=source_mode)
|
||||
try:
|
||||
logger.info(
|
||||
f"Attempting password removal from document {doc_ids[0]}",
|
||||
f"Attempting password removal from document {pair.root_doc.id}",
|
||||
)
|
||||
with pikepdf.open(pair.source_doc.source_path, password=password) as pdf:
|
||||
# The caller may supply an explicit source path (e.g. the staged
|
||||
# file during consumption, before source_path is populated).
|
||||
source_path = (source_paths_by_id or {}).get(
|
||||
doc.id,
|
||||
pair.source_doc.source_path,
|
||||
)
|
||||
with pikepdf.open(source_path, password=password) as pdf:
|
||||
filepath: Path = (
|
||||
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
|
||||
/ f"{pair.root_doc.id}_unprotected.pdf"
|
||||
|
||||
@@ -879,6 +879,11 @@ def run_workflows(
|
||||
)
|
||||
return None
|
||||
|
||||
# Track whether the caller supplied original_file. When set explicitly (e.g. by
|
||||
# run_workflows_added during consumption), it points at the staged file that has
|
||||
# not yet been moved into its final storage location. This matters for password
|
||||
# removal, which must read from the staged path rather than document.source_path.
|
||||
caller_supplied_original_file = original_file is not None
|
||||
if original_file is None:
|
||||
original_file = (
|
||||
document.source_path if not use_overrides else document.original_file
|
||||
@@ -956,7 +961,14 @@ def run_workflows(
|
||||
original_file,
|
||||
)
|
||||
elif action.type == WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL:
|
||||
execute_password_removal_action(action, document, logging_group)
|
||||
execute_password_removal_action(
|
||||
action,
|
||||
document,
|
||||
logging_group,
|
||||
source_file=(
|
||||
original_file if caller_supplied_original_file else None
|
||||
),
|
||||
)
|
||||
elif action.type == WorkflowAction.WorkflowActionType.MOVE_TO_TRASH:
|
||||
has_move_to_trash_action = True
|
||||
|
||||
|
||||
@@ -1480,6 +1480,44 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(task_kwargs["input_doc"].root_document_id, doc.id)
|
||||
self.assertIsNotNone(task_kwargs["overrides"])
|
||||
|
||||
@mock.patch("documents.bulk_edit.update_document_content_maybe_archive_file.delay")
|
||||
@mock.patch("documents.tasks.consume_file.apply_async")
|
||||
@mock.patch("documents.bulk_edit.tempfile.mkdtemp")
|
||||
@mock.patch("pikepdf.open")
|
||||
def test_remove_password_update_document_uses_source_paths(
|
||||
self,
|
||||
mock_open,
|
||||
mock_mkdtemp,
|
||||
mock_consume_delay,
|
||||
mock_update_document,
|
||||
) -> None:
|
||||
doc = self.doc1
|
||||
source_file = self.dirs.scratch_dir / "consumption-source.pdf"
|
||||
source_file.write_bytes(b"protected pdf content")
|
||||
temp_dir = self.dirs.scratch_dir / "remove-password-source-file"
|
||||
temp_dir.mkdir(parents=True, exist_ok=True)
|
||||
mock_mkdtemp.return_value = str(temp_dir)
|
||||
|
||||
fake_pdf = mock.MagicMock()
|
||||
|
||||
def save_side_effect(target_path):
|
||||
Path(target_path).write_bytes(b"new pdf content")
|
||||
|
||||
fake_pdf.save.side_effect = save_side_effect
|
||||
mock_open.return_value.__enter__.return_value = fake_pdf
|
||||
|
||||
result = bulk_edit.remove_password(
|
||||
[doc.id],
|
||||
password="secret",
|
||||
update_document=True,
|
||||
source_paths_by_id={doc.id: source_file},
|
||||
)
|
||||
|
||||
self.assertEqual(result, "OK")
|
||||
mock_open.assert_called_once_with(source_file, password="secret")
|
||||
mock_update_document.assert_not_called()
|
||||
mock_consume_delay.assert_called_once()
|
||||
|
||||
@mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
|
||||
@mock.patch("documents.tasks.consume_file.apply_async")
|
||||
@mock.patch("pikepdf.open")
|
||||
|
||||
@@ -4185,12 +4185,14 @@ class TestWorkflows(
|
||||
password="wrong",
|
||||
update_document=True,
|
||||
user=doc.owner,
|
||||
source_paths_by_id=None,
|
||||
),
|
||||
mock.call(
|
||||
[doc.id],
|
||||
password="right",
|
||||
update_document=True,
|
||||
user=doc.owner,
|
||||
source_paths_by_id=None,
|
||||
),
|
||||
],
|
||||
)
|
||||
@@ -4301,11 +4303,11 @@ class TestWorkflows(
|
||||
title="Protected",
|
||||
)
|
||||
|
||||
with self.captureOnCommitCallbacks(execute=True):
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__,
|
||||
document=doc,
|
||||
)
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__,
|
||||
document=doc,
|
||||
original_file=original_file,
|
||||
)
|
||||
|
||||
assert mock_remove_password.call_count == 2
|
||||
mock_remove_password.assert_has_calls(
|
||||
@@ -4315,73 +4317,70 @@ class TestWorkflows(
|
||||
password="first",
|
||||
update_document=True,
|
||||
user=doc.owner,
|
||||
source_paths_by_id={doc.id: original_file},
|
||||
),
|
||||
mock.call(
|
||||
[doc.id],
|
||||
password="second",
|
||||
update_document=True,
|
||||
user=doc.owner,
|
||||
source_paths_by_id={doc.id: original_file},
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
# ensure handler disconnected after first run
|
||||
with self.captureOnCommitCallbacks(execute=True):
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__,
|
||||
document=doc,
|
||||
)
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__,
|
||||
document=doc,
|
||||
)
|
||||
assert mock_remove_password.call_count == 2
|
||||
|
||||
@mock.patch("documents.bulk_edit.remove_password")
|
||||
def test_password_removal_deferred_until_transaction_commit(
|
||||
def test_password_removal_document_added_uses_original_file(
|
||||
self,
|
||||
mock_remove_password,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Workflow password removal action triggered during consumption
|
||||
- document_consumption_finished signal fires inside a transaction
|
||||
- Workflow password removal action on a DOCUMENT_ADDED trigger
|
||||
- run_workflows called with an explicit original_file (staged file
|
||||
from the consumer, before the source path is populated)
|
||||
WHEN:
|
||||
- Signal fires but transaction has not yet committed
|
||||
- The workflow runs
|
||||
THEN:
|
||||
- Password removal is not attempted until the transaction commits
|
||||
- remove_password is called with source_paths_by_id pointing at the
|
||||
staged file rather than the not-yet-existing source_path
|
||||
"""
|
||||
from django.db import transaction
|
||||
|
||||
doc = Document.objects.create(
|
||||
title="Protected",
|
||||
checksum="pw-checksum-added",
|
||||
)
|
||||
trigger = WorkflowTrigger.objects.create(
|
||||
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
|
||||
)
|
||||
action = WorkflowAction.objects.create(
|
||||
type=WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL,
|
||||
passwords=["secret"],
|
||||
)
|
||||
workflow = Workflow.objects.create(name="Password workflow added")
|
||||
workflow.triggers.add(trigger)
|
||||
workflow.actions.add(action)
|
||||
|
||||
mock_remove_password.return_value = "OK"
|
||||
|
||||
temp_dir = Path(tempfile.mkdtemp())
|
||||
original_file = temp_dir / "file.pdf"
|
||||
original_file = temp_dir / "staged.pdf"
|
||||
original_file.write_bytes(b"pdf content")
|
||||
consumable = ConsumableDocument(
|
||||
source=DocumentSource.ApiUpload,
|
||||
original_file=original_file,
|
||||
)
|
||||
|
||||
execute_password_removal_action(action, consumable, logging_group=None)
|
||||
|
||||
doc = Document.objects.create(
|
||||
checksum="pw-checksum-on-commit",
|
||||
title="Protected",
|
||||
)
|
||||
|
||||
with self.captureOnCommitCallbacks(execute=True):
|
||||
with transaction.atomic():
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__,
|
||||
document=doc,
|
||||
)
|
||||
mock_remove_password.assert_not_called()
|
||||
run_workflows(trigger.type, doc, original_file=original_file)
|
||||
|
||||
mock_remove_password.assert_called_once_with(
|
||||
[doc.id],
|
||||
password="secret",
|
||||
update_document=True,
|
||||
user=doc.owner,
|
||||
source_paths_by_id={doc.id: original_file},
|
||||
)
|
||||
|
||||
def test_workflow_trash_action_soft_delete(self) -> None:
|
||||
|
||||
@@ -4,7 +4,6 @@ from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from django.db import transaction
|
||||
from django.utils import timezone
|
||||
|
||||
from documents.data_models import ConsumableDocument
|
||||
@@ -277,6 +276,7 @@ def execute_password_removal_action(
|
||||
action: WorkflowAction,
|
||||
document: Document | ConsumableDocument,
|
||||
logging_group,
|
||||
source_file: Path | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Try to remove a password from a document using the configured list.
|
||||
@@ -296,15 +296,14 @@ def execute_password_removal_action(
|
||||
# hook the consumption-finished signal to attempt password removal later
|
||||
def handler(sender, **kwargs):
|
||||
consumed_document: Document = kwargs.get("document")
|
||||
document_consumption_finished.disconnect(handler)
|
||||
if consumed_document is not None:
|
||||
transaction.on_commit(
|
||||
lambda: execute_password_removal_action(
|
||||
action,
|
||||
consumed_document,
|
||||
logging_group,
|
||||
),
|
||||
execute_password_removal_action(
|
||||
action,
|
||||
consumed_document,
|
||||
logging_group,
|
||||
source_file=kwargs.get("original_file"),
|
||||
)
|
||||
document_consumption_finished.disconnect(handler)
|
||||
|
||||
document_consumption_finished.connect(handler, weak=False)
|
||||
return
|
||||
@@ -319,6 +318,7 @@ def execute_password_removal_action(
|
||||
password=password,
|
||||
update_document=True,
|
||||
user=document.owner,
|
||||
source_paths_by_id={document.id: source_file} if source_file else None,
|
||||
)
|
||||
logger.info(
|
||||
"Unlocked document %s using workflow action %s",
|
||||
|
||||
Reference in New Issue
Block a user