mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-06 13:49:44 +00:00
Fix: avoid re-creating new PDF when original is unencrypted
This commit is contained in:
+67
-52
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import tempfile
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Literal
|
||||
@@ -904,63 +905,77 @@ def remove_password(
|
||||
doc.id,
|
||||
pair.source_doc.source_path,
|
||||
)
|
||||
with pikepdf.open(source_path, password=password) as pdf:
|
||||
filepath: Path = (
|
||||
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
|
||||
/ f"{pair.root_doc.id}_unprotected.pdf"
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings(
|
||||
"ignore",
|
||||
message="A password was provided, but no password was needed to open this PDF.",
|
||||
category=UserWarning,
|
||||
)
|
||||
pdf.remove_unreferenced_resources()
|
||||
pdf.save(filepath)
|
||||
with pikepdf.open(source_path, password=password) as pdf:
|
||||
if not pdf.is_encrypted:
|
||||
logger.info(
|
||||
"Skipping password removal for document %s because the "
|
||||
"source PDF is not encrypted",
|
||||
pair.root_doc.id,
|
||||
)
|
||||
continue
|
||||
|
||||
if update_document:
|
||||
# Create a new version rather than modifying the root/original in place.
|
||||
overrides = (
|
||||
DocumentMetadataOverrides().from_document(pair.root_doc)
|
||||
if include_metadata
|
||||
else DocumentMetadataOverrides()
|
||||
filepath: Path = (
|
||||
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
|
||||
/ f"{pair.root_doc.id}_unprotected.pdf"
|
||||
)
|
||||
if user is not None:
|
||||
overrides.owner_id = user.id
|
||||
overrides.actor_id = user.id
|
||||
consume_file.apply_async(
|
||||
kwargs={
|
||||
"input_doc": ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file=filepath,
|
||||
root_document_id=pair.root_doc.id,
|
||||
),
|
||||
"overrides": overrides,
|
||||
},
|
||||
headers={"trigger_source": trigger_source},
|
||||
)
|
||||
else:
|
||||
consume_tasks = []
|
||||
overrides = (
|
||||
DocumentMetadataOverrides().from_document(pair.root_doc)
|
||||
if include_metadata
|
||||
else DocumentMetadataOverrides()
|
||||
)
|
||||
if user is not None:
|
||||
overrides.owner_id = user.id
|
||||
overrides.actor_id = user.id
|
||||
pdf.remove_unreferenced_resources()
|
||||
pdf.save(filepath)
|
||||
|
||||
consume_tasks.append(
|
||||
consume_file.s(
|
||||
input_doc=ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file=filepath,
|
||||
),
|
||||
overrides=overrides,
|
||||
).set(headers={"trigger_source": trigger_source}),
|
||||
)
|
||||
|
||||
if delete_original:
|
||||
chord(
|
||||
header=consume_tasks,
|
||||
body=delete.si([doc.id]),
|
||||
).delay()
|
||||
if update_document:
|
||||
# Create a new version rather than modifying the root/original in place.
|
||||
overrides = (
|
||||
DocumentMetadataOverrides().from_document(pair.root_doc)
|
||||
if include_metadata
|
||||
else DocumentMetadataOverrides()
|
||||
)
|
||||
if user is not None:
|
||||
overrides.owner_id = user.id
|
||||
overrides.actor_id = user.id
|
||||
consume_file.apply_async(
|
||||
kwargs={
|
||||
"input_doc": ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file=filepath,
|
||||
root_document_id=pair.root_doc.id,
|
||||
),
|
||||
"overrides": overrides,
|
||||
},
|
||||
headers={"trigger_source": trigger_source},
|
||||
)
|
||||
else:
|
||||
group(consume_tasks).delay()
|
||||
consume_tasks = []
|
||||
overrides = (
|
||||
DocumentMetadataOverrides().from_document(pair.root_doc)
|
||||
if include_metadata
|
||||
else DocumentMetadataOverrides()
|
||||
)
|
||||
if user is not None:
|
||||
overrides.owner_id = user.id
|
||||
overrides.actor_id = user.id
|
||||
|
||||
consume_tasks.append(
|
||||
consume_file.s(
|
||||
input_doc=ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file=filepath,
|
||||
),
|
||||
overrides=overrides,
|
||||
).set(headers={"trigger_source": trigger_source}),
|
||||
)
|
||||
|
||||
if delete_original:
|
||||
chord(
|
||||
header=consume_tasks,
|
||||
body=delete.si([doc.id]),
|
||||
).delay()
|
||||
else:
|
||||
group(consume_tasks).delay()
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(
|
||||
|
||||
@@ -1494,6 +1494,33 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(task_kwargs["input_doc"].root_document_id, doc.id)
|
||||
self.assertIsNotNone(task_kwargs["overrides"])
|
||||
|
||||
@mock.patch("documents.tasks.consume_file.apply_async")
|
||||
@mock.patch("documents.bulk_edit.tempfile.mkdtemp")
|
||||
@mock.patch("pikepdf.open")
|
||||
def test_remove_password_update_document_skips_unencrypted_pdf(
|
||||
self,
|
||||
mock_open,
|
||||
mock_mkdtemp,
|
||||
mock_consume_delay,
|
||||
) -> None:
|
||||
doc = self.doc1
|
||||
fake_pdf = mock.MagicMock()
|
||||
fake_pdf.is_encrypted = False
|
||||
mock_open.return_value.__enter__.return_value = fake_pdf
|
||||
|
||||
result = bulk_edit.remove_password(
|
||||
[doc.id],
|
||||
password="secret",
|
||||
update_document=True,
|
||||
)
|
||||
|
||||
self.assertEqual(result, "OK")
|
||||
mock_open.assert_called_once_with(doc.source_path, password="secret")
|
||||
fake_pdf.remove_unreferenced_resources.assert_not_called()
|
||||
fake_pdf.save.assert_not_called()
|
||||
mock_mkdtemp.assert_not_called()
|
||||
mock_consume_delay.assert_not_called()
|
||||
|
||||
@mock.patch("documents.bulk_edit.update_document_content_maybe_archive_file.delay")
|
||||
@mock.patch("documents.tasks.consume_file.apply_async")
|
||||
@mock.patch("documents.bulk_edit.tempfile.mkdtemp")
|
||||
@@ -1618,6 +1645,43 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
mock_group.return_value.delay.assert_called_once()
|
||||
mock_chord.assert_not_called()
|
||||
|
||||
@mock.patch("documents.bulk_edit.delete")
|
||||
@mock.patch("documents.bulk_edit.chord")
|
||||
@mock.patch("documents.bulk_edit.group")
|
||||
@mock.patch("documents.tasks.consume_file.s")
|
||||
@mock.patch("documents.bulk_edit.tempfile.mkdtemp")
|
||||
@mock.patch("pikepdf.open")
|
||||
def test_remove_password_skips_unencrypted_pdf_without_queueing(
|
||||
self,
|
||||
mock_open: mock.Mock,
|
||||
mock_mkdtemp: mock.Mock,
|
||||
mock_consume_file: mock.Mock,
|
||||
mock_group: mock.Mock,
|
||||
mock_chord: mock.Mock,
|
||||
mock_delete: mock.Mock,
|
||||
) -> None:
|
||||
doc = self.doc2
|
||||
fake_pdf = mock.MagicMock()
|
||||
fake_pdf.is_encrypted = False
|
||||
mock_open.return_value.__enter__.return_value = fake_pdf
|
||||
|
||||
result = bulk_edit.remove_password(
|
||||
[doc.id],
|
||||
password="secret",
|
||||
update_document=False,
|
||||
delete_original=True,
|
||||
)
|
||||
|
||||
self.assertEqual(result, "OK")
|
||||
mock_open.assert_called_once_with(doc.source_path, password="secret")
|
||||
fake_pdf.remove_unreferenced_resources.assert_not_called()
|
||||
fake_pdf.save.assert_not_called()
|
||||
mock_mkdtemp.assert_not_called()
|
||||
mock_consume_file.assert_not_called()
|
||||
mock_group.assert_not_called()
|
||||
mock_chord.assert_not_called()
|
||||
mock_delete.si.assert_not_called()
|
||||
|
||||
@mock.patch("documents.bulk_edit.delete")
|
||||
@mock.patch("documents.bulk_edit.chord")
|
||||
@mock.patch("documents.bulk_edit.group")
|
||||
|
||||
Reference in New Issue
Block a user