From e9e1d4cccae903d2669cd2e3a058bc7f742b54f4 Mon Sep 17 00:00:00 2001
From: Trenton H <797416+stumpylog@users.noreply.github.com>
Date: Fri, 13 Mar 2026 12:09:33 -0700
Subject: [PATCH] Refactor: wire RemoteDocumentParser into consumer and fix signals
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- paperless_remote/signals.py: import from paperless.parsers.remote (new
  location after git mv). supported_mime_types() is now a classmethod
  that always returns the full set, so get_supported_mime_types() in the
  signal layer explicitly checks RemoteEngineConfig validity and returns
  {} when unconfigured — preserving the old behaviour where an
  unconfigured remote parser does not register for any MIME types.
- documents/consumer.py: extend the _parser_cleanup() shim, parse()
  dispatch, and get_thumbnail() dispatch to include RemoteDocumentParser
  alongside TextDocumentParser. Both new-style parsers use __exit__ for
  cleanup and take (document_path, mime_type) without a file_name
  argument.
Co-Authored-By: Claude Sonnet 4.6 --- src/documents/consumer.py | 10 +++++++--- src/paperless_remote/signals.py | 16 +++++++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/documents/consumer.py b/src/documents/consumer.py index cbc2198ef..b016b4fac 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -51,6 +51,7 @@ from documents.templating.workflows import parse_w_workflow_placeholders from documents.utils import copy_basic_file_stats from documents.utils import copy_file_with_basic_stats from documents.utils import run_subprocess +from paperless.parsers.remote import RemoteDocumentParser from paperless.parsers.text import TextDocumentParser from paperless_mail.parsers import MailDocumentParser @@ -67,7 +68,7 @@ def _parser_cleanup(parser: DocumentParser) -> None: TODO(stumpylog): Remove me in the future """ - if isinstance(parser, TextDocumentParser): + if isinstance(parser, (TextDocumentParser, RemoteDocumentParser)): parser.__exit__(None, None, None) else: parser.cleanup() @@ -476,7 +477,10 @@ class ConsumerPlugin( self.filename, self.input_doc.mailrule_id, ) - elif isinstance(document_parser, TextDocumentParser): + elif isinstance( + document_parser, + (TextDocumentParser, RemoteDocumentParser), + ): # TODO(stumpylog): Remove me in the future document_parser.parse(self.working_copy, mime_type) else: @@ -489,7 +493,7 @@ class ConsumerPlugin( ProgressStatusOptions.WORKING, ConsumerStatusShortMessage.GENERATING_THUMBNAIL, ) - if isinstance(document_parser, TextDocumentParser): + if isinstance(document_parser, (TextDocumentParser, RemoteDocumentParser)): # TODO(stumpylog): Remove me in the future thumbnail = document_parser.get_thumbnail(self.working_copy, mime_type) else: diff --git a/src/paperless_remote/signals.py b/src/paperless_remote/signals.py index 81955a479..a5de1c3f2 100644 --- a/src/paperless_remote/signals.py +++ b/src/paperless_remote/signals.py @@ -1,13 +1,23 @@ def get_parser(*args, **kwargs): - from 
paperless_remote.parsers import RemoteDocumentParser + from paperless.parsers.remote import RemoteDocumentParser return RemoteDocumentParser(*args, **kwargs) def get_supported_mime_types(): - from paperless_remote.parsers import RemoteDocumentParser + from django.conf import settings - return RemoteDocumentParser(None).supported_mime_types() + from paperless.parsers.remote import RemoteDocumentParser + from paperless.parsers.remote import RemoteEngineConfig + + config = RemoteEngineConfig( + engine=settings.REMOTE_OCR_ENGINE, + api_key=settings.REMOTE_OCR_API_KEY, + endpoint=settings.REMOTE_OCR_ENDPOINT, + ) + if not config.engine_is_valid(): + return {} + return RemoteDocumentParser.supported_mime_types() def remote_consumer_declaration(sender, **kwargs):