From 8148f2ced2bc5ee11e202efef18d28ec2113ce31 Mon Sep 17 00:00:00 2001
From: Trenton H <797416+stumpylog@users.noreply.github.com>
Date: Thu, 19 Mar 2026 08:28:17 -0700
Subject: [PATCH] Feat(parsers): call configure(ParserContext()) in
 update_document task
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Apply the same new-style parser shim pattern as the consumer to
update_document_content_maybe_archive_file:

- Call __enter__ for Text/Tika parsers after instantiation
- Call configure(ParserContext()) before parse() for all new-style parsers
  (mailrule_id is not available here — this is a re-process of an
  existing document, so the default empty context is correct)
- Call parse(path, mime_type) with 2 args for new-style parsers
- Call get_thumbnail(path, mime_type) with 2 args for new-style parsers
- Call __exit__(None, None, None) instead of cleanup() in the finally
  block for new-style parsers; legacy parsers still call cleanup()

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/documents/tasks.py | 51 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 9 deletions(-)

diff --git a/src/documents/tasks.py b/src/documents/tasks.py
index 378695731..6f4e20c29 100644
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -65,6 +65,10 @@ from documents.signals.handlers import run_workflows
 from documents.signals.handlers import send_websocket_document_updated
 from documents.workflows.utils import get_workflows_for_trigger
 from paperless.config import AIConfig
+from paperless.parsers import ParserContext
+from paperless.parsers.mail import MailDocumentParser
+from paperless.parsers.text import TextDocumentParser
+from paperless.parsers.tika import TikaDocumentParser
 from paperless_ai.indexing import llm_index_add_or_update_document
 from paperless_ai.indexing import llm_index_remove_document
 from paperless_ai.indexing import update_llm_index
@@ -315,14 +319,37 @@ def update_document_content_maybe_archive_file(document_id) -> None:
 
     parser: DocumentParser = parser_class(logging_group=uuid.uuid4())
 
-    try:
-        parser.parse(document.source_path, mime_type, document.get_public_filename())
+    # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
+    if isinstance(parser, (TextDocumentParser, TikaDocumentParser)):
+        parser.__enter__()
 
-        thumbnail = parser.get_thumbnail(
-            document.source_path,
-            mime_type,
-            document.get_public_filename(),
-        )
+    try:
+        # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
+        if isinstance(
+            parser,
+            (MailDocumentParser, TextDocumentParser, TikaDocumentParser),
+        ):
+            parser.configure(ParserContext())
+            parser.parse(document.source_path, mime_type)
+        else:
+            parser.parse(
+                document.source_path,
+                mime_type,
+                document.get_public_filename(),
+            )
+
+        # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
+        if isinstance(
+            parser,
+            (MailDocumentParser, TextDocumentParser, TikaDocumentParser),
+        ):
+            thumbnail = parser.get_thumbnail(document.source_path, mime_type)
+        else:
+            thumbnail = parser.get_thumbnail(
+                document.source_path,
+                mime_type,
+                document.get_public_filename(),
+            )
 
         with transaction.atomic():
             oldDocument = Document.objects.get(pk=document.pk)
@@ -403,8 +430,14 @@ def update_document_content_maybe_archive_file(document_id) -> None:
             f"Error while parsing document {document} (ID: {document_id})",
         )
     finally:
-        # TODO(stumpylog): Cleanup once all parsers are handled
-        parser.cleanup()
+        # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
+        if isinstance(
+            parser,
+            (MailDocumentParser, TextDocumentParser, TikaDocumentParser),
+        ):
+            parser.__exit__(None, None, None)
+        else:
+            parser.cleanup()
 
 
 @shared_task