mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-05-12 09:35:25 +00:00
Feat(parsers): add ParserContext and configure() to ParserProtocol
Replace the ad-hoc mailrule_id attribute assignment with a typed, immutable ParserContext dataclass and a configure() method on the Protocol: - ParserContext(frozen=True, slots=True) lives in paperless/parsers/ alongside ParserProtocol and MetadataEntry; currently carries only mailrule_id but is designed to grow with output_type, ocr_mode, and ocr_language in a future phase (decoupling parsers from settings.*) - ParserProtocol.configure(context: ParserContext) -> None is the extension point; no-op by default - MailDocumentParser.configure() reads mailrule_id into _mailrule_id - TextDocumentParser and TikaDocumentParser implement a no-op configure() - Consumer calls document_parser.configure(ParserContext(...)) before parse(), replacing the isinstance(parser, MailDocumentParser) guard and the direct attribute mutation Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -51,6 +51,7 @@ from documents.templating.workflows import parse_w_workflow_placeholders
|
||||
from documents.utils import copy_basic_file_stats
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
from documents.utils import run_subprocess
|
||||
from paperless.parsers import ParserContext
|
||||
from paperless.parsers.mail import MailDocumentParser
|
||||
from paperless.parsers.text import TextDocumentParser
|
||||
from paperless.parsers.tika import TikaDocumentParser
|
||||
@@ -473,16 +474,14 @@ class ConsumerPlugin(
|
||||
ConsumerStatusShortMessage.PARSING_DOCUMENT,
|
||||
)
|
||||
self.log.debug(f"Parsing {self.filename}...")
|
||||
if (
|
||||
isinstance(document_parser, MailDocumentParser)
|
||||
and self.input_doc.mailrule_id
|
||||
):
|
||||
document_parser.mailrule_id = self.input_doc.mailrule_id
|
||||
if isinstance(
|
||||
document_parser,
|
||||
(MailDocumentParser, TextDocumentParser, TikaDocumentParser),
|
||||
):
|
||||
# TODO(stumpylog): Remove me in the future when all parsers use new protocol
|
||||
document_parser.configure(
|
||||
ParserContext(mailrule_id=self.input_doc.mailrule_id),
|
||||
)
|
||||
document_parser.parse(self.working_copy, mime_type)
|
||||
else:
|
||||
document_parser.parse(self.working_copy, mime_type, self.filename)
|
||||
|
||||
Reference in New Issue
Block a user