From 2dce3c78c1578a66bc6a2ef3ef247d75a4898a57 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Mon, 30 Mar 2026 13:54:28 -0700 Subject: [PATCH] Updates to plugin documentation --- .github/ISSUE_TEMPLATE/bug-report.yml | 2 -- docs/development.md | 20 ++++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 196a7b594..e87c3e0c6 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -121,7 +121,5 @@ body: required: true - label: I have already searched for relevant existing issues and discussions before opening this report. required: true - - label: I have reproduced this issue with all third-party parser plugins removed. I understand that issues caused by third-party plugins will be closed without investigation. - required: true - label: I have updated the title field above with a concise description. required: true diff --git a/docs/development.md b/docs/development.md index fdb58dfa9..ec367694a 100644 --- a/docs/development.md +++ b/docs/development.md @@ -433,15 +433,16 @@ def supported_mime_types(cls) -> dict[str, str]: **Scoring** When more than one parser can handle a file, the registry calls `score()` on -each candidate and picks the one with the highest result. Return `None` to +each candidate and picks the one with the highest result; equal scores favor third-party parsers over built-ins. Return `None` to decline handling a file even though the MIME type is listed as supported (for example, when a required external service is not configured). 
-| Score | Meaning | -| ------ | ------------------------------------------------- | -| `None` | Decline — do not handle this file | -| `10` | Default priority used by all built-in parsers | -| `> 10` | Override a built-in parser for the same MIME type | +| Score | Meaning | +| ------ | --------------------------------------------------------------------------------- | +| `None` | Decline — do not handle this file | +| `10` | Default priority used by all built-in parsers | +| `20` | Priority used by the remote OCR built-in parser, allowing it to replace Tesseract | +| `> 10` | Override a built-in parser for the same MIME type | ```python @classmethod @@ -562,6 +563,11 @@ def get_date(self) -> "datetime.datetime | None": def get_archive_path(self) -> Path | None: return self._archive_path + +def get_page_count(self, document_path: Path, mime_type: str) -> int | None: + # If the format doesn't have the concept of pages, return None + return count_pages(document_path) + ``` **Thumbnail** @@ -584,8 +590,6 @@ Implement them if your format supports the information; otherwise return `None` / `[]`. ```python -def get_page_count(self, document_path: Path, mime_type: str) -> int | None: - return count_pages(document_path) def extract_metadata( self,