From 38d2abb982163bba3c497cfab20a6d3689691380 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Thu, 26 Mar 2026 13:14:29 -0700 Subject: [PATCH] feat!: rename OCR_SKIP_ARCHIVE_FILE to ARCHIVE_FILE_GENERATION Rename the Django setting OCR_SKIP_ARCHIVE_FILE to ARCHIVE_FILE_GENERATION and the env var PAPERLESS_OCR_SKIP_ARCHIVE_FILE to PAPERLESS_ARCHIVE_FILE_GENERATION. Rename the OcrConfig attribute skip_archive_file to archive_file_generation. Update checks.py error messages and all tests accordingly. Co-Authored-By: Claude Sonnet 4.6 --- src/paperless/checks.py | 6 +++--- src/paperless/config.py | 6 +++--- src/paperless/parsers/tesseract.py | 7 +++++-- src/paperless/settings/__init__.py | 4 ++-- src/paperless/tests/parsers/test_tesseract_parser.py | 10 +++++----- src/paperless/tests/test_checks.py | 4 ++-- 6 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/paperless/checks.py b/src/paperless/checks.py index e8ced2a76..f8727c961 100644 --- a/src/paperless/checks.py +++ b/src/paperless/checks.py @@ -135,11 +135,11 @@ def settings_values_check(app_configs, **kwargs): if settings.OCR_MODE not in {"auto", "force", "redo", "off"}: msgs.append(Error(f'OCR output mode "{settings.OCR_MODE}" is not valid')) - if settings.OCR_SKIP_ARCHIVE_FILE not in {"auto", "always", "never"}: + if settings.ARCHIVE_FILE_GENERATION not in {"auto", "always", "never"}: msgs.append( Error( - "OCR_SKIP_ARCHIVE_FILE setting " - f'"{settings.OCR_SKIP_ARCHIVE_FILE}" is not valid', + "PAPERLESS_ARCHIVE_FILE_GENERATION setting " + f'"{settings.ARCHIVE_FILE_GENERATION}" is not valid', ), ) diff --git a/src/paperless/config.py b/src/paperless/config.py index fd2c1620d..418112e03 100644 --- a/src/paperless/config.py +++ b/src/paperless/config.py @@ -46,7 +46,7 @@ class OcrConfig(OutputTypeConfig): pages: int | None = dataclasses.field(init=False) language: str = dataclasses.field(init=False) mode: str = dataclasses.field(init=False) - skip_archive_file: str = dataclasses.field(init=False) + archive_file_generation: str = dataclasses.field(init=False) image_dpi: int | None = dataclasses.field(init=False) clean: str = dataclasses.field(init=False) deskew: bool = dataclasses.field(init=False) @@ -64,8 +64,8 @@ class OcrConfig(OutputTypeConfig): self.pages = app_config.pages or settings.OCR_PAGES self.language = app_config.language or settings.OCR_LANGUAGE self.mode = app_config.mode or settings.OCR_MODE - self.skip_archive_file = ( - app_config.skip_archive_file or settings.OCR_SKIP_ARCHIVE_FILE + self.archive_file_generation = ( + app_config.skip_archive_file or settings.ARCHIVE_FILE_GENERATION ) self.image_dpi = app_config.image_dpi or settings.OCR_IMAGE_DPI self.clean = app_config.unpaper_clean or settings.OCR_CLEAN diff --git a/src/paperless/parsers/tesseract.py b/src/paperless/parsers/tesseract.py index 5379cc1a5..6716194f4 100644 --- a/src/paperless/parsers/tesseract.py +++ b/src/paperless/parsers/tesseract.py @@ -422,7 +422,7 @@ class RasterisedDocumentParser: skip_archive_for_text = self.settings.mode not in { ModeChoices.FORCE, ModeChoices.REDO, - } and self.settings.skip_archive_file in { + } and self.settings.archive_file_generation in { ArchiveFileGenerationChoices.NEVER, ArchiveFileGenerationChoices.AUTO, } @@ -455,7 +455,10 @@ class RasterisedDocumentParser: self.log.debug(f"Calling OCRmyPDF with args: {args}") ocrmypdf.ocr(**args) - if self.settings.skip_archive_file != ArchiveFileGenerationChoices.NEVER: + if ( + self.settings.archive_file_generation + != ArchiveFileGenerationChoices.NEVER + ): self.archive_path = archive_path self.text = self.extract_text(sidecar_file, archive_path) diff --git a/src/paperless/settings/__init__.py b/src/paperless/settings/__init__.py index e0bd0ac94..1e1f7ecec 100644 --- a/src/paperless/settings/__init__.py +++ b/src/paperless/settings/__init__.py @@ -881,8 +881,8 @@ OCR_MODE = get_choice_from_env( default="auto", ) -OCR_SKIP_ARCHIVE_FILE = get_choice_from_env( - "PAPERLESS_OCR_SKIP_ARCHIVE_FILE", +ARCHIVE_FILE_GENERATION = get_choice_from_env( + "PAPERLESS_ARCHIVE_FILE_GENERATION", {"auto", "always", "never"}, default="auto", ) diff --git a/src/paperless/tests/parsers/test_tesseract_parser.py b/src/paperless/tests/parsers/test_tesseract_parser.py index 2be76ad4b..d3d08bc41 100644 --- a/src/paperless/tests/parsers/test_tesseract_parser.py +++ b/src/paperless/tests/parsers/test_tesseract_parser.py @@ -386,7 +386,7 @@ class TestParsePdf: tesseract_parser: RasterisedDocumentParser, tesseract_samples_dir: Path, ) -> None: - tesseract_parser.settings.skip_archive_file = "always" + tesseract_parser.settings.archive_file_generation = "always" tesseract_parser.parse( tesseract_samples_dir / "with-form.pdf", "application/pdf", @@ -546,7 +546,7 @@ class TestParseMultiPage: tesseract_parser: RasterisedDocumentParser, tesseract_samples_dir: Path, ) -> None: - tesseract_parser.settings.skip_archive_file = "always" + tesseract_parser.settings.archive_file_generation = "always" tesseract_parser.parse( tesseract_samples_dir / "multi-page-digital.pdf", "application/pdf", @@ -574,7 +574,7 @@ class TestParseMultiPage: ) -> None: tesseract_parser.settings.pages = 2 tesseract_parser.settings.mode = mode - tesseract_parser.settings.skip_archive_file = "always" + tesseract_parser.settings.archive_file_generation = "always" tesseract_parser.parse( tesseract_samples_dir / "multi-page-digital.pdf", "application/pdf", @@ -819,7 +819,7 @@ class TestSkipArchive: tesseract_parser: RasterisedDocumentParser, tesseract_samples_dir: Path, ) -> None: - tesseract_parser.settings.skip_archive_file = skip_archive_file + tesseract_parser.settings.archive_file_generation = skip_archive_file tesseract_parser.parse(tesseract_samples_dir / filename, "application/pdf") text = tesseract_parser.get_text().lower() assert_ordered_substrings(text, ["page 1", "page 2", "page 3"]) @@ -850,7 +850,7 @@ class TestParseMixed: - All pages extracted; archive created; sidecar notes skipped pages """ tesseract_parser.settings.mode = "auto" - tesseract_parser.settings.skip_archive_file = "always" + tesseract_parser.settings.archive_file_generation = "always" tesseract_parser.parse( tesseract_samples_dir / "multi-page-mixed.pdf", "application/pdf", diff --git a/src/paperless/tests/test_checks.py b/src/paperless/tests/test_checks.py index 561d61430..c4935c1f1 100644 --- a/src/paperless/tests/test_checks.py +++ b/src/paperless/tests/test_checks.py @@ -136,9 +136,9 @@ class TestOcrSettingsChecks: id="deprecated-mode-now-invalid", ), pytest.param( - "OCR_SKIP_ARCHIVE_FILE", + "ARCHIVE_FILE_GENERATION", "invalid", - 'OCR_SKIP_ARCHIVE_FILE setting "invalid"', + 'PAPERLESS_ARCHIVE_FILE_GENERATION setting "invalid"', id="invalid-skip-archive-file", ), pytest.param(