feat!: rename OCR_SKIP_ARCHIVE_FILE to ARCHIVE_FILE_GENERATION

Rename the Django setting OCR_SKIP_ARCHIVE_FILE to ARCHIVE_FILE_GENERATION
and the env var PAPERLESS_OCR_SKIP_ARCHIVE_FILE to PAPERLESS_ARCHIVE_FILE_GENERATION.
Rename the OcrConfig attribute skip_archive_file to archive_file_generation;
the app_config.skip_archive_file field it falls back from keeps its old name.
Update the checks.py error message to cite PAPERLESS_ARCHIVE_FILE_GENERATION
and update all tests accordingly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Trenton H
2026-03-26 13:14:29 -07:00
parent cd653959d6
commit 38d2abb982
6 changed files with 20 additions and 17 deletions
+3 -3
View File
@@ -135,11 +135,11 @@ def settings_values_check(app_configs, **kwargs):
if settings.OCR_MODE not in {"auto", "force", "redo", "off"}:
msgs.append(Error(f'OCR output mode "{settings.OCR_MODE}" is not valid'))
if settings.OCR_SKIP_ARCHIVE_FILE not in {"auto", "always", "never"}:
if settings.ARCHIVE_FILE_GENERATION not in {"auto", "always", "never"}:
msgs.append(
Error(
"OCR_SKIP_ARCHIVE_FILE setting "
f'"{settings.OCR_SKIP_ARCHIVE_FILE}" is not valid',
"PAPERLESS_ARCHIVE_FILE_GENERATION setting "
f'"{settings.ARCHIVE_FILE_GENERATION}" is not valid',
),
)
+3 -3
View File
@@ -46,7 +46,7 @@ class OcrConfig(OutputTypeConfig):
pages: int | None = dataclasses.field(init=False)
language: str = dataclasses.field(init=False)
mode: str = dataclasses.field(init=False)
skip_archive_file: str = dataclasses.field(init=False)
archive_file_generation: str = dataclasses.field(init=False)
image_dpi: int | None = dataclasses.field(init=False)
clean: str = dataclasses.field(init=False)
deskew: bool = dataclasses.field(init=False)
@@ -64,8 +64,8 @@ class OcrConfig(OutputTypeConfig):
self.pages = app_config.pages or settings.OCR_PAGES
self.language = app_config.language or settings.OCR_LANGUAGE
self.mode = app_config.mode or settings.OCR_MODE
self.skip_archive_file = (
app_config.skip_archive_file or settings.OCR_SKIP_ARCHIVE_FILE
self.archive_file_generation = (
app_config.skip_archive_file or settings.ARCHIVE_FILE_GENERATION
)
self.image_dpi = app_config.image_dpi or settings.OCR_IMAGE_DPI
self.clean = app_config.unpaper_clean or settings.OCR_CLEAN
+5 -2
View File
@@ -422,7 +422,7 @@ class RasterisedDocumentParser:
skip_archive_for_text = self.settings.mode not in {
ModeChoices.FORCE,
ModeChoices.REDO,
} and self.settings.skip_archive_file in {
} and self.settings.archive_file_generation in {
ArchiveFileGenerationChoices.NEVER,
ArchiveFileGenerationChoices.AUTO,
}
@@ -455,7 +455,10 @@ class RasterisedDocumentParser:
self.log.debug(f"Calling OCRmyPDF with args: {args}")
ocrmypdf.ocr(**args)
if self.settings.skip_archive_file != ArchiveFileGenerationChoices.NEVER:
if (
self.settings.archive_file_generation
!= ArchiveFileGenerationChoices.NEVER
):
self.archive_path = archive_path
self.text = self.extract_text(sidecar_file, archive_path)
+2 -2
View File
@@ -881,8 +881,8 @@ OCR_MODE = get_choice_from_env(
default="auto",
)
OCR_SKIP_ARCHIVE_FILE = get_choice_from_env(
"PAPERLESS_OCR_SKIP_ARCHIVE_FILE",
ARCHIVE_FILE_GENERATION = get_choice_from_env(
"PAPERLESS_ARCHIVE_FILE_GENERATION",
{"auto", "always", "never"},
default="auto",
)
@@ -386,7 +386,7 @@ class TestParsePdf:
tesseract_parser: RasterisedDocumentParser,
tesseract_samples_dir: Path,
) -> None:
tesseract_parser.settings.skip_archive_file = "always"
tesseract_parser.settings.archive_file_generation = "always"
tesseract_parser.parse(
tesseract_samples_dir / "with-form.pdf",
"application/pdf",
@@ -546,7 +546,7 @@ class TestParseMultiPage:
tesseract_parser: RasterisedDocumentParser,
tesseract_samples_dir: Path,
) -> None:
tesseract_parser.settings.skip_archive_file = "always"
tesseract_parser.settings.archive_file_generation = "always"
tesseract_parser.parse(
tesseract_samples_dir / "multi-page-digital.pdf",
"application/pdf",
@@ -574,7 +574,7 @@ class TestParseMultiPage:
) -> None:
tesseract_parser.settings.pages = 2
tesseract_parser.settings.mode = mode
tesseract_parser.settings.skip_archive_file = "always"
tesseract_parser.settings.archive_file_generation = "always"
tesseract_parser.parse(
tesseract_samples_dir / "multi-page-digital.pdf",
"application/pdf",
@@ -819,7 +819,7 @@ class TestSkipArchive:
tesseract_parser: RasterisedDocumentParser,
tesseract_samples_dir: Path,
) -> None:
tesseract_parser.settings.skip_archive_file = skip_archive_file
tesseract_parser.settings.archive_file_generation = skip_archive_file
tesseract_parser.parse(tesseract_samples_dir / filename, "application/pdf")
text = tesseract_parser.get_text().lower()
assert_ordered_substrings(text, ["page 1", "page 2", "page 3"])
@@ -850,7 +850,7 @@ class TestParseMixed:
- All pages extracted; archive created; sidecar notes skipped pages
"""
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.skip_archive_file = "always"
tesseract_parser.settings.archive_file_generation = "always"
tesseract_parser.parse(
tesseract_samples_dir / "multi-page-mixed.pdf",
"application/pdf",
+2 -2
View File
@@ -136,9 +136,9 @@ class TestOcrSettingsChecks:
id="deprecated-mode-now-invalid",
),
pytest.param(
"OCR_SKIP_ARCHIVE_FILE",
"ARCHIVE_FILE_GENERATION",
"invalid",
'OCR_SKIP_ARCHIVE_FILE setting "invalid"',
'PAPERLESS_ARCHIVE_FILE_GENERATION setting "invalid"',
id="invalid-skip-archive-file",
),
pytest.param(