mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-28 16:24:19 +00:00
feat!: rename OCR_SKIP_ARCHIVE_FILE to ARCHIVE_FILE_GENERATION
Rename the Django setting OCR_SKIP_ARCHIVE_FILE to ARCHIVE_FILE_GENERATION and the env var PAPERLESS_OCR_SKIP_ARCHIVE_FILE to PAPERLESS_ARCHIVE_FILE_GENERATION. Rename the OcrConfig attribute skip_archive_file to archive_file_generation. Update checks.py error messages and all tests accordingly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -135,11 +135,11 @@ def settings_values_check(app_configs, **kwargs):
|
||||
if settings.OCR_MODE not in {"auto", "force", "redo", "off"}:
|
||||
msgs.append(Error(f'OCR output mode "{settings.OCR_MODE}" is not valid'))
|
||||
|
||||
if settings.OCR_SKIP_ARCHIVE_FILE not in {"auto", "always", "never"}:
|
||||
if settings.ARCHIVE_FILE_GENERATION not in {"auto", "always", "never"}:
|
||||
msgs.append(
|
||||
Error(
|
||||
"OCR_SKIP_ARCHIVE_FILE setting "
|
||||
f'"{settings.OCR_SKIP_ARCHIVE_FILE}" is not valid',
|
||||
"PAPERLESS_ARCHIVE_FILE_GENERATION setting "
|
||||
f'"{settings.ARCHIVE_FILE_GENERATION}" is not valid',
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ class OcrConfig(OutputTypeConfig):
|
||||
pages: int | None = dataclasses.field(init=False)
|
||||
language: str = dataclasses.field(init=False)
|
||||
mode: str = dataclasses.field(init=False)
|
||||
skip_archive_file: str = dataclasses.field(init=False)
|
||||
archive_file_generation: str = dataclasses.field(init=False)
|
||||
image_dpi: int | None = dataclasses.field(init=False)
|
||||
clean: str = dataclasses.field(init=False)
|
||||
deskew: bool = dataclasses.field(init=False)
|
||||
@@ -64,8 +64,8 @@ class OcrConfig(OutputTypeConfig):
|
||||
self.pages = app_config.pages or settings.OCR_PAGES
|
||||
self.language = app_config.language or settings.OCR_LANGUAGE
|
||||
self.mode = app_config.mode or settings.OCR_MODE
|
||||
self.skip_archive_file = (
|
||||
app_config.skip_archive_file or settings.OCR_SKIP_ARCHIVE_FILE
|
||||
self.archive_file_generation = (
|
||||
app_config.skip_archive_file or settings.ARCHIVE_FILE_GENERATION
|
||||
)
|
||||
self.image_dpi = app_config.image_dpi or settings.OCR_IMAGE_DPI
|
||||
self.clean = app_config.unpaper_clean or settings.OCR_CLEAN
|
||||
|
||||
@@ -422,7 +422,7 @@ class RasterisedDocumentParser:
|
||||
skip_archive_for_text = self.settings.mode not in {
|
||||
ModeChoices.FORCE,
|
||||
ModeChoices.REDO,
|
||||
} and self.settings.skip_archive_file in {
|
||||
} and self.settings.archive_file_generation in {
|
||||
ArchiveFileGenerationChoices.NEVER,
|
||||
ArchiveFileGenerationChoices.AUTO,
|
||||
}
|
||||
@@ -455,7 +455,10 @@ class RasterisedDocumentParser:
|
||||
self.log.debug(f"Calling OCRmyPDF with args: {args}")
|
||||
ocrmypdf.ocr(**args)
|
||||
|
||||
if self.settings.skip_archive_file != ArchiveFileGenerationChoices.NEVER:
|
||||
if (
|
||||
self.settings.archive_file_generation
|
||||
!= ArchiveFileGenerationChoices.NEVER
|
||||
):
|
||||
self.archive_path = archive_path
|
||||
|
||||
self.text = self.extract_text(sidecar_file, archive_path)
|
||||
|
||||
@@ -881,8 +881,8 @@ OCR_MODE = get_choice_from_env(
|
||||
default="auto",
|
||||
)
|
||||
|
||||
OCR_SKIP_ARCHIVE_FILE = get_choice_from_env(
|
||||
"PAPERLESS_OCR_SKIP_ARCHIVE_FILE",
|
||||
ARCHIVE_FILE_GENERATION = get_choice_from_env(
|
||||
"PAPERLESS_ARCHIVE_FILE_GENERATION",
|
||||
{"auto", "always", "never"},
|
||||
default="auto",
|
||||
)
|
||||
|
||||
@@ -386,7 +386,7 @@ class TestParsePdf:
|
||||
tesseract_parser: RasterisedDocumentParser,
|
||||
tesseract_samples_dir: Path,
|
||||
) -> None:
|
||||
tesseract_parser.settings.skip_archive_file = "always"
|
||||
tesseract_parser.settings.archive_file_generation = "always"
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "with-form.pdf",
|
||||
"application/pdf",
|
||||
@@ -546,7 +546,7 @@ class TestParseMultiPage:
|
||||
tesseract_parser: RasterisedDocumentParser,
|
||||
tesseract_samples_dir: Path,
|
||||
) -> None:
|
||||
tesseract_parser.settings.skip_archive_file = "always"
|
||||
tesseract_parser.settings.archive_file_generation = "always"
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "multi-page-digital.pdf",
|
||||
"application/pdf",
|
||||
@@ -574,7 +574,7 @@ class TestParseMultiPage:
|
||||
) -> None:
|
||||
tesseract_parser.settings.pages = 2
|
||||
tesseract_parser.settings.mode = mode
|
||||
tesseract_parser.settings.skip_archive_file = "always"
|
||||
tesseract_parser.settings.archive_file_generation = "always"
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "multi-page-digital.pdf",
|
||||
"application/pdf",
|
||||
@@ -819,7 +819,7 @@ class TestSkipArchive:
|
||||
tesseract_parser: RasterisedDocumentParser,
|
||||
tesseract_samples_dir: Path,
|
||||
) -> None:
|
||||
tesseract_parser.settings.skip_archive_file = skip_archive_file
|
||||
tesseract_parser.settings.archive_file_generation = skip_archive_file
|
||||
tesseract_parser.parse(tesseract_samples_dir / filename, "application/pdf")
|
||||
text = tesseract_parser.get_text().lower()
|
||||
assert_ordered_substrings(text, ["page 1", "page 2", "page 3"])
|
||||
@@ -850,7 +850,7 @@ class TestParseMixed:
|
||||
- All pages extracted; archive created; sidecar notes skipped pages
|
||||
"""
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.skip_archive_file = "always"
|
||||
tesseract_parser.settings.archive_file_generation = "always"
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "multi-page-mixed.pdf",
|
||||
"application/pdf",
|
||||
|
||||
@@ -136,9 +136,9 @@ class TestOcrSettingsChecks:
|
||||
id="deprecated-mode-now-invalid",
|
||||
),
|
||||
pytest.param(
|
||||
"OCR_SKIP_ARCHIVE_FILE",
|
||||
"ARCHIVE_FILE_GENERATION",
|
||||
"invalid",
|
||||
'OCR_SKIP_ARCHIVE_FILE setting "invalid"',
|
||||
'PAPERLESS_ARCHIVE_FILE_GENERATION setting "invalid"',
|
||||
id="invalid-skip-archive-file",
|
||||
),
|
||||
pytest.param(
|
||||
|
||||
Reference in New Issue
Block a user