mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-07-01 17:54:25 +00:00
Breaking: Decouple OCR control from archive file control (#12448)
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com> Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,25 @@
|
||||
"""Tests for paperless.parsers.utils helpers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from paperless.parsers.utils import is_tagged_pdf
|
||||
|
||||
# Directory holding the sample PDFs used below, resolved relative to this
# test module (``<this file's directory>/samples/tesseract``).
SAMPLES = Path(__file__).parent / "samples" / "tesseract"
|
||||
|
||||
|
||||
class TestIsTaggedPdf:
    """Checks for ``is_tagged_pdf`` across sample, missing, and malformed inputs."""

    def test_tagged_pdf_returns_true(self) -> None:
        """The ``simple-digital.pdf`` sample is expected to yield ``True``."""
        sample = SAMPLES / "simple-digital.pdf"

        outcome = is_tagged_pdf(sample)

        assert outcome is True

    def test_untagged_pdf_returns_false(self) -> None:
        """The ``multi-page-images.pdf`` sample is expected to yield ``False``."""
        sample = SAMPLES / "multi-page-images.pdf"

        outcome = is_tagged_pdf(sample)

        assert outcome is False

    def test_nonexistent_path_returns_false(self) -> None:
        """A path that does not exist is expected to yield ``False``."""
        missing = Path("/nonexistent/file.pdf")

        outcome = is_tagged_pdf(missing)

        assert outcome is False

    def test_corrupt_pdf_returns_false(self, tmp_path: Path) -> None:
        """A file whose bytes are not a PDF is expected to yield ``False``."""
        # Write arbitrary non-PDF bytes into pytest's per-test temp directory.
        corrupt_file = tmp_path / "bad.pdf"
        corrupt_file.write_bytes(b"not a pdf")

        outcome = is_tagged_pdf(corrupt_file)

        assert outcome is False
|
||||
Reference in New Issue
Block a user