diff --git a/src/paperless/parsers/tesseract.py b/src/paperless/parsers/tesseract.py
index 6716194f4..5c344fe9f 100644
--- a/src/paperless/parsers/tesseract.py
+++ b/src/paperless/parsers/tesseract.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import importlib.resources
 import logging
 import os
 import re
@@ -18,7 +19,6 @@ from documents.parsers import make_thumbnail_from_pdf
 from documents.utils import maybe_override_pixel_limit
 from documents.utils import run_subprocess
 from paperless.config import OcrConfig
-from paperless.models import ArchiveFileGenerationChoices
 from paperless.models import CleanChoices
 from paperless.models import ModeChoices
 from paperless.parsers.utils import read_file_handle_unicode_errors
@@ -289,6 +289,7 @@ class RasterisedDocumentParser:
         sidecar_file: Path,
         *,
         safe_fallback: bool = False,
+        skip_text: bool = False,
     ) -> dict[str, Any]:
         ocrmypdf_args: dict[str, Any] = {
             "input_file_or_options": input_file,
@@ -307,12 +308,14 @@ class RasterisedDocumentParser:
                 self.settings.color_conversion_strategy
             )
 
-        if self.settings.mode == ModeChoices.FORCE or safe_fallback:
+        if safe_fallback or self.settings.mode == ModeChoices.FORCE:
             ocrmypdf_args["force_ocr"] = True
-        elif self.settings.mode == ModeChoices.AUTO:
-            ocrmypdf_args["skip_text"] = True
         elif self.settings.mode == ModeChoices.REDO:
             ocrmypdf_args["redo_ocr"] = True
+        elif skip_text or self.settings.mode == ModeChoices.OFF:
+            ocrmypdf_args["skip_text"] = True
+        elif self.settings.mode == ModeChoices.AUTO:
+            pass  # no extra flag: normal OCR (text not found case)
         else:  # pragma: no cover
             raise ParseError(f"Invalid ocr mode: {self.settings.mode}")
 
@@ -397,6 +400,62 @@ class RasterisedDocumentParser:
 
         return ocrmypdf_args
 
+    def _convert_image_to_pdfa(self, document_path: Path, mime_type: str) -> Path:
+        """Convert an image to a PDF/A-2b file without invoking the OCR engine.
+
+        Uses img2pdf for the initial image->PDF wrapping, then pikepdf to stamp
+        PDF/A-2b conformance metadata.
+
+        No Tesseract and no Ghostscript are invoked.
+        """
+        import img2pdf
+        import pikepdf
+
+        plain_pdf_path = Path(self.tempdir) / "image_plain.pdf"
+        try:
+            layout_fun = None
+            if self.settings.image_dpi is not None:
+                layout_fun = img2pdf.get_fixed_dpi_layout_fun(
+                    (self.settings.image_dpi, self.settings.image_dpi),
+                )
+            plain_pdf_path.write_bytes(
+                img2pdf.convert(str(document_path), layout_fun=layout_fun),
+            )
+        except Exception as e:
+            raise ParseError(
+                f"img2pdf conversion failed for {document_path}: {e!s}",
+            ) from e
+
+        icc_data = (
+            importlib.resources.files("ocrmypdf.data").joinpath("sRGB.icc").read_bytes()
+        )
+
+        pdfa_path = Path(self.tempdir) / "archive.pdf"
+        try:
+            with pikepdf.open(plain_pdf_path) as pdf:
+                cs = pdf.make_stream(icc_data)
+                cs["/N"] = 3
+                output_intent = pikepdf.Dictionary(
+                    Type=pikepdf.Name("/OutputIntent"),
+                    S=pikepdf.Name("/GTS_PDFA1"),
+                    OutputConditionIdentifier=pikepdf.String("sRGB"),
+                    DestOutputProfile=cs,
+                )
+                pdf.Root["/OutputIntents"] = pdf.make_indirect(
+                    pikepdf.Array([output_intent]),
+                )
+                meta = pdf.open_metadata(set_pikepdf_as_editor=False)
+                meta["pdfaid:part"] = "2"
+                meta["pdfaid:conformance"] = "B"
+                pdf.save(pdfa_path)
+        except Exception as e:
+            self.log.warning(
+                f"PDF/A metadata stamping failed ({e!s}); falling back to plain PDF.",
+            )
+            pdfa_path.write_bytes(plain_pdf_path.read_bytes())
+
+        return pdfa_path
+
     def parse(
         self,
         document_path: Path,
@@ -417,48 +476,96 @@ class RasterisedDocumentParser:
             text_original = None
             original_has_text = False
 
-        # If the original has text, and the user doesn't want an archive,
-        # we're done here (but not when force/redo mode is explicitly requested)
-        skip_archive_for_text = self.settings.mode not in {
-            ModeChoices.FORCE,
-            ModeChoices.REDO,
-        } and self.settings.archive_file_generation in {
-            ArchiveFileGenerationChoices.NEVER,
-            ArchiveFileGenerationChoices.AUTO,
-        }
-        if skip_archive_for_text and original_has_text:
-            self.log.debug("Document has text, skipping OCRmyPDF entirely.")
+        # --- OCR_MODE=off: never invoke OCR engine ---
+        if self.settings.mode == ModeChoices.OFF:
+            if not produce_archive:
+                self.text = text_original or ""
+                return
+            if self.is_image(mime_type):
+                try:
+                    self.archive_path = self._convert_image_to_pdfa(
+                        document_path,
+                        mime_type,
+                    )
+                    self.text = ""
+                except Exception as e:
+                    raise ParseError(
+                        f"Image to PDF/A conversion failed: {e!s}",
+                    ) from e
+                return
+            # PDFs in off mode: PDF/A conversion only via skip_text
+            import ocrmypdf
+            from ocrmypdf import SubprocessOutputError
+
+            archive_path = Path(self.tempdir) / "archive.pdf"
+            sidecar_file = Path(self.tempdir) / "sidecar.txt"
+            args = self.construct_ocrmypdf_parameters(
+                document_path,
+                mime_type,
+                archive_path,
+                sidecar_file,
+                skip_text=True,
+            )
+            try:
+                self.log.debug(
+                    f"Calling OCRmyPDF (off mode, PDF/A conversion only): {args}",
+                )
+                ocrmypdf.ocr(**args)
+                self.archive_path = archive_path
+                self.text = self.extract_text(None, archive_path) or text_original or ""
+            except SubprocessOutputError as e:
+                if "Ghostscript PDF/A rendering" in str(e):
+                    self.log.warning(
+                        "Ghostscript PDF/A rendering failed, consider setting "
+                        "PAPERLESS_OCR_USER_ARGS: "
+                        "'{\"continue_on_soft_render_error\": true}'",
+                    )
+                raise ParseError(
+                    f"SubprocessOutputError: {e!s}. See logs for more information.",
+                ) from e
+            except Exception as e:
+                raise ParseError(f"{e.__class__.__name__}: {e!s}") from e
+            return
+
+        # --- OCR_MODE=auto: skip ocrmypdf entirely if text exists and no archive needed ---
+        if (
+            self.settings.mode == ModeChoices.AUTO
+            and original_has_text
+            and not produce_archive
+        ):
+            self.log.debug(
+                "Document has text and no archive requested; skipping OCRmyPDF entirely.",
+            )
             self.text = text_original
             return
 
-        # Either no text was in the original or there should be an archive
-        # file created, so OCR the file and create an archive with any
-        # text located via OCR
-
+        # --- All other paths: run ocrmypdf ---
         import ocrmypdf
         from ocrmypdf import EncryptedPdfError
         from ocrmypdf import InputFileError
         from ocrmypdf import SubprocessOutputError
         from ocrmypdf.exceptions import DigitalSignatureError
+        from ocrmypdf.exceptions import PriorOcrFoundError
 
         archive_path = Path(self.tempdir) / "archive.pdf"
         sidecar_file = Path(self.tempdir) / "sidecar.txt"
 
+        # auto mode with existing text: PDF/A conversion only (no OCR).
+        skip_text = self.settings.mode == ModeChoices.AUTO and original_has_text
+
         args = self.construct_ocrmypdf_parameters(
             document_path,
             mime_type,
             archive_path,
             sidecar_file,
+            skip_text=skip_text,
         )
 
         try:
             self.log.debug(f"Calling OCRmyPDF with args: {args}")
             ocrmypdf.ocr(**args)
 
-            if (
-                self.settings.archive_file_generation
-                != ArchiveFileGenerationChoices.NEVER
-            ):
+            if produce_archive:
                 self.archive_path = archive_path
 
             self.text = self.extract_text(sidecar_file, archive_path)
@@ -478,11 +585,10 @@ class RasterisedDocumentParser:
                     "Ghostscript PDF/A rendering failed, consider setting "
                     "PAPERLESS_OCR_USER_ARGS: '{\"continue_on_soft_render_error\": true}'",
                 )
-
             raise ParseError(
                 f"SubprocessOutputError: {e!s}. See logs for more information.",
             ) from e
-        except (NoTextFoundException, InputFileError) as e:
+        except (NoTextFoundException, InputFileError, PriorOcrFoundError) as e:
             self.log.warning(
                 f"Encountered an error while running OCR: {e!s}. "
                 f"Attempting force OCR to get the text.",
@@ -491,8 +597,6 @@ class RasterisedDocumentParser:
             archive_path_fallback = Path(self.tempdir) / "archive-fallback.pdf"
             sidecar_file_fallback = Path(self.tempdir) / "sidecar-fallback.txt"
 
-            # Attempt to run OCR with safe settings.
-
             args = self.construct_ocrmypdf_parameters(
                 document_path,
                 mime_type,
@@ -504,25 +608,16 @@ class RasterisedDocumentParser:
             try:
                 self.log.debug(f"Fallback: Calling OCRmyPDF with args: {args}")
                 ocrmypdf.ocr(**args)
-
-                # Don't return the archived file here, since this file
-                # is bigger and blurry due to --force-ocr.
-
                 self.text = self.extract_text(
                     sidecar_file_fallback,
                     archive_path_fallback,
                 )
-
             except Exception as e:
-                # If this fails, we have a serious issue at hand.
                 raise ParseError(f"{e.__class__.__name__}: {e!s}") from e
 
         except Exception as e:
-            # Anything else is probably serious.
             raise ParseError(f"{e.__class__.__name__}: {e!s}") from e
 
-        # As a last resort, if we still don't have any text for any reason,
-        # try to extract the text from the original document.
         if not self.text:
             if original_has_text:
                 self.text = text_original
diff --git a/src/paperless/tests/parsers/test_parse_modes.py b/src/paperless/tests/parsers/test_parse_modes.py
new file mode 100644
index 000000000..6766379c3
--- /dev/null
+++ b/src/paperless/tests/parsers/test_parse_modes.py
@@ -0,0 +1,436 @@
+"""
+Focused tests for RasterisedDocumentParser.parse() mode behaviour.
+
+These tests mock ``ocrmypdf.ocr`` so they run without a real Tesseract/OCRmyPDF
+installation and execute quickly.  The intent is to verify the *control flow*
+introduced by the ``produce_archive`` flag and the ``OCR_MODE=auto/off`` logic,
+not to test OCRmyPDF itself.
+
+Fixtures are pulled from conftest.py in this package.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import pytest
+
+if TYPE_CHECKING:
+    from pytest_mock import MockerFixture
+
+    from paperless.parsers.tesseract import RasterisedDocumentParser
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+_LONG_TEXT = "This is a test document with enough text. " * 5  # >50 chars
+_SHORT_TEXT = "Hi."  # <50 chars
+
+
+def _make_extract_text(text: str | None):
+    """Return a side_effect function for ``extract_text`` that returns *text*."""
+
+    def _extract(sidecar_file, pdf_file):
+        return text
+
+    return _extract
+
+
+# ---------------------------------------------------------------------------
+# AUTO mode — PDF with sufficient text layer
+# ---------------------------------------------------------------------------
+
+
+class TestAutoModeWithText:
+    """AUTO mode, original PDF has detectable text (>50 chars)."""
+
+    def test_auto_text_no_archive_skips_ocrmypdf(
+        self,
+        mocker: MockerFixture,
+        tesseract_parser: RasterisedDocumentParser,
+        simple_digital_pdf_file: Path,
+    ) -> None:
+        """
+        GIVEN:
+            - AUTO mode, produce_archive=False
+            - PDF with text > VALID_TEXT_LENGTH
+        WHEN:
+            - parse() is called
+        THEN:
+            - ocrmypdf.ocr is NOT called (early return path)
+            - archive_path remains None
+            - text is set from the original
+        """
+        # Patch extract_text to return long text (simulating detectable text layer)
+        mocker.patch.object(
+            tesseract_parser,
+            "extract_text",
+            return_value=_LONG_TEXT,
+        )
+        mock_ocr = mocker.patch("ocrmypdf.ocr")
+
+        tesseract_parser.settings.mode = "auto"
+        tesseract_parser.parse(
+            simple_digital_pdf_file,
+            "application/pdf",
+            produce_archive=False,
+        )
+
+        mock_ocr.assert_not_called()
+        assert tesseract_parser.archive_path is None
+        assert tesseract_parser.get_text() == _LONG_TEXT
+
+    def test_auto_text_with_archive_calls_ocrmypdf_skip_text(
+        self,
+        mocker: MockerFixture,
+        tesseract_parser: RasterisedDocumentParser,
+        simple_digital_pdf_file: Path,
+    ) -> None:
+        """
+        GIVEN:
+            - AUTO mode, produce_archive=True
+            - PDF with text > VALID_TEXT_LENGTH
+        WHEN:
+            - parse() is called
+        THEN:
+            - ocrmypdf.ocr IS called with skip_text=True
+            - archive_path is set
+        """
+        mocker.patch.object(
+            tesseract_parser,
+            "extract_text",
+            return_value=_LONG_TEXT,
+        )
+        mock_ocr = mocker.patch("ocrmypdf.ocr")
+
+        tesseract_parser.settings.mode = "auto"
+        tesseract_parser.parse(
+            simple_digital_pdf_file,
+            "application/pdf",
+            produce_archive=True,
+        )
+
+        mock_ocr.assert_called_once()
+        call_kwargs = mock_ocr.call_args.kwargs
+        assert call_kwargs.get("skip_text") is True
+        assert "force_ocr" not in call_kwargs
+        assert "redo_ocr" not in call_kwargs
+        assert tesseract_parser.archive_path is not None
+
+
+# ---------------------------------------------------------------------------
+# AUTO mode — PDF without text layer (or too short)
+# ---------------------------------------------------------------------------
+
+
+class TestAutoModeNoText:
+    """AUTO mode, original PDF has no detectable text (<= 50 chars)."""
+
+    def test_auto_no_text_with_archive_calls_ocrmypdf_no_extra_flag(
+        self,
+        mocker: MockerFixture,
+        tesseract_parser: RasterisedDocumentParser,
+        multi_page_images_pdf_file: Path,
+    ) -> None:
+        """
+        GIVEN:
+            - AUTO mode, produce_archive=True
+            - PDF with no text (or text <= VALID_TEXT_LENGTH)
+        WHEN:
+            - parse() is called
+        THEN:
+            - ocrmypdf.ocr IS called WITHOUT skip_text/force_ocr/redo_ocr
+            - archive_path is set (since produce_archive=True)
+        """
+        # Return "no text" for the original; return real text for archive
+        extract_call_count = 0
+
+        def _extract_side(sidecar_file, pdf_file):
+            nonlocal extract_call_count
+            extract_call_count += 1
+            if extract_call_count == 1:
+                return None  # original has no text
+            return _LONG_TEXT  # text from archive after OCR
+
+        mocker.patch.object(tesseract_parser, "extract_text", side_effect=_extract_side)
+        mock_ocr = mocker.patch("ocrmypdf.ocr")
+
+        tesseract_parser.settings.mode = "auto"
+        tesseract_parser.parse(
+            multi_page_images_pdf_file,
+            "application/pdf",
+            produce_archive=True,
+        )
+
+        mock_ocr.assert_called_once()
+        call_kwargs = mock_ocr.call_args.kwargs
+        assert "skip_text" not in call_kwargs
+        assert "force_ocr" not in call_kwargs
+        assert "redo_ocr" not in call_kwargs
+        assert tesseract_parser.archive_path is not None
+
+    def test_auto_no_text_no_archive_calls_ocrmypdf(
+        self,
+        mocker: MockerFixture,
+        tesseract_parser: RasterisedDocumentParser,
+        multi_page_images_pdf_file: Path,
+    ) -> None:
+        """
+        GIVEN:
+            - AUTO mode, produce_archive=False
+            - PDF with no text
+        WHEN:
+            - parse() is called
+        THEN:
+            - ocrmypdf.ocr IS called (no early return since no text detected)
+            - archive_path is NOT set (produce_archive=False)
+        """
+        extract_call_count = 0
+
+        def _extract_side(sidecar_file, pdf_file):
+            nonlocal extract_call_count
+            extract_call_count += 1
+            if extract_call_count == 1:
+                return None
+            return _LONG_TEXT
+
+        mocker.patch.object(tesseract_parser, "extract_text", side_effect=_extract_side)
+        mock_ocr = mocker.patch("ocrmypdf.ocr")
+
+        tesseract_parser.settings.mode = "auto"
+        tesseract_parser.parse(
+            multi_page_images_pdf_file,
+            "application/pdf",
+            produce_archive=False,
+        )
+
+        mock_ocr.assert_called_once()
+        assert tesseract_parser.archive_path is None
+
+
+# ---------------------------------------------------------------------------
+# OFF mode — PDF
+# ---------------------------------------------------------------------------
+
+
+class TestOffModePdf:
+    """OCR_MODE=off, document is a PDF."""
+
+    def test_off_no_archive_returns_pdftotext(
+        self,
+        mocker: MockerFixture,
+        tesseract_parser: RasterisedDocumentParser,
+        simple_digital_pdf_file: Path,
+    ) -> None:
+        """
+        GIVEN:
+            - OFF mode, produce_archive=False
+            - PDF with text
+        WHEN:
+            - parse() is called
+        THEN:
+            - ocrmypdf.ocr is NOT called
+            - archive_path is None
+            - text comes from pdftotext (extract_text)
+        """
+        mocker.patch.object(
+            tesseract_parser,
+            "extract_text",
+            return_value=_LONG_TEXT,
+        )
+        mock_ocr = mocker.patch("ocrmypdf.ocr")
+
+        tesseract_parser.settings.mode = "off"
+        tesseract_parser.parse(
+            simple_digital_pdf_file,
+            "application/pdf",
+            produce_archive=False,
+        )
+
+        mock_ocr.assert_not_called()
+        assert tesseract_parser.archive_path is None
+        assert tesseract_parser.get_text() == _LONG_TEXT
+
+    def test_off_with_archive_calls_ocrmypdf_skip_text(
+        self,
+        mocker: MockerFixture,
+        tesseract_parser: RasterisedDocumentParser,
+        simple_digital_pdf_file: Path,
+    ) -> None:
+        """
+        GIVEN:
+            - OFF mode, produce_archive=True
+            - PDF document
+        WHEN:
+            - parse() is called
+        THEN:
+            - ocrmypdf.ocr IS called with skip_text=True (PDF/A conversion only)
+            - archive_path is set
+        """
+        mocker.patch.object(
+            tesseract_parser,
+            "extract_text",
+            return_value=_LONG_TEXT,
+        )
+        mock_ocr = mocker.patch("ocrmypdf.ocr")
+
+        tesseract_parser.settings.mode = "off"
+        tesseract_parser.parse(
+            simple_digital_pdf_file,
+            "application/pdf",
+            produce_archive=True,
+        )
+
+        mock_ocr.assert_called_once()
+        call_kwargs = mock_ocr.call_args.kwargs
+        assert call_kwargs.get("skip_text") is True
+        assert "force_ocr" not in call_kwargs
+        assert "redo_ocr" not in call_kwargs
+        assert tesseract_parser.archive_path is not None
+
+
+# ---------------------------------------------------------------------------
+# OFF mode — image
+# ---------------------------------------------------------------------------
+
+
+class TestOffModeImage:
+    """OCR_MODE=off, document is an image (PNG)."""
+
+    def test_off_image_no_archive_no_ocrmypdf(
+        self,
+        mocker: MockerFixture,
+        tesseract_parser: RasterisedDocumentParser,
+        simple_png_file: Path,
+    ) -> None:
+        """
+        GIVEN:
+            - OFF mode, produce_archive=False
+            - Image document (PNG)
+        WHEN:
+            - parse() is called
+        THEN:
+            - ocrmypdf.ocr is NOT called
+            - archive_path is None
+            - text is empty string (images have no text layer)
+        """
+        mock_ocr = mocker.patch("ocrmypdf.ocr")
+
+        tesseract_parser.settings.mode = "off"
+        tesseract_parser.parse(simple_png_file, "image/png", produce_archive=False)
+
+        mock_ocr.assert_not_called()
+        assert tesseract_parser.archive_path is None
+        assert tesseract_parser.get_text() == ""
+
+    def test_off_image_with_archive_uses_img2pdf_path(
+        self,
+        mocker: MockerFixture,
+        tesseract_parser: RasterisedDocumentParser,
+        simple_png_file: Path,
+    ) -> None:
+        """
+        GIVEN:
+            - OFF mode, produce_archive=True
+            - Image document (PNG)
+        WHEN:
+            - parse() is called
+        THEN:
+            - _convert_image_to_pdfa() is called instead of ocrmypdf.ocr
+            - archive_path is set to the returned path
+            - text is empty string
+        """
+        fake_archive = Path("/tmp/fake-archive.pdf")
+        mock_convert = mocker.patch.object(
+            tesseract_parser,
+            "_convert_image_to_pdfa",
+            return_value=fake_archive,
+        )
+        mock_ocr = mocker.patch("ocrmypdf.ocr")
+
+        tesseract_parser.settings.mode = "off"
+        tesseract_parser.parse(simple_png_file, "image/png", produce_archive=True)
+
+        mock_convert.assert_called_once_with(simple_png_file, "image/png")
+        mock_ocr.assert_not_called()
+        assert tesseract_parser.archive_path == fake_archive
+        assert tesseract_parser.get_text() == ""
+
+
+# ---------------------------------------------------------------------------
+# produce_archive=False never sets archive_path for FORCE / REDO / AUTO modes
+# ---------------------------------------------------------------------------
+
+
+class TestProduceArchiveFalse:
+    """Verify produce_archive=False never results in an archive regardless of mode."""
+
+    @pytest.mark.parametrize("mode", ["force", "redo"])
+    def test_produce_archive_false_force_redo_modes(
+        self,
+        mode: str,
+        mocker: MockerFixture,
+        tesseract_parser: RasterisedDocumentParser,
+        multi_page_images_pdf_file: Path,
+    ) -> None:
+        """
+        GIVEN:
+            - FORCE or REDO mode, produce_archive=False
+            - Any PDF
+        WHEN:
+            - parse() is called (ocrmypdf mocked to succeed)
+        THEN:
+            - archive_path is NOT set even though ocrmypdf ran
+        """
+        mocker.patch.object(
+            tesseract_parser,
+            "extract_text",
+            return_value=_LONG_TEXT,
+        )
+        mocker.patch("ocrmypdf.ocr")
+
+        tesseract_parser.settings.mode = mode
+        tesseract_parser.parse(
+            multi_page_images_pdf_file,
+            "application/pdf",
+            produce_archive=False,
+        )
+
+        assert tesseract_parser.archive_path is None
+        assert tesseract_parser.get_text() is not None
+
+    def test_produce_archive_false_auto_with_text(
+        self,
+        mocker: MockerFixture,
+        tesseract_parser: RasterisedDocumentParser,
+        simple_digital_pdf_file: Path,
+    ) -> None:
+        """
+        GIVEN:
+            - AUTO mode, produce_archive=False
+            - PDF with text > VALID_TEXT_LENGTH
+        WHEN:
+            - parse() is called
+        THEN:
+            - ocrmypdf is skipped entirely (early return)
+            - archive_path is None
+        """
+        mocker.patch.object(
+            tesseract_parser,
+            "extract_text",
+            return_value=_LONG_TEXT,
+        )
+        mock_ocr = mocker.patch("ocrmypdf.ocr")
+
+        tesseract_parser.settings.mode = "auto"
+        tesseract_parser.parse(
+            simple_digital_pdf_file,
+            "application/pdf",
+            produce_archive=False,
+        )
+
+        mock_ocr.assert_not_called()
+        assert tesseract_parser.archive_path is None
diff --git a/src/paperless/tests/parsers/test_tesseract_custom_settings.py b/src/paperless/tests/parsers/test_tesseract_custom_settings.py
index bade65ef1..e6ab6cf81 100644
--- a/src/paperless/tests/parsers/test_tesseract_custom_settings.py
+++ b/src/paperless/tests/parsers/test_tesseract_custom_settings.py
@@ -89,15 +89,35 @@ class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCas
         WHEN:
             - OCR parameters are constructed
         THEN:
-            - Configuration from database is utilized
+            - Configuration from database is utilized (AUTO mode with skip_text=True
+              triggers skip_text; AUTO mode alone does not add any extra flag)
         """
+        # AUTO mode with skip_text=True explicitly passed: skip_text is set
+        with override_settings(OCR_MODE="redo"):
+            instance = ApplicationConfiguration.objects.all().first()
+            instance.mode = ModeChoices.AUTO
+            instance.save()
+
+            params = RasterisedDocumentParser(None).construct_ocrmypdf_parameters(
+                input_file="input.pdf",
+                output_file="output.pdf",
+                sidecar_file="sidecar.txt",
+                mime_type="application/pdf",
+                safe_fallback=False,
+                skip_text=True,
+            )
+        self.assertTrue(params["skip_text"])
+        self.assertNotIn("redo_ocr", params)
+        self.assertNotIn("force_ocr", params)
+
+        # AUTO mode alone (no skip_text): no extra OCR flag is set
         with override_settings(OCR_MODE="redo"):
             instance = ApplicationConfiguration.objects.all().first()
             instance.mode = ModeChoices.AUTO
             instance.save()
 
             params = self.get_params()
-        self.assertTrue(params["skip_text"])
+        self.assertNotIn("skip_text", params)
         self.assertNotIn("redo_ocr", params)
         self.assertNotIn("force_ocr", params)
 
diff --git a/src/paperless/tests/parsers/test_tesseract_parser.py b/src/paperless/tests/parsers/test_tesseract_parser.py
index d3d08bc41..088031766 100644
--- a/src/paperless/tests/parsers/test_tesseract_parser.py
+++ b/src/paperless/tests/parsers/test_tesseract_parser.py
@@ -370,15 +370,26 @@ class TestParsePdf:
         tesseract_parser: RasterisedDocumentParser,
         tesseract_samples_dir: Path,
     ) -> None:
+        """
+        GIVEN:
+            - Multi-page digital PDF with sufficient text layer
+            - Default settings (mode=auto, produce_archive=True)
+        WHEN:
+            - Document is parsed
+        THEN:
+            - Archive is created (AUTO mode + text present + produce_archive=True
+              → PDF/A conversion via skip_text)
+            - Text is extracted
+        """
         tesseract_parser.parse(
-            tesseract_samples_dir / "simple-digital.pdf",
+            tesseract_samples_dir / "multi-page-digital.pdf",
             "application/pdf",
         )
         assert tesseract_parser.archive_path is not None
         assert tesseract_parser.archive_path.is_file()
         assert_ordered_substrings(
-            tesseract_parser.get_text(),
-            ["This is a test document."],
+            tesseract_parser.get_text().lower(),
+            ["page 1", "page 2", "page 3"],
         )
 
     def test_with_form_default(
@@ -738,16 +749,18 @@ class TestSkipArchive:
         """
         GIVEN:
             - File with existing text layer
-            - Mode: auto, skip_archive_file: auto
+            - Mode: auto, produce_archive=False
         WHEN:
             - Document is parsed
         THEN:
-            - Text extracted; no archive created (text exists, auto skips OCR)
+            - Text extracted from original; no archive created (text exists +
+              produce_archive=False skips OCRmyPDF entirely)
         """
         tesseract_parser.settings.mode = "auto"
         tesseract_parser.parse(
             tesseract_samples_dir / "multi-page-digital.pdf",
             "application/pdf",
+            produce_archive=False,
         )
         assert tesseract_parser.archive_path is None
         assert_ordered_substrings(
@@ -781,46 +794,58 @@ class TestSkipArchive:
         )
 
     @pytest.mark.parametrize(
-        ("skip_archive_file", "filename", "expect_archive"),
+        ("produce_archive", "filename", "expect_archive"),
         [
             pytest.param(
-                "always",
+                True,
                 "multi-page-digital.pdf",
                 True,
-                id="always-with-text",
-            ),
-            pytest.param("always", "multi-page-images.pdf", True, id="always-no-text"),
-            pytest.param(
-                "auto",
-                "multi-page-digital.pdf",
-                False,
-                id="auto-with-text-layer",
+                id="produce-archive-with-text",
             ),
             pytest.param(
-                "auto",
+                True,
                 "multi-page-images.pdf",
                 True,
-                id="auto-no-text-layer",
+                id="produce-archive-no-text",
             ),
             pytest.param(
-                "never",
+                False,
                 "multi-page-digital.pdf",
                 False,
-                id="never-with-text",
+                id="no-archive-with-text-layer",
+            ),
+            pytest.param(
+                False,
+                "multi-page-images.pdf",
+                False,
+                id="no-archive-no-text-layer",
             ),
-            pytest.param("never", "multi-page-images.pdf", False, id="never-no-text"),
         ],
     )
-    def test_skip_archive_file_setting(
+    def test_produce_archive_flag(
         self,
-        skip_archive_file: str,
+        produce_archive: bool,  # noqa: FBT001
         filename: str,
-        expect_archive: str,
+        expect_archive: bool,  # noqa: FBT001
         tesseract_parser: RasterisedDocumentParser,
         tesseract_samples_dir: Path,
     ) -> None:
-        tesseract_parser.settings.archive_file_generation = skip_archive_file
-        tesseract_parser.parse(tesseract_samples_dir / filename, "application/pdf")
+        """
+        GIVEN:
+            - Various PDFs (with and without text layers)
+            - produce_archive flag set to True or False
+        WHEN:
+            - Document is parsed
+        THEN:
+            - archive_path is set if and only if produce_archive=True
+            - Text is always extracted
+        """
+        tesseract_parser.settings.mode = "auto"
+        tesseract_parser.parse(
+            tesseract_samples_dir / filename,
+            "application/pdf",
+            produce_archive=produce_archive,
+        )
         text = tesseract_parser.get_text().lower()
         assert_ordered_substrings(text, ["page 1", "page 2", "page 3"])
         if expect_archive:
@@ -907,17 +932,18 @@ class TestParseMixed:
     ) -> None:
         """
         GIVEN:
-            - File with mixed pages
-            - Mode: auto, skip_archive_file: auto
+            - File with mixed pages (some with text, some image-only)
+            - Mode: auto, produce_archive=False
         WHEN:
             - Document is parsed
         THEN:
-            - No archive created (file has text layer); later-page text present
+            - No archive created (produce_archive=False); text from text layer present
         """
         tesseract_parser.settings.mode = "auto"
         tesseract_parser.parse(
             tesseract_samples_dir / "multi-page-mixed.pdf",
             "application/pdf",
+            produce_archive=False,
         )
         assert tesseract_parser.archive_path is None
         assert_ordered_substrings(
@@ -964,12 +990,19 @@ class TestParseRtl:
     ) -> None:
         """
         GIVEN:
-            - PDF with RTL Arabic text
+            - PDF with RTL Arabic text in its text layer (short: 18 chars)
+            - mode=off, produce_archive=True: PDF/A conversion via skip_text, no OCR engine
         WHEN:
             - Document is parsed
         THEN:
-            - Arabic content is extracted (normalised for bidi)
+            - Arabic content is extracted from the PDF text layer (normalised for bidi)
+
+        Note: The RTL PDF has a short text layer (< VALID_TEXT_LENGTH=50) so AUTO mode
+        would attempt full OCR, which fails due to PriorOcrFoundError and falls back to
+        force-ocr with English Tesseract (producing garbage).  Using mode="off" forces
+        skip_text=True so the Arabic text layer is preserved through PDF/A conversion.
         """
+        tesseract_parser.settings.mode = "off"
         tesseract_parser.parse(
             tesseract_samples_dir / "rtl-test.pdf",
             "application/pdf",