Feature: paginate search highlights and remove 10k document search limit (#12518)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
2026-05-23 06:55:23 +00:00 · 2026-04-15 16:20:31 -07:00
parent 21db608d57
commit 3ffbb8862c
12 changed files with 904 additions and 664 deletions
@@ -33,19 +33,12 @@ class TestWriteBatch:
        except RuntimeError:
            pass

-        r = backend.search(
-            "should survive",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-        )
-        assert r.total == 1
+        ids = backend.search_ids("should survive", user=None)
+        assert len(ids) == 1


 class TestSearch:
-    """Test search functionality."""
+    """Test search query parsing and matching via search_ids."""

    def test_text_mode_limits_default_search_to_title_and_content(
        self,
@@ -60,27 +53,20 @@ class TestSearch:
        )
        backend.add_or_update(doc)

-        metadata_only = backend.search(
-            "document_type:invoice",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-            search_mode=SearchMode.TEXT,
+        assert (
+            len(
+                backend.search_ids(
+                    "document_type:invoice",
+                    user=None,
+                    search_mode=SearchMode.TEXT,
+                ),
+            )
+            == 0
        )
-        assert metadata_only.total == 0
-
-        content_match = backend.search(
-            "monthly",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-            search_mode=SearchMode.TEXT,
+        assert (
+            len(backend.search_ids("monthly", user=None, search_mode=SearchMode.TEXT))
+            == 1
        )
-        assert content_match.total == 1

    def test_title_mode_limits_default_search_to_title_only(
        self,
@@ -95,27 +81,14 @@ class TestSearch:
        )
        backend.add_or_update(doc)

-        content_only = backend.search(
-            "monthly",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-            search_mode=SearchMode.TITLE,
+        assert (
+            len(backend.search_ids("monthly", user=None, search_mode=SearchMode.TITLE))
+            == 0
        )
-        assert content_only.total == 0
-
-        title_match = backend.search(
-            "invoice",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-            search_mode=SearchMode.TITLE,
+        assert (
+            len(backend.search_ids("invoice", user=None, search_mode=SearchMode.TITLE))
+            == 1
        )
-        assert title_match.total == 1

    def test_text_mode_matches_partial_term_substrings(
        self,
@@ -130,38 +103,16 @@ class TestSearch:
        )
        backend.add_or_update(doc)

-        prefix_match = backend.search(
-            "pass",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-            search_mode=SearchMode.TEXT,
+        assert (
+            len(backend.search_ids("pass", user=None, search_mode=SearchMode.TEXT)) == 1
        )
-        assert prefix_match.total == 1
-
-        infix_match = backend.search(
-            "sswo",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-            search_mode=SearchMode.TEXT,
+        assert (
+            len(backend.search_ids("sswo", user=None, search_mode=SearchMode.TEXT)) == 1
        )
-        assert infix_match.total == 1
-
-        phrase_match = backend.search(
-            "sswo re",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-            search_mode=SearchMode.TEXT,
+        assert (
+            len(backend.search_ids("sswo re", user=None, search_mode=SearchMode.TEXT))
+            == 1
        )
-        assert phrase_match.total == 1

    def test_text_mode_does_not_match_on_partial_term_overlap(
        self,
@@ -176,16 +127,10 @@ class TestSearch:
        )
        backend.add_or_update(doc)

-        non_match = backend.search(
-            "raptor",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-            search_mode=SearchMode.TEXT,
+        assert (
+            len(backend.search_ids("raptor", user=None, search_mode=SearchMode.TEXT))
+            == 0
        )
-        assert non_match.total == 0

    def test_text_mode_anchors_later_query_tokens_to_token_starts(
        self,
@@ -214,16 +159,9 @@ class TestSearch:
        backend.add_or_update(prefix_doc)
        backend.add_or_update(false_positive)

-        results = backend.search(
-            "Z-Berichte 6",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-            search_mode=SearchMode.TEXT,
+        result_ids = set(
+            backend.search_ids("Z-Berichte 6", user=None, search_mode=SearchMode.TEXT),
        )
-        result_ids = {hit["id"] for hit in results.hits}

        assert exact_doc.id in result_ids
        assert prefix_doc.id in result_ids
@@ -242,16 +180,9 @@ class TestSearch:
        )
        backend.add_or_update(doc)

-        no_tokens = backend.search(
-            "!!!",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-            search_mode=SearchMode.TEXT,
+        assert (
+            len(backend.search_ids("!!!", user=None, search_mode=SearchMode.TEXT)) == 0
        )
-        assert no_tokens.total == 0

    def test_title_mode_matches_partial_term_substrings(
        self,
@@ -266,59 +197,18 @@ class TestSearch:
        )
        backend.add_or_update(doc)

-        prefix_match = backend.search(
-            "pass",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-            search_mode=SearchMode.TITLE,
+        assert (
+            len(backend.search_ids("pass", user=None, search_mode=SearchMode.TITLE))
+            == 1
        )
-        assert prefix_match.total == 1
-
-        infix_match = backend.search(
-            "sswo",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-            search_mode=SearchMode.TITLE,
+        assert (
+            len(backend.search_ids("sswo", user=None, search_mode=SearchMode.TITLE))
+            == 1
        )
-        assert infix_match.total == 1
-
-        phrase_match = backend.search(
-            "sswo gu",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-            search_mode=SearchMode.TITLE,
+        assert (
+            len(backend.search_ids("sswo gu", user=None, search_mode=SearchMode.TITLE))
+            == 1
        )
-        assert phrase_match.total == 1
-
-    def test_scores_normalised_top_hit_is_one(self, backend: TantivyBackend):
-        """Search scores must be normalized so top hit has score 1.0 for UI consistency."""
-        for i, title in enumerate(["bank invoice", "bank statement", "bank receipt"]):
-            doc = Document.objects.create(
-                title=title,
-                content=title,
-                checksum=f"SN{i}",
-                pk=10 + i,
-            )
-            backend.add_or_update(doc)
-        r = backend.search(
-            "bank",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-        )
-        assert r.hits[0]["score"] == pytest.approx(1.0)
-        assert all(0.0 <= h["score"] <= 1.0 for h in r.hits)

    def test_sort_field_ascending(self, backend: TantivyBackend):
        """Searching with sort_reverse=False must return results in ascending ASN order."""
@@ -331,16 +221,14 @@ class TestSearch:
            )
            backend.add_or_update(doc)

-        r = backend.search(
+        ids = backend.search_ids(
            "sortable",
            user=None,
-            page=1,
-            page_size=10,
            sort_field="archive_serial_number",
            sort_reverse=False,
        )
-        assert r.total == 3
-        asns = [Document.objects.get(pk=h["id"]).archive_serial_number for h in r.hits]
+        assert len(ids) == 3
+        asns = [Document.objects.get(pk=doc_id).archive_serial_number for doc_id in ids]
        assert asns == [10, 20, 30]

    def test_sort_field_descending(self, backend: TantivyBackend):
@@ -354,79 +242,91 @@ class TestSearch:
            )
            backend.add_or_update(doc)

-        r = backend.search(
+        ids = backend.search_ids(
            "sortable",
            user=None,
-            page=1,
-            page_size=10,
            sort_field="archive_serial_number",
            sort_reverse=True,
        )
-        assert r.total == 3
-        asns = [Document.objects.get(pk=h["id"]).archive_serial_number for h in r.hits]
+        assert len(ids) == 3
+        asns = [Document.objects.get(pk=doc_id).archive_serial_number for doc_id in ids]
        assert asns == [30, 20, 10]

-    def test_fuzzy_threshold_filters_low_score_hits(
-        self,
-        backend: TantivyBackend,
-        settings,
-    ):
-        """When ADVANCED_FUZZY_SEARCH_THRESHOLD exceeds all normalized scores, hits must be filtered out."""
-        doc = Document.objects.create(
-            title="Invoice document",
-            content="financial report",
-            checksum="FT1",
-            pk=120,
-        )
-        backend.add_or_update(doc)

-        # Threshold above 1.0 filters every hit (normalized scores top out at 1.0)
-        settings.ADVANCED_FUZZY_SEARCH_THRESHOLD = 1.1
-        r = backend.search(
-            "invoice",
+class TestSearchIds:
+    """Test lightweight ID-only search."""
+
+    def test_returns_matching_ids(self, backend: TantivyBackend):
+        """search_ids must return IDs of all matching documents."""
+        docs = []
+        for i in range(5):
+            doc = Document.objects.create(
+                title=f"findable doc {i}",
+                content="common keyword",
+                checksum=f"SI{i}",
+            )
+            backend.add_or_update(doc)
+            docs.append(doc)
+        other = Document.objects.create(
+            title="unrelated",
+            content="nothing here",
+            checksum="SI_other",
+        )
+        backend.add_or_update(other)
+
+        ids = backend.search_ids(
+            "common keyword",
            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
+            search_mode=SearchMode.QUERY,
        )
-        assert r.hits == []
+        assert set(ids) == {d.pk for d in docs}
+        assert other.pk not in ids

-    def test_owner_filter(self, backend: TantivyBackend):
-        """Document owners can search their private documents; other users cannot access them."""
-        owner = User.objects.create_user("owner")
-        other = User.objects.create_user("other")
+    def test_respects_permission_filter(self, backend: TantivyBackend):
+        """search_ids must respect user permission filtering."""
+        owner = User.objects.create_user("ids_owner")
+        other = User.objects.create_user("ids_other")
        doc = Document.objects.create(
-            title="Private",
-            content="secret",
-            checksum="PF1",
-            pk=20,
+            title="private doc",
+            content="secret keyword",
+            checksum="SIP1",
            owner=owner,
        )
        backend.add_or_update(doc)

+        assert backend.search_ids(
+            "secret",
+            user=owner,
+            search_mode=SearchMode.QUERY,
+        ) == [doc.pk]
        assert (
-            backend.search(
-                "secret",
-                user=owner,
-                page=1,
-                page_size=10,
-                sort_field=None,
-                sort_reverse=False,
-            ).total
-            == 1
+            backend.search_ids("secret", user=other, search_mode=SearchMode.QUERY) == []
        )
-        assert (
-            backend.search(
-                "secret",
-                user=other,
-                page=1,
-                page_size=10,
-                sort_field=None,
-                sort_reverse=False,
-            ).total
-            == 0
+
+    def test_respects_fuzzy_threshold(self, backend: TantivyBackend, settings):
+        """search_ids must apply the same fuzzy threshold as search()."""
+        doc = Document.objects.create(
+            title="threshold test",
+            content="unique term",
+            checksum="SIT1",
        )
+        backend.add_or_update(doc)
+
+        settings.ADVANCED_FUZZY_SEARCH_THRESHOLD = 1.1
+        ids = backend.search_ids("unique", user=None, search_mode=SearchMode.QUERY)
+        assert ids == []
+
+    def test_returns_ids_for_text_mode(self, backend: TantivyBackend):
+        """search_ids must work with TEXT search mode."""
+        doc = Document.objects.create(
+            title="text mode doc",
+            content="findable phrase",
+            checksum="SIM1",
+        )
+        backend.add_or_update(doc)
+
+        ids = backend.search_ids("findable", user=None, search_mode=SearchMode.TEXT)
+        assert ids == [doc.pk]


 class TestRebuild:
@@ -490,57 +390,26 @@ class TestAutocomplete:
 class TestMoreLikeThis:
    """Test more like this functionality."""

-    def test_excludes_original(self, backend: TantivyBackend):
-        """More like this queries must exclude the reference document from results."""
+    def test_more_like_this_ids_excludes_original(self, backend: TantivyBackend):
+        """more_like_this_ids must return IDs of similar documents, excluding the original."""
        doc1 = Document.objects.create(
            title="Important document",
-            content="financial information",
-            checksum="MLT1",
-            pk=50,
+            content="financial information report",
+            checksum="MLTI1",
+            pk=150,
        )
        doc2 = Document.objects.create(
            title="Another document",
-            content="financial report",
-            checksum="MLT2",
-            pk=51,
+            content="financial information report",
+            checksum="MLTI2",
+            pk=151,
        )
        backend.add_or_update(doc1)
        backend.add_or_update(doc2)

-        results = backend.more_like_this(doc_id=50, user=None, page=1, page_size=10)
-        returned_ids = [hit["id"] for hit in results.hits]
-        assert 50 not in returned_ids  # Original document excluded
-
-    def test_with_user_applies_permission_filter(self, backend: TantivyBackend):
-        """more_like_this with a user must exclude documents that user cannot see."""
-        viewer = User.objects.create_user("mlt_viewer")
-        other = User.objects.create_user("mlt_other")
-        public_doc = Document.objects.create(
-            title="Public financial document",
-            content="quarterly financial analysis report figures",
-            checksum="MLT3",
-            pk=52,
-        )
-        private_doc = Document.objects.create(
-            title="Private financial document",
-            content="quarterly financial analysis report figures",
-            checksum="MLT4",
-            pk=53,
-            owner=other,
-        )
-        backend.add_or_update(public_doc)
-        backend.add_or_update(private_doc)
-
-        results = backend.more_like_this(doc_id=52, user=viewer, page=1, page_size=10)
-        returned_ids = [hit["id"] for hit in results.hits]
-        # private_doc is owned by other, so viewer cannot see it
-        assert 53 not in returned_ids
-
-    def test_document_not_in_index_returns_empty(self, backend: TantivyBackend):
-        """more_like_this for a doc_id absent from the index must return empty results."""
-        results = backend.more_like_this(doc_id=9999, user=None, page=1, page_size=10)
-        assert results.hits == []
-        assert results.total == 0
+        ids = backend.more_like_this_ids(doc_id=150, user=None)
+        assert 150 not in ids
+        assert 151 in ids


 class TestSingleton:
@@ -593,19 +462,10 @@ class TestFieldHandling:
        # Should not raise an exception
        backend.add_or_update(doc)

-        results = backend.search(
-            "test",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-        )
-        assert results.total == 1
+        assert len(backend.search_ids("test", user=None)) == 1

    def test_custom_fields_include_name_and_value(self, backend: TantivyBackend):
        """Custom fields must be indexed with both field name and value for structured queries."""
-        # Create a custom field
        field = CustomField.objects.create(
            name="Invoice Number",
            data_type=CustomField.FieldDataType.STRING,
@@ -622,18 +482,9 @@ class TestFieldHandling:
            value_text="INV-2024-001",
        )

-        # Should not raise an exception during indexing
        backend.add_or_update(doc)

-        results = backend.search(
-            "invoice",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-        )
-        assert results.total == 1
+        assert len(backend.search_ids("invoice", user=None)) == 1

    def test_select_custom_field_indexes_label_not_id(self, backend: TantivyBackend):
        """SELECT custom fields must index the human-readable label, not the opaque option ID."""
@@ -660,27 +511,8 @@ class TestFieldHandling:
        )
        backend.add_or_update(doc)

-        # Label should be findable
-        results = backend.search(
-            "custom_fields.value:invoice",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-        )
-        assert results.total == 1
-
-        # Opaque ID must not appear in the index
-        results = backend.search(
-            "custom_fields.value:opt_abc",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-        )
-        assert results.total == 0
+        assert len(backend.search_ids("custom_fields.value:invoice", user=None)) == 1
+        assert len(backend.search_ids("custom_fields.value:opt_abc", user=None)) == 0

    def test_none_custom_field_value_not_indexed(self, backend: TantivyBackend):
        """Custom field instances with no value set must not produce an index entry."""
@@ -702,16 +534,7 @@ class TestFieldHandling:
        )
        backend.add_or_update(doc)

-        # The string "none" must not appear as an indexed value
-        results = backend.search(
-            "custom_fields.value:none",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-        )
-        assert results.total == 0
+        assert len(backend.search_ids("custom_fields.value:none", user=None)) == 0

    def test_notes_include_user_information(self, backend: TantivyBackend):
        """Notes must be indexed with user information when available for structured queries."""
@@ -724,32 +547,96 @@ class TestFieldHandling:
        )
        Note.objects.create(document=doc, note="Important note", user=user)

-        # Should not raise an exception during indexing
        backend.add_or_update(doc)

-        # Test basic document search first
-        results = backend.search(
-            "test",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
-        )
-        assert results.total == 1, (
-            f"Expected 1, got {results.total}. Document content should be searchable."
+        ids = backend.search_ids("test", user=None)
+        assert len(ids) == 1, (
+            f"Expected 1, got {len(ids)}. Document content should be searchable."
        )

-        # Test notes search — must use structured JSON syntax now that note
-        # is no longer in DEFAULT_SEARCH_FIELDS
-        results = backend.search(
-            "notes.note:important",
-            user=None,
-            page=1,
-            page_size=10,
-            sort_field=None,
-            sort_reverse=False,
+        ids = backend.search_ids("notes.note:important", user=None)
+        assert len(ids) == 1, (
+            f"Expected 1, got {len(ids)}. Note content should be searchable via notes.note: prefix."
        )
-        assert results.total == 1, (
-            f"Expected 1, got {results.total}. Note content should be searchable via notes.note: prefix."
+
+
+class TestHighlightHits:
+    """Test highlight_hits returns proper HTML strings, not raw Snippet objects."""
+
+    def test_highlights_content_returns_html_string(self, backend: TantivyBackend):
+        """highlight_hits must return HTML strings (from Snippet.to_html()), not Snippet objects."""
+        doc = Document.objects.create(
+            title="Highlight Test",
+            content="The quick brown fox jumps over the lazy dog",
+            checksum="HH1",
+            pk=90,
        )
+        backend.add_or_update(doc)
+
+        hits = backend.highlight_hits("quick", [doc.pk])
+
+        assert len(hits) == 1
+        highlights = hits[0]["highlights"]
+        assert "content" in highlights
+        content_highlight = highlights["content"]
+        assert isinstance(content_highlight, str), (
+            f"Expected str, got {type(content_highlight)}: {content_highlight!r}"
+        )
+        # Tantivy wraps matched terms in <b> tags
+        assert "<b>" in content_highlight, (
+            f"Expected HTML with <b> tags, got: {content_highlight!r}"
+        )
+
+    def test_highlights_notes_returns_html_string(self, backend: TantivyBackend):
+        """Note highlights must be HTML strings via notes_text companion field.
+
+        The notes JSON field does not support tantivy SnippetGenerator, so
+        the notes_text plain-text companion field is used for snippets
+        instead.  The query below uses the notes.note: prefix so that it
+        targets note content directly; the highlight itself is generated
+        from notes_text, which stores the same text as the notes JSON
+        field.
+        """
+        user = User.objects.create_user("hl_noteuser")
+        doc = Document.objects.create(
+            title="Doc with matching note",
+            content="unrelated content",
+            checksum="HH2",
+            pk=91,
+        )
+        Note.objects.create(document=doc, note="urgent payment required", user=user)
+        backend.add_or_update(doc)
+
+        # Use notes.note: prefix so the document matches the query and the
+        # notes_text snippet generator can produce highlights.
+        hits = backend.highlight_hits("notes.note:urgent", [doc.pk])
+
+        assert len(hits) == 1
+        highlights = hits[0]["highlights"]
+        assert "notes" in highlights
+        note_highlight = highlights["notes"]
+        assert isinstance(note_highlight, str), (
+            f"Expected str, got {type(note_highlight)}: {note_highlight!r}"
+        )
+        assert "<b>" in note_highlight, (
+            f"Expected HTML with <b> tags, got: {note_highlight!r}"
+        )
+
+    def test_empty_doc_list_returns_empty_hits(self, backend: TantivyBackend):
+        """highlight_hits with no doc IDs must return an empty list."""
+        hits = backend.highlight_hits("anything", [])
+        assert hits == []
+
+    def test_no_highlights_when_no_match(self, backend: TantivyBackend):
+        """Documents not matching the query should not appear in results."""
+        doc = Document.objects.create(
+            title="Unrelated",
+            content="completely different text",
+            checksum="HH3",
+            pk=92,
+        )
+        backend.add_or_update(doc)
+
+        hits = backend.highlight_hits("quick", [doc.pk])
+
+        assert len(hits) == 0
@@ -1503,6 +1503,126 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
            [d2.id, d1.id, d3.id],
        )

+    def test_search_ordering_by_score(self) -> None:
+        """ordering=-score must return results in descending relevance order (best first)."""
+        backend = get_backend()
+        # doc_high has more occurrences of the search term → higher BM25 score
+        doc_low = Document.objects.create(
+            title="score sort low",
+            content="apple",
+            checksum="SCL1",
+        )
+        doc_high = Document.objects.create(
+            title="score sort high",
+            content="apple apple apple apple apple",
+            checksum="SCH1",
+        )
+        backend.add_or_update(doc_low)
+        backend.add_or_update(doc_high)
+
+        # -score = descending = best first (highest score)
+        response = self.client.get("/api/documents/?query=apple&ordering=-score")
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        ids = [r["id"] for r in response.data["results"]]
+        self.assertEqual(
+            ids[0],
+            doc_high.id,
+            "Most relevant doc should be first for -score",
+        )
+
+        # score = ascending = worst first (lowest score)
+        response = self.client.get("/api/documents/?query=apple&ordering=score")
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        ids = [r["id"] for r in response.data["results"]]
+        self.assertEqual(
+            ids[0],
+            doc_low.id,
+            "Least relevant doc should be first for +score",
+        )
+
+    def test_search_with_tantivy_native_sort(self) -> None:
+        """When ordering by a Tantivy-sortable field, results must be correctly sorted."""
+        backend = get_backend()
+        for i, asn in enumerate([30, 10, 20]):
+            doc = Document.objects.create(
+                title=f"sortable doc {i}",
+                content="searchable content",
+                checksum=f"TNS{i}",
+                archive_serial_number=asn,
+            )
+            backend.add_or_update(doc)
+
+        response = self.client.get(
+            "/api/documents/?query=searchable&ordering=archive_serial_number",
+        )
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        asns = [doc["archive_serial_number"] for doc in response.data["results"]]
+        self.assertEqual(asns, [10, 20, 30])
+
+        response = self.client.get(
+            "/api/documents/?query=searchable&ordering=-archive_serial_number",
+        )
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        asns = [doc["archive_serial_number"] for doc in response.data["results"]]
+        self.assertEqual(asns, [30, 20, 10])
+
+    def test_search_page_2_returns_correct_slice(self) -> None:
+        """Page 2 must return the second slice, not overlap with page 1."""
+        backend = get_backend()
+        for i in range(10):
+            doc = Document.objects.create(
+                title=f"doc {i}",
+                content="paginated content",
+                checksum=f"PG2{i}",
+                archive_serial_number=i + 1,
+            )
+            backend.add_or_update(doc)
+
+        response = self.client.get(
+            "/api/documents/?query=paginated&ordering=archive_serial_number&page=1&page_size=3",
+        )
+        page1_ids = [r["id"] for r in response.data["results"]]
+        self.assertEqual(len(page1_ids), 3)
+
+        response = self.client.get(
+            "/api/documents/?query=paginated&ordering=archive_serial_number&page=2&page_size=3",
+        )
+        page2_ids = [r["id"] for r in response.data["results"]]
+        self.assertEqual(len(page2_ids), 3)
+
+        # No overlap between pages
+        self.assertEqual(set(page1_ids) & set(page2_ids), set())
+        # Page 2 ASNs are higher than page 1
+        page1_asns = [
+            Document.objects.get(pk=pk).archive_serial_number for pk in page1_ids
+        ]
+        page2_asns = [
+            Document.objects.get(pk=pk).archive_serial_number for pk in page2_ids
+        ]
+        self.assertTrue(max(page1_asns) < min(page2_asns))
+
+    def test_search_all_field_contains_all_ids_when_paginated(self) -> None:
+        """The 'all' field must contain every matching ID, even when paginated."""
+        backend = get_backend()
+        doc_ids = []
+        for i in range(10):
+            doc = Document.objects.create(
+                title=f"all field doc {i}",
+                content="allfield content",
+                checksum=f"AF{i}",
+            )
+            backend.add_or_update(doc)
+            doc_ids.append(doc.pk)
+
+        response = self.client.get(
+            "/api/documents/?query=allfield&page=1&page_size=3",
+            headers={"Accept": "application/json; version=9"},
+        )
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        self.assertEqual(len(response.data["results"]), 3)
+        # "all" must contain ALL 10 matching IDs
+        self.assertCountEqual(response.data["all"], doc_ids)
+
    @mock.patch("documents.bulk_edit.bulk_update_documents")
    def test_global_search(self, m) -> None:
        """