From 534fcfde6b7cdc7112ffb3b44e88f927a062b330 Mon Sep 17 00:00:00 2001
From: Trenton Holmes <797416+stumpylog@users.noreply.github.com>
Date: Mon, 6 Apr 2026 13:10:58 -0700
Subject: [PATCH] refactor: remove dead more_like_this() method from
 TantivyBackend
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

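The method is no longer called anywhere in production code — all callers
were migrated to more_like_this_ids() during the search pagination work.
The three tests that exercised only more_like_this() are removed along
with it; the more_like_this_ids() tests are left unchanged.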

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/documents/search/_backend.py           | 98 ----------------------
 src/documents/tests/search/test_backend.py | 52 ------------
 2 files changed, 150 deletions(-)

diff --git a/src/documents/search/_backend.py b/src/documents/search/_backend.py
index 49fdb4111..ab2f5a104 100644
--- a/src/documents/search/_backend.py
+++ b/src/documents/search/_backend.py
@@ -873,104 +873,6 @@ class TantivyBackend:
 
         return matches[:limit]
 
-    def more_like_this(
-        self,
-        doc_id: int,
-        user: AbstractBaseUser | None,
-        page: int,
-        page_size: int,
-    ) -> SearchResults:
-        """
-        Find documents similar to the given document using content analysis.
-
-        Uses Tantivy's "more like this" query to find documents with similar
-        content patterns. The original document is excluded from results.
-
-        Args:
-            doc_id: Primary key of the reference document
-            user: User for permission filtering (None for no filtering)
-            page: Page number (1-indexed) for pagination
-            page_size: Number of results per page
-
-        Returns:
-            SearchResults with similar documents (excluding the original)
-        """
-        self._ensure_open()
-        searcher = self._index.searcher()
-
-        # First find the document address
-        id_query = tantivy.Query.range_query(
-            self._schema,
-            "id",
-            tantivy.FieldType.Unsigned,
-            doc_id,
-            doc_id,
-        )
-        results = searcher.search(id_query, limit=1)
-
-        if not results.hits:
-            # Document not found
-            return SearchResults(hits=[], total=0, query=f"more_like:{doc_id}")
-
-        # Extract doc_address from (score, doc_address) tuple
-        doc_address = results.hits[0][1]
-
-        # Build more like this query
-        mlt_query = tantivy.Query.more_like_this_query(
-            doc_address,
-            min_doc_frequency=1,
-            max_doc_frequency=None,
-            min_term_frequency=1,
-            max_query_terms=12,
-            min_word_length=None,
-            max_word_length=None,
-            boost_factor=None,
-        )
-
-        final_query = self._apply_permission_filter(mlt_query, user)
-
-        # Search
-        offset = (page - 1) * page_size
-        results = searcher.search(final_query, limit=offset + page_size)
-
-        total = results.count
-        # Convert from (score, doc_address) to (doc_address, score)
-        all_hits = [(hit[1], hit[0]) for hit in results.hits]
-
-        # Normalize scores
-        if all_hits:
-            max_score = max(hit[1] for hit in all_hits) or 1.0
-            all_hits = [(hit[0], hit[1] / max_score) for hit in all_hits]
-
-        # Get page hits
-        page_hits = all_hits[offset : offset + page_size]
-
-        # Build results
-        hits: list[SearchHit] = []
-        for rank, (doc_address, score) in enumerate(page_hits, start=offset + 1):
-            actual_doc = searcher.doc(doc_address)
-            doc_dict = actual_doc.to_dict()
-            result_doc_id = doc_dict["id"][0]
-
-            # Skip the original document
-            if result_doc_id == doc_id:
-                continue
-
-            hits.append(
-                SearchHit(
-                    id=result_doc_id,
-                    score=score,
-                    rank=rank,
-                    highlights={},  # MLT doesn't generate highlights
-                ),
-            )
-
-        return SearchResults(
-            hits=hits,
-            total=max(0, total - 1),  # Subtract 1 for the original document
-            query=f"more_like:{doc_id}",
-        )
-
     def more_like_this_ids(
         self,
         doc_id: int,
diff --git a/src/documents/tests/search/test_backend.py b/src/documents/tests/search/test_backend.py
index d5b8a0122..71099c8c9 100644
--- a/src/documents/tests/search/test_backend.py
+++ b/src/documents/tests/search/test_backend.py
@@ -646,58 +646,6 @@ class TestAutocomplete:
 class TestMoreLikeThis:
     """Test more like this functionality."""
 
-    def test_excludes_original(self, backend: TantivyBackend):
-        """More like this queries must exclude the reference document from results."""
-        doc1 = Document.objects.create(
-            title="Important document",
-            content="financial information",
-            checksum="MLT1",
-            pk=50,
-        )
-        doc2 = Document.objects.create(
-            title="Another document",
-            content="financial report",
-            checksum="MLT2",
-            pk=51,
-        )
-        backend.add_or_update(doc1)
-        backend.add_or_update(doc2)
-
-        results = backend.more_like_this(doc_id=50, user=None, page=1, page_size=10)
-        returned_ids = [hit["id"] for hit in results.hits]
-        assert 50 not in returned_ids  # Original document excluded
-
-    def test_with_user_applies_permission_filter(self, backend: TantivyBackend):
-        """more_like_this with a user must exclude documents that user cannot see."""
-        viewer = User.objects.create_user("mlt_viewer")
-        other = User.objects.create_user("mlt_other")
-        public_doc = Document.objects.create(
-            title="Public financial document",
-            content="quarterly financial analysis report figures",
-            checksum="MLT3",
-            pk=52,
-        )
-        private_doc = Document.objects.create(
-            title="Private financial document",
-            content="quarterly financial analysis report figures",
-            checksum="MLT4",
-            pk=53,
-            owner=other,
-        )
-        backend.add_or_update(public_doc)
-        backend.add_or_update(private_doc)
-
-        results = backend.more_like_this(doc_id=52, user=viewer, page=1, page_size=10)
-        returned_ids = [hit["id"] for hit in results.hits]
-        # private_doc is owned by other, so viewer cannot see it
-        assert 53 not in returned_ids
-
-    def test_document_not_in_index_returns_empty(self, backend: TantivyBackend):
-        """more_like_this for a doc_id absent from the index must return empty results."""
-        results = backend.more_like_this(doc_id=9999, user=None, page=1, page_size=10)
-        assert results.hits == []
-        assert results.total == 0
-
     def test_more_like_this_ids_excludes_original(self, backend: TantivyBackend):
         """more_like_this_ids must return IDs of similar documents, excluding the original."""
         doc1 = Document.objects.create(