Refactor(beta): extract visible_document_ids_for_user helper

The owner-aware "resolve user to visible document pks" block was duplicated
verbatim between get_context_for_document and get_taxonomy_hints_for_document.
Extract it into indexing.visible_document_ids_for_user, next to its sibling
normalize_document_ids, and call it from both paths.

No behavior change: the helper returns None when user is None (unfiltered
retrieval) and the same pk list otherwise.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
stumpylog
2026-06-13 13:00:21 -07:00
parent 29f9475818
commit 1f4a871b8f
4 changed files with 25 additions and 36 deletions
+2 -15
View File
@@ -6,12 +6,12 @@ from django.conf import settings
from django.contrib.auth.models import User
from documents.models import Document
from documents.permissions import get_objects_for_user_owner_aware
from paperless.config import AIConfig
from paperless_ai.client import AIClient
from paperless_ai.db import db_connection_released
from paperless_ai.indexing import query_similar_documents
from paperless_ai.indexing import truncate_content
from paperless_ai.indexing import visible_document_ids_for_user
from paperless_ai.taxonomy import format_hints_for_prompt
if TYPE_CHECKING:
@@ -109,20 +109,7 @@ def get_context_for_document(
user: User | None = None,
max_docs: int = 5,
) -> str:
visible_documents = (
get_objects_for_user_owner_aware(
user,
"view_document",
Document,
)
if user
else None
)
visible_document_ids = (
list(visible_documents.values_list("pk", flat=True))
if visible_documents is not None
else None
)
visible_document_ids = visible_document_ids_for_user(user)
similar_docs = query_similar_documents(
document=doc,
document_ids=visible_document_ids,
+19
View File
@@ -5,6 +5,7 @@ from datetime import timedelta
from typing import TYPE_CHECKING
from django.conf import settings
from django.contrib.auth.models import User
from django.utils import timezone
from filelock import FileLock
from filelock import ReadWriteLock
@@ -12,6 +13,7 @@ from filelock import Timeout
from documents.models import Document
from documents.models import PaperlessTask
from documents.permissions import get_objects_for_user_owner_aware
from documents.utils import IterWrapper
from documents.utils import identity
from paperless.config import AIConfig
@@ -450,6 +452,23 @@ def normalize_document_ids(document_ids: Iterable[int | str] | None) -> set[str]
return {str(document_id) for document_id in document_ids}
def visible_document_ids_for_user(user: User | None) -> list[int] | None:
"""Return the pks of documents ``user`` may view, or ``None`` for no filter.
Returns ``None`` when ``user`` is ``None`` so retrieval runs unfiltered. Used
by both the similarity-context and taxonomy-hints paths to scope RAG
neighbours to documents the requesting user is allowed to see.
"""
if user is None:
return None
visible_documents = get_objects_for_user_owner_aware(
user,
"view_document",
Document,
)
return list(visible_documents.values_list("pk", flat=True))
def retrieve_similar_nodes(
document: Document,
document_ids: Iterable[int | str] | None = None,
+2 -17
View File
@@ -4,9 +4,9 @@ from typing import TypedDict
from django.contrib.auth.models import User
from documents.models import Document
from documents.permissions import get_objects_for_user_owner_aware
from paperless.config import AIConfig
from paperless_ai.indexing import retrieve_similar_nodes
from paperless_ai.indexing import visible_document_ids_for_user
if TYPE_CHECKING:
from llama_index.core.schema import NodeWithScore
@@ -108,23 +108,8 @@ def get_taxonomy_hints_for_document(
if not AIConfig().llm_embedding_backend:
return None
visible_documents = (
get_objects_for_user_owner_aware(
user,
"view_document",
Document,
)
if user
else None
)
visible_document_ids = (
list(visible_documents.values_list("pk", flat=True))
if visible_documents is not None
else None
)
nodes = retrieve_similar_nodes(
document=document,
document_ids=visible_document_ids,
document_ids=visible_document_ids_for_user(user),
)
return build_taxonomy_hints_from_nodes(nodes)
+2 -4
View File
@@ -157,11 +157,9 @@ class TestGetTaxonomyHintsForDocument:
"paperless_ai.taxonomy.AIConfig",
return_value=SimpleNamespace(llm_embedding_backend="huggingface"),
)
visible = mocker.MagicMock()
visible.values_list.return_value = [1, 2, 3]
mocker.patch(
"paperless_ai.taxonomy.get_objects_for_user_owner_aware",
return_value=visible,
"paperless_ai.taxonomy.visible_document_ids_for_user",
return_value=[1, 2, 3],
)
retrieve = mocker.patch(
"paperless_ai.taxonomy.retrieve_similar_nodes",