mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-28 16:24:19 +00:00
1f4a871b8f
The owner-aware "resolve user to visible document pks" block was duplicated verbatim between get_context_for_document and get_taxonomy_hints_for_document. Extract it into indexing.visible_document_ids_for_user, next to its sibling normalize_document_ids, and call it from both paths. No behavior change: the helper returns None when user is None (unfiltered retrieval) and the same pk list otherwise. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
221 lines
7.4 KiB
Python
221 lines
7.4 KiB
Python
from types import SimpleNamespace
|
|
|
|
import pytest_mock
|
|
|
|
from documents.tests.factories import DocumentFactory
|
|
from paperless_ai.taxonomy import TaxonomyHints
|
|
from paperless_ai.taxonomy import build_taxonomy_hints_from_nodes
|
|
from paperless_ai.taxonomy import format_hints_for_prompt
|
|
from paperless_ai.taxonomy import get_taxonomy_hints_for_document
|
|
|
|
|
|
def make_node(**metadata: object) -> SimpleNamespace:
    """A stand-in for NodeWithScore: only ``.metadata`` is accessed.

    The keyword arguments are collected into a dict and exposed unchanged
    as the ``metadata`` attribute of the returned namespace, so a test can
    write ``make_node(tags=["Taxes"], document_type="Invoice")`` instead of
    constructing a real retrieval node.
    """
    return SimpleNamespace(metadata=metadata)
|
|
|
|
|
|
class TestBuildTaxonomyHintsFromNodes:
    """Tests for ``build_taxonomy_hints_from_nodes`` using stand-in nodes."""

    def test_returns_all_four_keys(self) -> None:
        """An empty node list still yields every taxonomy category key."""
        hints = build_taxonomy_hints_from_nodes([])
        assert set(hints.keys()) == {
            "tags",
            "document_types",
            "correspondents",
            "storage_paths",
        }

    def test_collects_and_sorts_values(self) -> None:
        """Values from node metadata are collected; tags come back sorted."""
        nodes = [
            make_node(
                tags=["Taxes", "Bloodwork"],
                document_type="Invoice",
                correspondent="IRS",
                storage_path="Financial",
            ),
        ]
        hints = build_taxonomy_hints_from_nodes(nodes)
        assert hints["tags"] == ["Bloodwork", "Taxes"]
        assert hints["document_types"] == ["Invoice"]
        assert hints["correspondents"] == ["IRS"]
        assert hints["storage_paths"] == ["Financial"]

    def test_deduplicates_across_nodes(self) -> None:
        """A value repeated on several nodes appears only once."""
        nodes = [
            make_node(tags=["Taxes"], document_type="Invoice"),
            make_node(tags=["Taxes", "Medical"], document_type="Invoice"),
        ]
        hints = build_taxonomy_hints_from_nodes(nodes)
        assert hints["tags"] == ["Medical", "Taxes"]
        assert hints["document_types"] == ["Invoice"]

    def test_none_values_skipped(self) -> None:
        """``None`` and empty-string entries do not end up in the hints."""
        nodes = [
            make_node(
                tags=["Taxes", None, ""],
                document_type=None,
                correspondent=None,
                storage_path=None,
            ),
        ]
        hints = build_taxonomy_hints_from_nodes(nodes)
        assert hints["tags"] == ["Taxes"]
        assert hints["document_types"] == []
        assert hints["correspondents"] == []
        assert hints["storage_paths"] == []

    def test_missing_storage_path_key_handled(self) -> None:
        """Metadata without a ``storage_path`` key yields an empty list."""
        # Pre-enrichment nodes have no storage_path key at all.
        nodes = [make_node(tags=["Taxes"], document_type="Invoice")]
        hints = build_taxonomy_hints_from_nodes(nodes)
        assert hints["storage_paths"] == []

    def test_empty_node_list_all_empty(self) -> None:
        """With no nodes, every category is an empty list."""
        hints = build_taxonomy_hints_from_nodes([])
        assert hints == {
            "tags": [],
            "document_types": [],
            "correspondents": [],
            "storage_paths": [],
        }

    def test_output_stable_across_calls(self) -> None:
        """Two calls with the same nodes produce equal results."""
        nodes = [make_node(tags=["b", "a", "c"])]
        assert build_taxonomy_hints_from_nodes(
            nodes,
        ) == build_taxonomy_hints_from_nodes(nodes)
|
|
|
|
|
|
class TestFormatHintsForPrompt:
    """Tests for ``format_hints_for_prompt`` rendering of ``TaxonomyHints``."""

    def test_all_blocks_present_when_all_categories_nonempty(self) -> None:
        """Each non-empty category gets its own ``Available ...:`` block."""
        hints: TaxonomyHints = {
            "tags": ["Bloodwork"],
            "document_types": ["Invoice"],
            "correspondents": ["IRS"],
            "storage_paths": ["Financial"],
        }
        result = format_hints_for_prompt(hints)
        assert "Available tags:" in result
        assert "Available document types:" in result
        assert "Available correspondents:" in result
        assert "Available storage paths:" in result
        assert "- Bloodwork" in result

    def test_empty_category_produces_no_block(self) -> None:
        """Categories with no values are omitted from the output."""
        hints: TaxonomyHints = {
            "tags": ["Bloodwork"],
            "document_types": [],
            "correspondents": [],
            "storage_paths": [],
        }
        result = format_hints_for_prompt(hints)
        assert "Available tags:" in result
        assert "Available document types:" not in result
        assert "Available correspondents:" not in result
        assert "Available storage paths:" not in result

    def test_all_empty_produces_empty_string(self) -> None:
        """When every category is empty the result is the empty string."""
        hints: TaxonomyHints = {
            "tags": [],
            "document_types": [],
            "correspondents": [],
            "storage_paths": [],
        }
        assert format_hints_for_prompt(hints) == ""

    def test_instruction_line_appears_once(self) -> None:
        """The instruction sentence is emitted once, not once per block."""
        hints: TaxonomyHints = {
            "tags": ["Bloodwork"],
            "document_types": ["Invoice"],
            "correspondents": [],
            "storage_paths": [],
        }
        result = format_hints_for_prompt(hints)
        assert result.count("Prefer existing names from these lists verbatim") == 1
|
|
|
|
|
|
class TestGetTaxonomyHintsForDocument:
    """Tests for ``get_taxonomy_hints_for_document``.

    ``AIConfig`` and ``retrieve_similar_nodes`` are patched on the
    ``paperless_ai.taxonomy`` module so no real configuration or retrieval
    is exercised; documents are unsaved ``DocumentFactory.build()`` objects.
    """

    def test_returns_none_when_embedding_backend_off(
        self,
        mocker: pytest_mock.MockerFixture,
    ) -> None:
        """No embedding backend: result is ``None`` and retrieval is skipped."""
        mocker.patch(
            "paperless_ai.taxonomy.AIConfig",
            return_value=SimpleNamespace(llm_embedding_backend=None),
        )
        retrieve = mocker.patch("paperless_ai.taxonomy.retrieve_similar_nodes")

        result = get_taxonomy_hints_for_document(DocumentFactory.build(), user=None)

        assert result is None
        retrieve.assert_not_called()

    def test_passes_owner_aware_ids_when_user_present(
        self,
        mocker: pytest_mock.MockerFixture,
    ) -> None:
        """Ids from ``visible_document_ids_for_user`` are passed to retrieval."""
        mocker.patch(
            "paperless_ai.taxonomy.AIConfig",
            return_value=SimpleNamespace(llm_embedding_backend="huggingface"),
        )
        # Stub the visibility helper so the expected id list is known.
        mocker.patch(
            "paperless_ai.taxonomy.visible_document_ids_for_user",
            return_value=[1, 2, 3],
        )
        retrieve = mocker.patch(
            "paperless_ai.taxonomy.retrieve_similar_nodes",
            return_value=[],
        )
        document = DocumentFactory.build()
        user = mocker.MagicMock()

        get_taxonomy_hints_for_document(document, user=user)

        retrieve.assert_called_once_with(
            document=document,
            document_ids=[1, 2, 3],
        )

    def test_returns_populated_hints_when_nodes_found(
        self,
        mocker: pytest_mock.MockerFixture,
    ) -> None:
        """Metadata of retrieved nodes is turned into populated hints."""
        mocker.patch(
            "paperless_ai.taxonomy.AIConfig",
            return_value=SimpleNamespace(llm_embedding_backend="huggingface"),
        )
        mocker.patch(
            "paperless_ai.taxonomy.retrieve_similar_nodes",
            return_value=[make_node(tags=["Taxes"], document_type="Invoice")],
        )

        result = get_taxonomy_hints_for_document(DocumentFactory.build(), user=None)

        assert result == {
            "tags": ["Taxes"],
            "document_types": ["Invoice"],
            "correspondents": [],
            "storage_paths": [],
        }

    def test_returns_empty_hints_not_none_when_no_nodes(
        self,
        mocker: pytest_mock.MockerFixture,
    ) -> None:
        """Backend on but nothing retrieved: empty hints, not ``None``."""
        mocker.patch(
            "paperless_ai.taxonomy.AIConfig",
            return_value=SimpleNamespace(llm_embedding_backend="huggingface"),
        )
        mocker.patch(
            "paperless_ai.taxonomy.retrieve_similar_nodes",
            return_value=[],
        )

        result = get_taxonomy_hints_for_document(DocumentFactory.build(), user=None)

        assert result == {
            "tags": [],
            "document_types": [],
            "correspondents": [],
            "storage_paths": [],
        }
|