Enhancement(beta): format taxonomy hints into prompt blocks

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
stumpylog
2026-06-12 15:34:53 -07:00
parent e0ba4cfada
commit 43eb3295ce
2 changed files with 82 additions and 0 deletions
+32
View File
@@ -55,3 +55,35 @@ def build_taxonomy_hints_from_nodes(
correspondents=sorted(correspondents),
storage_paths=sorted(storage_paths),
)
# Closing paragraph of the rendered hints: format_hints_for_prompt() appends it
# once, after the last "Available ..." block, asking for existing names to be
# reused before any new value is proposed.
_HINT_INSTRUCTION = (
    "Prefer existing names from these lists verbatim. "
    "Only propose a new value if none of the existing names fits."
)
def format_hints_for_prompt(hints: TaxonomyHints) -> str:
    """Build the prompt text that lists the known taxonomy names.

    Each category holding at least one name becomes a block made of a
    ``<label>:`` heading followed by one ``- <name>`` bullet per entry.
    Blocks are separated by a blank line, and ``_HINT_INSTRUCTION`` is
    appended once as the final paragraph.

    If no category holds any name the result is the empty string, so a
    caller can handle it exactly like the absence of hints.
    """
    # The keys are written out as literals (rather than looped over) to stay
    # TypedDict-safe for mypy. The tuple order below is the order in which
    # the blocks show up in the prompt.
    sections = (
        ("Available tags", hints["tags"]),
        ("Available document types", hints["document_types"]),
        ("Available correspondents", hints["correspondents"]),
        ("Available storage paths", hints["storage_paths"]),
    )

    # Empty categories are dropped here, so they contribute no heading at all.
    rendered = [
        f"{heading}:\n" + "\n".join(f"- {name}" for name in names)
        for heading, names in sections
        if names
    ]

    if rendered:
        return "\n\n".join(rendered + [_HINT_INSTRUCTION])
    return ""
+50
View File
@@ -1,6 +1,8 @@
from types import SimpleNamespace
from paperless_ai.taxonomy import TaxonomyHints
from paperless_ai.taxonomy import build_taxonomy_hints_from_nodes
from paperless_ai.taxonomy import format_hints_for_prompt
def make_node(**metadata: object) -> SimpleNamespace:
@@ -77,3 +79,51 @@ class TestBuildTaxonomyHintsFromNodes:
assert build_taxonomy_hints_from_nodes(
nodes,
) == build_taxonomy_hints_from_nodes(nodes)
class TestFormatHintsForPrompt:
    """Tests for ``format_hints_for_prompt``: block selection, order and instruction."""

    def test_all_blocks_present_when_all_categories_nonempty(self) -> None:
        """Every populated category yields its heading and its values, in order."""
        hints: TaxonomyHints = {
            "tags": ["Bloodwork"],
            "document_types": ["Invoice"],
            "correspondents": ["IRS"],
            "storage_paths": ["Financial"],
        }

        result = format_hints_for_prompt(hints)

        assert "Available tags:" in result
        assert "Available document types:" in result
        assert "Available correspondents:" in result
        assert "Available storage paths:" in result

        # Every category's value must be listed, not just the first one.
        assert "- Bloodwork" in result
        assert "- Invoice" in result
        assert "- IRS" in result
        assert "- Financial" in result

        # The blocks appear in a fixed order: tags, document types,
        # correspondents, storage paths.
        positions = [
            result.index("Available tags:"),
            result.index("Available document types:"),
            result.index("Available correspondents:"),
            result.index("Available storage paths:"),
        ]
        assert positions == sorted(positions)

    def test_multiple_values_listed_one_per_line(self) -> None:
        """Several names in one category are rendered as consecutive bullets."""
        hints: TaxonomyHints = {
            "tags": ["Bloodwork", "Taxes"],
            "document_types": [],
            "correspondents": [],
            "storage_paths": [],
        }

        result = format_hints_for_prompt(hints)

        assert "Available tags:\n- Bloodwork\n- Taxes" in result

    def test_empty_category_produces_no_block(self) -> None:
        """Categories without names leave no heading and no stray blank block."""
        hints: TaxonomyHints = {
            "tags": ["Bloodwork"],
            "document_types": [],
            "correspondents": [],
            "storage_paths": [],
        }

        result = format_hints_for_prompt(hints)

        assert "Available tags:" in result
        assert "- Bloodwork" in result
        assert "Available document types:" not in result
        assert "Available correspondents:" not in result
        assert "Available storage paths:" not in result
        # Skipped categories must not leave empty paragraphs behind.
        assert "\n\n\n" not in result

    def test_all_empty_produces_empty_string(self) -> None:
        """With nothing to list, the output is exactly the empty string."""
        hints: TaxonomyHints = {
            "tags": [],
            "document_types": [],
            "correspondents": [],
            "storage_paths": [],
        }

        assert format_hints_for_prompt(hints) == ""

    def test_instruction_line_appears_once(self) -> None:
        """The instruction is emitted once, as the final paragraph."""
        hints: TaxonomyHints = {
            "tags": ["Bloodwork"],
            "document_types": ["Invoice"],
            "correspondents": [],
            "storage_paths": [],
        }

        result = format_hints_for_prompt(hints)

        assert result.count("Prefer existing names from these lists verbatim") == 1
        # Paragraphs are separated by blank lines; the instruction is the last.
        assert result.split("\n\n")[-1].startswith(
            "Prefer existing names from these lists verbatim",
        )