mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-30 01:04:18 +00:00
Enhancement(beta): format taxonomy hints into prompt blocks
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -55,3 +55,35 @@ def build_taxonomy_hints_from_nodes(
|
||||
correspondents=sorted(correspondents),
|
||||
storage_paths=sorted(storage_paths),
|
||||
)
|
||||
|
||||
|
||||
_HINT_INSTRUCTION = (
|
||||
"Prefer existing names from these lists verbatim. Only propose a new value "
|
||||
"if none of the existing names fits."
|
||||
)
|
||||
|
||||
|
||||
def format_hints_for_prompt(hints: TaxonomyHints) -> str:
    """Turn taxonomy hints into prompt text: one labelled list per category.

    Each category that has at least one value becomes a block of the form
    ``"<label>:"`` followed by one ``"- <value>"`` line per value. Blocks are
    separated by a blank line and followed by a single trailing instruction.

    If no category has any values the result is ``""``, which lets callers
    handle "empty hints" and "no hints" identically.
    """
    # Keys are spelled out literally (rather than iterated) so the TypedDict
    # lookups stay checkable by mypy. The tuple order is the order in which
    # the blocks are emitted.
    sections: tuple[tuple[str, list[str]], ...] = (
        ("Available tags", hints["tags"]),
        ("Available document types", hints["document_types"]),
        ("Available correspondents", hints["correspondents"]),
        ("Available storage paths", hints["storage_paths"]),
    )

    # Only categories with at least one entry contribute a block.
    rendered: list[str] = [
        f"{heading}:\n" + "\n".join(f"- {name}" for name in names)
        for heading, names in sections
        if names
    ]

    if rendered:
        # The instruction is added exactly once, after the last block.
        return "\n\n".join([*rendered, _HINT_INSTRUCTION])
    return ""
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
from types import SimpleNamespace
|
||||
|
||||
from paperless_ai.taxonomy import TaxonomyHints
|
||||
from paperless_ai.taxonomy import build_taxonomy_hints_from_nodes
|
||||
from paperless_ai.taxonomy import format_hints_for_prompt
|
||||
|
||||
|
||||
def make_node(**metadata: object) -> SimpleNamespace:
|
||||
@@ -77,3 +79,51 @@ class TestBuildTaxonomyHintsFromNodes:
|
||||
assert build_taxonomy_hints_from_nodes(
|
||||
nodes,
|
||||
) == build_taxonomy_hints_from_nodes(nodes)
|
||||
|
||||
|
||||
class TestFormatHintsForPrompt:
    """Rendering checks for ``format_hints_for_prompt``."""

    def test_all_blocks_present_when_all_categories_nonempty(self) -> None:
        """Every populated category shows up under its own header."""
        hints: TaxonomyHints = {
            "tags": ["Bloodwork"],
            "document_types": ["Invoice"],
            "correspondents": ["IRS"],
            "storage_paths": ["Financial"],
        }
        rendered = format_hints_for_prompt(hints)
        for header in (
            "Available tags:",
            "Available document types:",
            "Available correspondents:",
            "Available storage paths:",
        ):
            assert header in rendered
        assert "- Bloodwork" in rendered

    def test_empty_category_produces_no_block(self) -> None:
        """Categories without values contribute no header at all."""
        hints: TaxonomyHints = {
            "tags": ["Bloodwork"],
            "document_types": [],
            "correspondents": [],
            "storage_paths": [],
        }
        rendered = format_hints_for_prompt(hints)
        assert "Available tags:" in rendered
        for header in (
            "Available document types:",
            "Available correspondents:",
            "Available storage paths:",
        ):
            assert header not in rendered

    def test_all_empty_produces_empty_string(self) -> None:
        """With nothing to list, the formatter returns an empty string."""
        hints: TaxonomyHints = {
            "tags": [],
            "document_types": [],
            "correspondents": [],
            "storage_paths": [],
        }
        rendered = format_hints_for_prompt(hints)
        assert rendered == ""

    def test_instruction_line_appears_once(self) -> None:
        """The closing instruction is emitted once, not once per block."""
        hints: TaxonomyHints = {
            "tags": ["Bloodwork"],
            "document_types": ["Invoice"],
            "correspondents": [],
            "storage_paths": [],
        }
        rendered = format_hints_for_prompt(hints)
        occurrences = rendered.count(
            "Prefer existing names from these lists verbatim",
        )
        assert occurrences == 1
|
||||
|
||||
Reference in New Issue
Block a user