mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-04-16 13:08:50 +00:00
refactor(tasks): add docstrings and rename _parse_legacy_result
- Add docstrings to _extract_input_data, _determine_trigger_source, _extract_owner_id explaining what each helper does and why - Rename _parse_legacy_result -> _parse_consume_result: the function parses current consume_file string outputs (consumer.py returns "New document id N created" and "It is a duplicate of X (#N)"), not legacy data; the old name was misleading Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1040,6 +1040,13 @@ def _extract_input_data(
|
||||
args: tuple,
|
||||
task_kwargs: dict,
|
||||
) -> dict:
|
||||
"""Build the input_data dict stored on the PaperlessTask record.
|
||||
|
||||
For consume_file tasks this includes the filename, MIME type, and any
|
||||
non-null overrides from the DocumentMetadataOverrides object. For
|
||||
mail_fetch tasks it captures the account_ids list. All other task
|
||||
types store no input data and return {}.
|
||||
"""
|
||||
if task_type == PaperlessTask.TaskType.CONSUME_FILE:
|
||||
input_doc, overrides = _get_consume_args(args, task_kwargs)
|
||||
if input_doc is None:
|
||||
@@ -1075,6 +1082,13 @@ def _determine_trigger_source(
|
||||
task_kwargs: dict,
|
||||
headers: dict,
|
||||
) -> PaperlessTask.TriggerSource:
|
||||
"""Resolve the TriggerSource for a task being published to the broker.
|
||||
|
||||
Priority order:
|
||||
1. Explicit trigger_source header (set by beat schedule or apply_async callers).
|
||||
2. For consume_file tasks, the DocumentSource on the input document.
|
||||
3. MANUAL as the catch-all for all other cases.
|
||||
"""
|
||||
# Explicit header takes priority -- covers beat ("scheduled") and system auto-runs ("system")
|
||||
header_source = headers.get("trigger_source")
|
||||
if header_source == "scheduled":
|
||||
@@ -1098,6 +1112,7 @@ def _extract_owner_id(
|
||||
args: tuple,
|
||||
task_kwargs: dict,
|
||||
) -> int | None:
|
||||
"""Return the owner_id from consume_file overrides, or None for all other task types."""
|
||||
if task_type != PaperlessTask.TaskType.CONSUME_FILE:
|
||||
return None
|
||||
_, overrides = _get_consume_args(args, task_kwargs)
|
||||
@@ -1106,7 +1121,15 @@ def _extract_owner_id(
|
||||
return None
|
||||
|
||||
|
||||
def _parse_legacy_result(result: str) -> dict | None:
|
||||
def _parse_consume_result(result: str) -> dict | None:
|
||||
"""Parse a consume_file string result into a structured dict.
|
||||
|
||||
consume_file returns human-readable strings rather than dicts (e.g.
|
||||
"Success. New document id 42 created" or "It is a duplicate of foo (#7)").
|
||||
This function extracts the document ID or duplicate reference so the
|
||||
result can be stored as structured data on the PaperlessTask record.
|
||||
Returns None when the string does not match any known pattern.
|
||||
"""
|
||||
if match := _re.search(r"New document id (\d+) created", result):
|
||||
return {"document_id": int(match.group(1))}
|
||||
if match := _re.search(r"It is a duplicate of .* \(#(\d+)\)", result):
|
||||
@@ -1210,7 +1233,7 @@ def task_postrun_handler(
|
||||
result_data = retval
|
||||
elif isinstance(retval, str):
|
||||
result_message = retval
|
||||
result_data = _parse_legacy_result(retval)
|
||||
result_data = _parse_consume_result(retval)
|
||||
|
||||
now = timezone.now()
|
||||
task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
|
||||
|
||||
Reference in New Issue
Block a user