refactor(tasks): add docstrings and rename _parse_legacy_result

- Add docstrings to _extract_input_data, _determine_trigger_source, _extract_owner_id explaining what each helper does and why - Rename _parse_legacy_result -> _parse_consume_result: the function parses current consume_file string outputs (consumer.py returns "New document id N created" and "It is a duplicate of X (#N)"), not legacy data; the old name was misleading Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-07-17 17:34:55 +00:00 · 2026-04-15 18:34:34 -07:00
parent 0bc9227d6f
commit 979d8a67f0
1 changed files with 25 additions and 2 deletions
@@ -1040,6 +1040,13 @@ def _extract_input_data(
    args: tuple,
    task_kwargs: dict,
 ) -> dict:
+    """Build the input_data dict stored on the PaperlessTask record.
+
+    For consume_file tasks this includes the filename, MIME type, and any
+    non-null overrides from the DocumentMetadataOverrides object.  For
+    mail_fetch tasks it captures the account_ids list.  All other task
+    types store no input data and return {}.
+    """
    if task_type == PaperlessTask.TaskType.CONSUME_FILE:
        input_doc, overrides = _get_consume_args(args, task_kwargs)
        if input_doc is None:
@@ -1075,6 +1082,13 @@ def _determine_trigger_source(
    task_kwargs: dict,
    headers: dict,
 ) -> PaperlessTask.TriggerSource:
+    """Resolve the TriggerSource for a task being published to the broker.
+
+    Priority order:
+    1. Explicit trigger_source header (set by beat schedule or apply_async callers).
+    2. For consume_file tasks, the DocumentSource on the input document.
+    3. MANUAL as the catch-all for all other cases.
+    """
    # Explicit header takes priority -- covers beat ("scheduled") and system auto-runs ("system")
    header_source = headers.get("trigger_source")
    if header_source == "scheduled":
@@ -1098,6 +1112,7 @@ def _extract_owner_id(
    args: tuple,
    task_kwargs: dict,
 ) -> int | None:
+    """Return the owner_id from consume_file overrides, or None for all other task types."""
    if task_type != PaperlessTask.TaskType.CONSUME_FILE:
        return None
    _, overrides = _get_consume_args(args, task_kwargs)
@@ -1106,7 +1121,15 @@ def _extract_owner_id(
    return None


-def _parse_legacy_result(result: str) -> dict | None:
+def _parse_consume_result(result: str) -> dict | None:
+    """Parse a consume_file string result into a structured dict.
+
+    consume_file returns human-readable strings rather than dicts (e.g.
+    "Success. New document id 42 created" or "It is a duplicate of foo (#7)").
+    This function extracts the document ID or duplicate reference so the
+    result can be stored as structured data on the PaperlessTask record.
+    Returns None when the string does not match any known pattern.
+    """
    if match := _re.search(r"New document id (\d+) created", result):
        return {"document_id": int(match.group(1))}
    if match := _re.search(r"It is a duplicate of .* \(#(\d+)\)", result):
@@ -1210,7 +1233,7 @@ def task_postrun_handler(
            result_data = retval
        elif isinstance(retval, str):
            result_message = retval
-            result_data = _parse_legacy_result(retval)
+            result_data = _parse_consume_result(retval)

        now = timezone.now()
        task_instance = PaperlessTask.objects.filter(task_id=task_id).first()