feat(profiling): add workflow trigger matching profiling

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Trenton H
2026-04-11 14:55:25 -07:00
parent ea14c0b06f
commit 0887203d45

231
test_workflow_profile.py Normal file
View File

@@ -0,0 +1,231 @@
"""
Workflow trigger matching profiling.
Run with:
uv run pytest ../test_workflow_profile.py \
-m profiling --override-ini="addopts=" -s -v
Corpus: 500 documents + correspondents + tags + sets of WorkflowTrigger
objects at 5 and 20 count to allow scaling comparisons.
Scenarios
---------
TestWorkflowMatchingProfile
- test_existing_document_5_workflows existing_document_matches_workflow x 5 triggers
- test_existing_document_20_workflows same x 20 triggers
- test_workflow_prefilter prefilter_documents_by_workflowtrigger on 500 docs
- test_trigger_type_comparison compare DOCUMENT_ADDED vs DOCUMENT_UPDATED overhead
"""
from __future__ import annotations
import random
import time
import pytest
from faker import Faker
from profiling import profile_block
from documents.matching import existing_document_matches_workflow
from documents.matching import prefilter_documents_by_workflowtrigger
from documents.models import Correspondent
from documents.models import Document
from documents.models import Tag
from documents.models import Workflow
from documents.models import WorkflowAction
from documents.models import WorkflowTrigger
# Apply the ``profiling`` and ``django_db`` markers to every test in this module.
pytestmark = [pytest.mark.profiling, pytest.mark.django_db]
# Number of Document rows built by the corpus fixture.
NUM_DOCS = 500
# Number of Correspondent rows created; documents pick their correspondent from these.
NUM_CORRESPONDENTS = 10
# Number of Tag rows created; each document is given between 0 and 3 of them.
NUM_TAGS = 20
# Seed passed to both Faker and the fixture's random.Random instance.
SEED = 42
# ---------------------------------------------------------------------------
# Module-scoped fixture
# ---------------------------------------------------------------------------
@pytest.fixture(scope="module")
def module_db(django_db_setup, django_db_blocker):
    """Keep database access unblocked for the lifetime of this module.

    Module-scoped fixtures that depend on this one can touch the ORM while
    they build and tear down their data; access is re-blocked when the
    context manager exits at module teardown.
    """
    unblocked = django_db_blocker.unblock()
    with unblocked:
        yield
@pytest.fixture(scope="module")
def workflow_corpus(module_db):
    """
    Build the shared profiling corpus once per module and remove it afterwards.

    Creates NUM_CORRESPONDENTS correspondents, NUM_TAGS tags and NUM_DOCS
    documents (each with a random correspondent-or-None and 0-3 random tags),
    then three sets of workflows, every workflow holding one trigger and one
    assignment action: 5 and 20 DOCUMENT_UPDATED triggers for scaling
    comparisons, plus 5 DOCUMENT_ADDED triggers.

    Yields:
        A dict with the keys ``"doc"`` (the first created document),
        ``"triggers_5"``, ``"triggers_20"`` and ``"triggers_added"`` (lists of
        WorkflowTrigger objects).
    """
    fake = Faker()
    Faker.seed(SEED)
    rng = random.Random(SEED)

    # Running index that keeps workflow names unique across _make_triggers calls.
    next_wf_index = 0

    def _make_triggers(n: int, trigger_type: int) -> list[WorkflowTrigger]:
        """Create ``n`` single-trigger workflows of ``trigger_type``; return the triggers."""
        nonlocal next_wf_index
        triggers = []
        for i in range(n):
            # Every third trigger filters on a correspondent; the others are unfiltered.
            corr = correspondents[i % NUM_CORRESPONDENTS] if i % 3 == 0 else None
            trigger = WorkflowTrigger.objects.create(
                type=trigger_type,
                filter_has_correspondent=corr,
            )
            action = WorkflowAction.objects.create(
                type=WorkflowAction.WorkflowActionType.ASSIGNMENT,
            )
            idx = next_wf_index
            next_wf_index += 1
            wf = Workflow.objects.create(name=f"wf-profile-{idx}")
            wf.triggers.add(trigger)
            wf.actions.add(action)
            triggers.append(trigger)
        return triggers

    # The module-scoped data lives outside any per-test transaction, so the
    # cleanup is placed in ``finally`` to also run when setup fails part-way.
    try:
        # ---- lookup objects -----------------------------------------------
        print("\n[setup] Creating lookup objects...")  # noqa: T201
        correspondents = [
            Correspondent.objects.create(name=f"wfcorp-{i}-{fake.company()}"[:128])
            for i in range(NUM_CORRESPONDENTS)
        ]
        tags = [
            Tag.objects.create(name=f"wftag-{i}-{fake.word()}"[:100])
            for i in range(NUM_TAGS)
        ]

        # ---- documents ----------------------------------------------------
        print(f"[setup] Building {NUM_DOCS} documents...")  # noqa: T201
        # Three extra None entries so some documents end up without a
        # correspondent; built once instead of once per document.
        correspondent_pool = correspondents + [None] * 3
        raw_docs = [
            Document(
                title=fake.sentence(nb_words=4).rstrip("."),
                content=fake.paragraph(nb_sentences=3),
                checksum=f"WF{i:07d}",
                correspondent=rng.choice(correspondent_pool),
            )
            for i in range(NUM_DOCS)
        ]
        # NOTE(review): the tag assignment below relies on bulk_create setting
        # primary keys on the returned objects -- confirm for every database
        # backend this profile is run against.
        documents = Document.objects.bulk_create(raw_docs, batch_size=500)
        for doc in documents:
            k = rng.randint(0, 3)
            if k:
                doc.tags.add(*rng.sample(tags, k))
        sample_doc = documents[0]
        print(f"[setup] Sample doc pk={sample_doc.pk}")  # noqa: T201

        # ---- build triggers at scale 5 and 20 -----------------------------
        print("[setup] Creating workflow triggers...")  # noqa: T201
        triggers_5 = _make_triggers(
            5,
            WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
        )
        triggers_20 = _make_triggers(
            20,
            WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
        )
        triggers_added = _make_triggers(
            5,
            WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
        )

        yield {
            "doc": sample_doc,
            "triggers_5": triggers_5,
            "triggers_20": triggers_20,
            "triggers_added": triggers_added,
        }
    finally:
        # Teardown: clears the whole tables, not only the rows created above.
        # NOTE(review): if Document uses soft deletion, its delete() may only
        # trash the rows -- confirm a hard delete is not needed here.
        print("\n[teardown] Removing workflow corpus...")  # noqa: T201
        Workflow.objects.all().delete()
        WorkflowTrigger.objects.all().delete()
        WorkflowAction.objects.all().delete()
        Document.objects.all().delete()
        Correspondent.objects.all().delete()
        Tag.objects.all().delete()
# ---------------------------------------------------------------------------
# TestWorkflowMatchingProfile
# ---------------------------------------------------------------------------
class TestWorkflowMatchingProfile:
    """Profile workflow trigger evaluation per document save."""

    @pytest.fixture(autouse=True)
    def _setup(self, workflow_corpus):
        """Copy the shared corpus entries onto the test instance."""
        self.doc = workflow_corpus["doc"]
        self.triggers_5 = workflow_corpus["triggers_5"]
        self.triggers_20 = workflow_corpus["triggers_20"]
        self.triggers_added = workflow_corpus["triggers_added"]

    def _profile_matching(self, triggers):
        """Call existing_document_matches_workflow once per trigger inside profile_block."""
        doc = self.doc
        with profile_block(
            f"existing_document_matches_workflow [{len(triggers)} triggers]",
        ):
            for trigger in triggers:
                existing_document_matches_workflow(doc, trigger)

    def test_existing_document_5_workflows(self):
        """existing_document_matches_workflow x 5 DOCUMENT_UPDATED triggers."""
        self._profile_matching(self.triggers_5)

    def test_existing_document_20_workflows(self):
        """existing_document_matches_workflow x 20 triggers, for comparison with the 5-trigger run."""
        doc = self.doc
        triggers = self.triggers_20
        self._profile_matching(triggers)

        # Also time each call individually to show per-trigger overhead
        timings = []
        for trigger in triggers:
            t0 = time.perf_counter()
            existing_document_matches_workflow(doc, trigger)
            timings.append((time.perf_counter() - t0) * 1_000_000)
        avg_us = sum(timings) / len(timings)
        print(f"\n Per-trigger avg: {avg_us:.1f} us (n={len(timings)})")  # noqa: T201

    def test_workflow_prefilter(self):
        """
        prefilter_documents_by_workflowtrigger over all documents.

        Uses the first three of the 20 DOCUMENT_UPDATED triggers; the label
        printed for each run shows the trigger's correspondent filter id.
        """
        qs = Document.objects.all()
        print(f"\n Corpus: {qs.count()} documents")  # noqa: T201
        for trigger in self.triggers_20[:3]:
            label = (
                f"prefilter_documents_by_workflowtrigger "
                f"[corr={trigger.filter_has_correspondent_id}]"
            )
            with profile_block(label):
                result = prefilter_documents_by_workflowtrigger(qs, trigger)
                # Evaluate the queryset so the database query is part of the profile
                count = result.count()
            print(f" -> {count} docs passed filter")  # noqa: T201

    def test_trigger_type_comparison(self):
        """Compare per-call overhead of DOCUMENT_UPDATED vs DOCUMENT_ADDED."""
        # NOTE(review): both trigger sets are passed to the same function;
        # whether the trigger type changes its code path is not visible here
        # -- confirm the comparison measures what is intended.
        doc = self.doc
        runs = 200
        for label, triggers in [
            ("DOCUMENT_UPDATED", self.triggers_5),
            ("DOCUMENT_ADDED", self.triggers_added),
        ]:
            t0 = time.perf_counter()
            for _ in range(runs):
                for trigger in triggers:
                    existing_document_matches_workflow(doc, trigger)
            # Read the clock right after the timed loop, before any bookkeeping.
            elapsed = time.perf_counter() - t0
            total_calls = runs * len(triggers)
            us_per_call = elapsed / total_calls * 1_000_000
            print(  # noqa: T201
                f" {label:<22s} {us_per_call:.2f} us/call "
                f"({total_calls} calls, {len(triggers)} triggers)",
            )