From c368331a6163100221786940ba1dd9d7ce90ddf5 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Tue, 14 Apr 2026 10:25:47 -0700 Subject: [PATCH] =?UTF-8?q?fix:=20final=20sweep=20=E2=80=94=20update=20sta?= =?UTF-8?q?le=20tests=20to=20use=20DocumentVersion=20model?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - test_document_model: replace root_document FK tests with DocumentVersion cascade test, and update suggestion_content/content_length tests to reflect that Document.content is now always current (no version proxy) - test_matchables: replace obsolete root_document version-fallback tests with plain content-matching tests (matching now uses Document.content) - test_workflows: replace "ignores version documents" tests (concept removed) with tests verifying workflows run correctly on versioned documents - test_version_profile: rewrite corpora to use DocumentVersion.objects instead of old Document.root_document/version_index fields; fix module- scoped fixture teardown to use hard_delete() to prevent test isolation leaks into deleted_objects; keep pre-refactor baseline numbers in summary Co-Authored-By: Claude Sonnet 4.6 --- pyproject.toml | 1 + src/documents/tests/test_document_model.py | 67 +-- src/documents/tests/test_matchables.py | 40 +- src/documents/tests/test_version_profile.py | 619 ++++++++++++++++++++ src/documents/tests/test_workflows.py | 72 +-- 5 files changed, 692 insertions(+), 107 deletions(-) create mode 100644 src/documents/tests/test_version_profile.py diff --git a/pyproject.toml b/pyproject.toml index fbb96428b..d85501dac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -312,6 +312,7 @@ markers = [ "date_parsing: Tests which cover date parsing from content or filename", "management: Tests which cover management commands/functionality", "search: Tests for the Tantivy search backend", + "profiling: Performance profiling tests (run with -m profiling --override-ini='addopts=')", ] [tool.pytest_env] diff --git a/src/documents/tests/test_document_model.py b/src/documents/tests/test_document_model.py index 8a58f4b13..24aadd257 100644 --- a/src/documents/tests/test_document_model.py +++ b/src/documents/tests/test_document_model.py @@ -102,27 +102,34 @@ class TestDocument(TestCase): self.assertEqual(len(actual_deletions), 2) - def test_delete_root_deletes_versions(self) -> None: - root = Document.objects.create( + def test_delete_document_cascades_to_versions(self) -> None: + from documents.models import DocumentVersion + + doc = Document.objects.create( correspondent=Correspondent.objects.create(name="Test0"), title="Head", content="content", checksum="checksum", mime_type="application/pdf", ) - Document.objects.create( - root_document=root, - correspondent=root.correspondent, - title="Version", - content="content", + DocumentVersion.objects.create( + document=doc, + version_number=1, + checksum="checksum", + mime_type="application/pdf", + ) + DocumentVersion.objects.create( + document=doc, + version_number=2, checksum="checksum2", mime_type="application/pdf", ) - root.delete() + self.assertEqual(DocumentVersion.objects.filter(document=doc).count(), 2) + doc.delete() self.assertEqual(Document.objects.count(), 0) - self.assertEqual(Document.deleted_objects.count(), 2) + self.assertEqual(DocumentVersion.objects.count(), 0) def test_file_name(self) -> None: doc = Document( @@ -156,45 +163,27 @@ class TestDocument(TestCase): ) self.assertEqual(doc.get_public_filename(), "2020-12-25 test") - def test_suggestion_content_uses_latest_version_content_for_root_documents( - self, - ) -> None: - root = Document.objects.create( - title="root", - checksum="root", + def test_suggestion_content_returns_document_content(self) -> None: + doc = Document.objects.create( + title="doc", + checksum="doc", mime_type="application/pdf", - content="outdated root content", - ) - version = Document.objects.create( - title="v1", - checksum="v1", - mime_type="application/pdf", - root_document=root, - content="latest version content", + content="the document content", ) - self.assertEqual(root.suggestion_content, version.content) + self.assertEqual(doc.suggestion_content, "the document content") - def test_content_length_is_per_document_row_for_versions(self) -> None: - root = Document.objects.create( - title="root", - checksum="root", + def test_content_length_reflects_document_content(self) -> None: + doc = Document.objects.create( + title="doc", + checksum="doc", mime_type="application/pdf", content="abc", ) - version = Document.objects.create( - title="v1", - checksum="v1", - mime_type="application/pdf", - root_document=root, - content="abcdefgh", - ) - root.refresh_from_db() - version.refresh_from_db() + doc.refresh_from_db() - self.assertEqual(root.content_length, 3) - self.assertEqual(version.content_length, 8) + self.assertEqual(doc.content_length, 3) def test_suggestion_content() -> None: diff --git a/src/documents/tests/test_matchables.py b/src/documents/tests/test_matchables.py index e13d3827a..47dadf1e7 100644 --- a/src/documents/tests/test_matchables.py +++ b/src/documents/tests/test_matchables.py @@ -48,19 +48,12 @@ class _TestMatchingBase(TestCase): class TestMatching(_TestMatchingBase): - def test_matches_uses_latest_version_content_for_root_documents(self) -> None: - root = Document.objects.create( - title="root", - checksum="root", + def test_matches_uses_document_content(self) -> None: + doc = Document.objects.create( + title="doc", + checksum="doc", mime_type="application/pdf", - content="root content without token", - ) - Document.objects.create( - title="v1", - checksum="v1", - mime_type="application/pdf", - root_document=root, - content="latest version contains keyword", + content="document contains keyword", ) tag = Tag.objects.create( name="tag", @@ -68,23 +61,14 @@ class TestMatching(_TestMatchingBase): matching_algorithm=Tag.MATCH_ANY, ) - self.assertTrue(matching.matches(tag, root)) + self.assertTrue(matching.matches(tag, doc)) - def test_matches_does_not_fall_back_to_root_content_when_version_exists( - self, - ) -> None: - root = Document.objects.create( - title="root", - checksum="root", + def test_matches_does_not_match_when_content_lacks_keyword(self) -> None: + doc = Document.objects.create( + title="doc", + checksum="doc", mime_type="application/pdf", - content="root contains keyword", - ) - Document.objects.create( - title="v1", - checksum="v1", - mime_type="application/pdf", - root_document=root, - content="latest version without token", + content="document without the token", ) tag = Tag.objects.create( name="tag", @@ -92,7 +76,7 @@ class TestMatching(_TestMatchingBase): matching_algorithm=Tag.MATCH_ANY, ) - self.assertFalse(matching.matches(tag, root)) + self.assertFalse(matching.matches(tag, doc)) def test_match_none(self) -> None: self._test_matching( diff --git a/src/documents/tests/test_version_profile.py b/src/documents/tests/test_version_profile.py new file mode 100644 index 000000000..54610787c --- /dev/null +++ b/src/documents/tests/test_version_profile.py @@ -0,0 +1,619 @@ +""" +Profiling tests for the DocumentVersion model refactor. + +Measures query count, wall time, and memory for the document list API +before and after replacing self-referential versioning with DocumentVersion. + +Run with: + cd src && uv run pytest documents/tests/test_version_profile.py \ + -m profiling --override-ini="addopts=" -s -v + +Corpus parameters: + DOCS_BASELINE = 1000 (Scenario A: no versions) + DOCS_VERSIONED = 100 (Scenario B: 3 versions each) + VERSIONS_PER_DOC = 3 + DETAIL_VERSIONS = 5 (Scenario C: single detail) + +NOTE: This file is intentionally NOT committed to git. + It exists only to capture before/after profiling numbers. +""" + +from __future__ import annotations + +import sys +import time +from pathlib import Path +from time import perf_counter + +import pytest +from django.contrib.auth.models import User +from django.db import connection +from django.db import reset_queries +from django.test.utils import override_settings +from faker import Faker +from rest_framework.test import APIClient + +from documents.models import Document +from documents.models import DocumentVersion + +# --------------------------------------------------------------------------- +# Try to import profiling helpers; define inline fallback if not available. +# --------------------------------------------------------------------------- + +try: + sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) + from profiling import measure_memory + from profiling import profile_block + from profiling import profile_cpu # noqa: F401 +except ImportError: + import tracemalloc + from contextlib import contextmanager + from typing import Any + + @contextmanager + def profile_block(label: str = "block"): + tracemalloc.start() + snap_before = tracemalloc.take_snapshot() + with override_settings(DEBUG=True): + reset_queries() + t0 = perf_counter() + yield + elapsed = perf_counter() - t0 + queries = list(connection.queries) + snap_after = tracemalloc.take_snapshot() + _, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() + stats = snap_after.compare_to(snap_before, "lineno") + mem_diff = sum(s.size_diff for s in stats) + query_time = sum(float(q["time"]) for q in queries) + print(f"\n{'=' * 60}") # noqa: T201 + print(f" Profile: {label}") # noqa: T201 + print(f"{'=' * 60}") # noqa: T201 + print(f" Wall time: {elapsed:.4f}s") # noqa: T201 + print(f" Queries: {len(queries)} ({query_time:.4f}s)") # noqa: T201 + print(f" Memory delta: {mem_diff / 1024:.1f} KiB") # noqa: T201 + print(f" Peak memory: {peak / 1024:.1f} KiB") # noqa: T201 + print("\n Top 5 allocations:") # noqa: T201 + for stat in stats[:5]: + print(f" {stat}") # noqa: T201 + print(f"{'=' * 60}\n") # noqa: T201 + + def measure_memory(fn, *, label: str) -> tuple[Any, float, float]: + tracemalloc.start() + snap_before = tracemalloc.take_snapshot() + result = fn() + snap_after = tracemalloc.take_snapshot() + _, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() + stats = snap_after.compare_to(snap_before, "lineno") + delta_kib = sum(s.size_diff for s in stats) / 1024 + print(f"\n{'=' * 72}") # noqa: T201 + print(f" [memory] {label}") # noqa: T201 + print(f" memory delta: {delta_kib:+.1f} KiB") # noqa: T201 + print(f" peak traced: {peak / 1024:.1f} KiB") # noqa: T201 + print(f"{'=' * 72}") # noqa: T201 + for stat in stats[:10]: + if stat.size_diff != 0: + print( # noqa: T201 + f" {stat.size_diff / 1024:+8.1f} KiB {stat.traceback.format()[0]}", + ) + return result, peak / 1024, delta_kib + + +pytestmark = [pytest.mark.profiling, pytest.mark.django_db] + +# --------------------------------------------------------------------------- +# Corpus parameters +# --------------------------------------------------------------------------- + +DOCS_BASELINE = 1000 +DOCS_VERSIONED = 100 +VERSIONS_PER_DOC = 3 +DETAIL_VERSIONS = 5 +PAGE_SIZE = 25 +SEED = 42 + + +# --------------------------------------------------------------------------- +# Module-scoped DB fixture +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="module") +def module_db(django_db_setup, django_db_blocker): + with django_db_blocker.unblock(): + yield + + +# --------------------------------------------------------------------------- +# Scenario A corpus: 1000 plain documents, no versions +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="module") +def corpus_baseline(module_db): + fake = Faker() + Faker.seed(SEED) + + owner = User.objects.create_superuser( + username="vp_baseline_owner", + password="admin", + ) + + raw = [ + Document( + title=fake.sentence(nb_words=5).rstrip("."), + content=fake.paragraph(nb_sentences=3), + checksum=f"VPBASE{i:07d}", + owner=owner, + ) + for i in range(DOCS_BASELINE) + ] + t0 = time.perf_counter() + Document.objects.bulk_create(raw) + print( # noqa: T201 + f"\n[corpus_baseline] bulk_create {DOCS_BASELINE} docs: " + f"{time.perf_counter() - t0:.2f}s", + ) + + yield {"owner": owner} + + print("\n[corpus_baseline] teardown") # noqa: T201 + Document.global_objects.filter(checksum__startswith="VPBASE").hard_delete() + User.objects.filter(username="vp_baseline_owner").delete() + + +# --------------------------------------------------------------------------- +# Scenario B corpus: 100 documents each with VERSIONS_PER_DOC DocumentVersions +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="module") +def corpus_versioned(module_db): + fake = Faker() + Faker.seed(SEED + 1) + + owner = User.objects.create_superuser( + username="vp_versioned_owner", + password="admin", + ) + + root_docs = Document.objects.bulk_create( + [ + Document( + title=fake.sentence(nb_words=5).rstrip("."), + content=fake.paragraph(nb_sentences=3), + checksum=f"VPROOT{i:07d}", + owner=owner, + ) + for i in range(DOCS_VERSIONED) + ], + ) + + version_rows = [] + for root in root_docs: + for v in range(1, VERSIONS_PER_DOC + 1): + version_rows.append( + DocumentVersion( + document=root, + version_number=v, + checksum=f"VPVER{root.pk:06d}v{v}", + mime_type="application/pdf", + ), + ) + t0 = time.perf_counter() + DocumentVersion.objects.bulk_create(version_rows) + print( # noqa: T201 + f"\n[corpus_versioned] bulk_create {len(version_rows)} document versions: " + f"{time.perf_counter() - t0:.2f}s", + ) + + yield {"owner": owner, "root_docs": root_docs} + + print("\n[corpus_versioned] teardown") # noqa: T201 + Document.global_objects.filter(checksum__startswith="VPROOT").hard_delete() + User.objects.filter(username="vp_versioned_owner").delete() + + +# --------------------------------------------------------------------------- +# Scenario C corpus: single document with DETAIL_VERSIONS DocumentVersions +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="module") +def corpus_detail(module_db): + fake = Faker() + Faker.seed(SEED + 2) + + owner = User.objects.create_superuser( + username="vp_detail_owner", + password="admin", + ) + + root = Document.objects.create( + title="Detail profile doc", + content=fake.paragraph(nb_sentences=4), + checksum="VPDETAILROOT", + owner=owner, + ) + + for v in range(1, DETAIL_VERSIONS + 1): + DocumentVersion.objects.create( + document=root, + version_number=v, + checksum=f"VPDETAILVER{v}", + mime_type="application/pdf", + ) + + yield {"owner": owner, "root_pk": root.pk} + + print("\n[corpus_detail] teardown") # noqa: T201 + Document.global_objects.filter(checksum__startswith="VPDETAIL").hard_delete() + User.objects.filter(username="vp_detail_owner").delete() + + +# --------------------------------------------------------------------------- +# Scenario A: Baseline — no versions, list endpoint +# --------------------------------------------------------------------------- + + +class TestScenarioA_BaselineNoVersions: + """GET /api/documents/ with N plain documents, no version-children.""" + + @pytest.fixture(autouse=True) + def _setup(self, corpus_baseline): + self.owner = corpus_baseline["owner"] + self.client = APIClient() + self.client.force_authenticate(user=self.owner) + + def _count_queries_for_list(self, page_size: int = PAGE_SIZE) -> list: + with override_settings(DEBUG=True): + reset_queries() + resp = self.client.get(f"/api/documents/?page=1&page_size={page_size}") + queries = list(connection.queries) + assert resp.status_code == 200, resp.data + return queries + + def test_a1_query_count_page25(self): + queries = self._count_queries_for_list() + + print(f"\n Query count (page_size={PAGE_SIZE}): {len(queries)}") # noqa: T201 + for i, q in enumerate(queries): + print(f" [{i}] {q['sql'][:120]} -- {q['time']}s") # noqa: T201 + + # After refactor: root_document_id must not appear in any query + main_queries = [q for q in queries if "documents_document" in q["sql"]] + subquery_refs = [q for q in main_queries if "root_document_id" in q["sql"]] + print( # noqa: T201 + f" Queries referencing root_document_id: {len(subquery_refs)}" + " (must be 0 after refactor)", + ) + assert len(subquery_refs) == 0, ( + "root_document_id must not appear in queries after refactor" + ) + + # First request in session adds 2 content_type warmup queries; steady-state is 8. + assert len(queries) <= 12 + + def test_a2_wall_time_n100(self): + with profile_block( + f"Scenario A — GET /api/documents/ page_size={PAGE_SIZE}, " + f"N={DOCS_BASELINE} baseline docs", + ): + resp = self.client.get(f"/api/documents/?page=1&page_size={PAGE_SIZE}") + assert resp.status_code == 200 + + def test_a3_memory_list_serialization(self): + _, peak_kib, delta_kib = measure_memory( + lambda: self.client.get(f"/api/documents/?page=1&page_size={PAGE_SIZE}"), + label=f"Scenario A — serialize page_size={PAGE_SIZE} plain docs", + ) + print( # noqa: T201 + f"\n Peak KiB: {peak_kib:.1f} Delta KiB: {delta_kib:.1f}\n" + " (baseline after refactor)", + ) + + def test_a4_wall_time_comparison_sizes(self): + """Compare wall time at page_size=10, 25, 100 to quantify subquery slope.""" + results = {} + for page_size in [10, 25, 100]: + t0 = perf_counter() + resp = self.client.get(f"/api/documents/?page=1&page_size={page_size}") + elapsed_ms = (perf_counter() - t0) * 1000 + assert resp.status_code == 200 + results[page_size] = elapsed_ms + + print("\n Wall time by page_size:") # noqa: T201 + for ps, ms in results.items(): + print(f" page_size={ps:4d}: {ms:.1f} ms") # noqa: T201 + + ratio = results[100] / max(results[25], 0.1) + print( # noqa: T201 + f" time(100)/time(25) ratio: {ratio:.2f}" + " (before refactor often > 4.0; target < 3.0 after)", + ) + + +# --------------------------------------------------------------------------- +# Scenario B: Versioned documents — list endpoint +# --------------------------------------------------------------------------- + + +class TestScenarioB_VersionedDocsList: + """GET /api/documents/ with 100 docs each having 3 DocumentVersions.""" + + @pytest.fixture(autouse=True) + def _setup(self, corpus_versioned): + self.owner = corpus_versioned["owner"] + self.client = APIClient() + self.client.force_authenticate(user=self.owner) + + def test_b1_query_count_versioned(self): + with override_settings(DEBUG=True): + reset_queries() + resp = self.client.get(f"/api/documents/?page=1&page_size={PAGE_SIZE}") + queries = list(connection.queries) + + assert resp.status_code == 200 + print(f"\n Query count (versioned corpus): {len(queries)}") # noqa: T201 + for i, q in enumerate(queries): + print(f" [{i}] {q['sql'][:120]} -- {q['time']}s") # noqa: T201 + + assert len(queries) <= 8 + + def test_b2_memory_versioned_list(self): + _, peak_kib, delta_kib = measure_memory( + lambda: self.client.get(f"/api/documents/?page=1&page_size={PAGE_SIZE}"), + label=( + f"Scenario B — {DOCS_VERSIONED} docs x {VERSIONS_PER_DOC} versions, " + f"page_size={PAGE_SIZE}" + ), + ) + print( # noqa: T201 + f"\n Peak KiB: {peak_kib:.1f} Delta KiB: {delta_kib:.1f}\n" + " Target after refactor: delta < 60% of pre-refactor value", + ) + + def test_b3_wall_time_versioned(self): + with profile_block( + f"Scenario B — versioned list page_size={PAGE_SIZE}, " + f"{DOCS_VERSIONED} docs x {VERSIONS_PER_DOC} versions", + ): + resp = self.client.get(f"/api/documents/?page=1&page_size={PAGE_SIZE}") + assert resp.status_code == 200 + + def test_b4_no_root_document_id_in_queries(self): + """Structural assertion: root_document_id must not appear in queries after refactor.""" + with override_settings(DEBUG=True): + reset_queries() + self.client.get(f"/api/documents/?page=1&page_size={PAGE_SIZE}") + queries = list(connection.queries) + + doc_queries = [ + q + for q in queries + if "documents_document" in q["sql"] and "SELECT" in q["sql"].upper() + ] + subquery_present = any("root_document_id" in q["sql"] for q in doc_queries) + print( # noqa: T201 + f"\n root_document_id in SELECT queries: {subquery_present}\n" + " AFTER refactor: must be False.", + ) + assert not subquery_present, "root_document_id must not appear after refactor" + + +# --------------------------------------------------------------------------- +# Scenario C: Single document detail, 5 versions +# --------------------------------------------------------------------------- + + +class TestScenarioC_DetailWithVersions: + """GET /api/documents/{id}/ for a document with 5 DocumentVersions.""" + + @pytest.fixture(autouse=True) + def _setup(self, corpus_detail): + self.owner = corpus_detail["owner"] + self.root_pk = corpus_detail["root_pk"] + self.client = APIClient() + self.client.force_authenticate(user=self.owner) + + def test_c1_query_count_detail(self): + with override_settings(DEBUG=True): + reset_queries() + resp = self.client.get(f"/api/documents/{self.root_pk}/") + queries = list(connection.queries) + + assert resp.status_code == 200 + print(f"\n Detail query count: {len(queries)}") # noqa: T201 + for i, q in enumerate(queries): + print(f" [{i}] {q['sql'][:120]} -- {q['time']}s") # noqa: T201 + + assert len(queries) <= 10 + + def test_c2_wall_time_detail(self): + with profile_block( + f"Scenario C — GET /api/documents/{self.root_pk}/ " + f"({DETAIL_VERSIONS} versions)", + ): + resp = self.client.get(f"/api/documents/{self.root_pk}/") + assert resp.status_code == 200 + + def test_c3_versions_field_in_response(self): + resp = self.client.get(f"/api/documents/{self.root_pk}/") + assert resp.status_code == 200 + versions = resp.data.get("versions", []) + print( # noqa: T201 + f"\n versions count in response: {len(versions)}\n" + f" After refactor: expect {DETAIL_VERSIONS} DocumentVersion records", + ) + assert len(versions) == DETAIL_VERSIONS + + +# --------------------------------------------------------------------------- +# Scenario D: Version creation (consumer path, simulated) +# --------------------------------------------------------------------------- + + +class TestScenarioD_VersionCreation: + """ + Query cost of creating a new document version. + + BEFORE refactor: + SELECT FOR UPDATE on Document row (1 query) + Aggregate max(version_index) on Document (1 query) + INSERT new Document row (1 query) + = 3 + savepoint overhead + + AFTER refactor: + SELECT count on DocumentVersion rows (1 query) + INSERT new DocumentVersion row (1 query) + = 2 + savepoint overhead (smaller INSERT + narrower table) + """ + + @pytest.fixture(autouse=True) + def _setup(self, db): + fake = Faker() + owner = User.objects.create_superuser( + username="vp_create_owner", + password="admin", + ) + self.root_doc = Document.objects.create( + title="Version creation test doc", + content=fake.paragraph(), + checksum="VPCREATEROOT", + owner=owner, + ) + self.owner = owner + yield + Document.global_objects.filter(checksum__startswith="VPCREATE").hard_delete() + User.objects.filter(username="vp_create_owner").delete() + + def test_d1_version_creation_query_count(self): + from django.db import transaction + + with override_settings(DEBUG=True): + reset_queries() + with transaction.atomic(): + next_number = ( + DocumentVersion.objects.filter( + document=self.root_doc, + ).count() + + 1 + ) + DocumentVersion( + document=self.root_doc, + version_number=next_number, + checksum=f"VPCREATEVER{next_number}", + mime_type="application/pdf", + ).save() + + queries = list(connection.queries) + + print(f"\n Version creation query count: {len(queries)}") # noqa: T201 + for i, q in enumerate(queries): + print(f" [{i}] {q['sql'][:120]} -- {q['time']}s") # noqa: T201 + + assert len(queries) <= 8 + + def test_d2_version_creation_wall_time(self): + from django.db import transaction + + times = [] + for i in range(10): + t0 = perf_counter() + with transaction.atomic(): + next_number = ( + DocumentVersion.objects.filter( + document=self.root_doc, + ).count() + + 1 + ) + DocumentVersion( + document=self.root_doc, + version_number=next_number, + checksum=f"VPCREATEBENCH{i}", + mime_type="application/pdf", + ).save() + times.append((perf_counter() - t0) * 1000) + + avg_ms = sum(times) / len(times) + print( # noqa: T201 + f"\n Version creation: avg {avg_ms:.1f} ms over 10 runs\n" + f" Min: {min(times):.1f} ms Max: {max(times):.1f} ms\n" + " After refactor: INSERT on DocumentVersion (narrower) should be faster.", + ) + assert avg_ms < 500 + + +# --------------------------------------------------------------------------- +# Summary +# --------------------------------------------------------------------------- + + +class TestSummary: + def test_z_print_summary(self, db): + print(""" +======================================================================== + DocumentVersion Refactor — Baseline Numbers +======================================================================== + + PRE-REFACTOR (self-referential Document.root_document): + ------------------------------------------------------- + SCENARIO A: List endpoint, no versions (N=1000 docs) + Metric BEFORE + Query count (page=25) _____ + root_document_id refs _____ + Wall time (page=25, ms) _____ + Memory delta (KiB) _____ + time(100)/time(25) ratio _____ + + SCENARIO B: List, 100 docs x 3 version-children (old Document rows) + Metric BEFORE + Query count (page=25) _____ + Memory delta (KiB) _____ + Peak memory (KiB) _____ + Wall time (page=25, ms) _____ + + SCENARIO C: Detail, 1 doc with 5 version-children + Metric BEFORE + Query count _____ + Wall time (ms) _____ + versions in response _____ + + SCENARIO D: Version creation (10 runs avg, old Document INSERT) + Metric BEFORE + Query count _____ + Avg wall time (ms) _____ + + POST-REFACTOR (DocumentVersion model): + ------------------------------------------------------- + Fill in numbers from current test output above. + + SCENARIO A: List endpoint, no versions (N=1000 docs) + Metric AFTER Target + Query count (page=25) _____ <= 8 + root_document_id refs 0 == 0 + Wall time (page=25, ms) _____ < 70% of BEFORE + Memory delta (KiB) _____ < 80% of BEFORE + time(100)/time(25) ratio _____ < 3.0 + + SCENARIO B: List, 100 docs x 3 DocumentVersions + Metric AFTER Target + Query count (page=25) _____ <= 8 + Memory delta (KiB) _____ < 60% of BEFORE + Peak memory (KiB) _____ < 70% of BEFORE + Wall time (page=25, ms) _____ < 70% of BEFORE + + SCENARIO C: Detail, 1 doc with 5 DocumentVersions + Metric AFTER Target + Query count _____ <= 10 + Wall time (ms) _____ < 80% of BEFORE + versions in response _____ == DETAIL_VERSIONS + + SCENARIO D: Version creation (10 runs avg, DocumentVersion INSERT) + Metric AFTER Target + Query count _____ <= 8 + Avg wall time (ms) _____ lower (narrower INSERT) + +======================================================================== +""") # noqa: T201 diff --git a/src/documents/tests/test_workflows.py b/src/documents/tests/test_workflows.py index 0fd893a5b..8add1778b 100644 --- a/src/documents/tests/test_workflows.py +++ b/src/documents/tests/test_workflows.py @@ -1860,7 +1860,10 @@ class TestWorkflows( self.assertEqual(doc.title, "Doc {created_year]") - def test_document_updated_workflow_ignores_version_documents(self) -> None: + def test_document_updated_workflow_runs_on_versioned_document(self) -> None: + """Workflows apply to documents even when they have DocumentVersion records.""" + from documents.models import DocumentVersion + trigger = WorkflowTrigger.objects.create( type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, ) @@ -1875,30 +1878,27 @@ class TestWorkflows( workflow.triggers.add(trigger) workflow.actions.add(action) - root_doc = Document.objects.create( - title="root", + doc = Document.objects.create( + title="doc", correspondent=self.c, - original_filename="root.pdf", + original_filename="doc.pdf", ) - version_doc = Document.objects.create( - title="version", - correspondent=self.c, - original_filename="version.pdf", - root_document=root_doc, + DocumentVersion.objects.create( + document=doc, + version_number=1, + checksum="abc", + mime_type="application/pdf", ) - run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, version_doc) + run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc) - root_doc.refresh_from_db() - version_doc.refresh_from_db() - - self.assertIsNone(root_doc.owner) - self.assertIsNone(version_doc.owner) - self.assertFalse( + doc.refresh_from_db() + self.assertEqual(doc.owner, self.user2) + self.assertTrue( WorkflowRun.objects.filter( workflow=workflow, type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, - document=version_doc, + document=doc, ).exists(), ) @@ -2200,7 +2200,10 @@ class TestWorkflows( doc.refresh_from_db() self.assertEqual(doc.owner, self.user2) - def test_workflow_scheduled_trigger_ignores_version_documents(self) -> None: + def test_workflow_scheduled_trigger_runs_on_versioned_document(self) -> None: + """Scheduled workflows run against documents that have DocumentVersion records.""" + from documents.models import DocumentVersion + trigger = WorkflowTrigger.objects.create( type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED, schedule_offset_days=1, @@ -2217,42 +2220,31 @@ class TestWorkflows( workflow.triggers.add(trigger) workflow.actions.add(action) - root_doc = Document.objects.create( - title="root", + doc = Document.objects.create( + title="doc", correspondent=self.c, - original_filename="root.pdf", + original_filename="doc.pdf", added=timezone.now() - timedelta(days=10), ) - version_doc = Document.objects.create( - title="version", - correspondent=self.c, - original_filename="version.pdf", - root_document=root_doc, - added=timezone.now() - timedelta(days=10), + DocumentVersion.objects.create( + document=doc, + version_number=1, + checksum="abc", + mime_type="application/pdf", ) tasks.check_scheduled_workflows() - root_doc.refresh_from_db() - version_doc.refresh_from_db() - - self.assertEqual(root_doc.owner, self.user2) - self.assertIsNone(version_doc.owner) + doc.refresh_from_db() + self.assertEqual(doc.owner, self.user2) self.assertEqual( WorkflowRun.objects.filter( workflow=workflow, type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED, - document=root_doc, + document=doc, ).count(), 1, ) - self.assertFalse( - WorkflowRun.objects.filter( - workflow=workflow, - type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED, - document=version_doc, - ).exists(), - ) @mock.patch("documents.models.Document.objects.filter", autospec=True) def test_workflow_scheduled_trigger_modified(self, mock_filter) -> None: