mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-05-31 02:45:27 +00:00
fix: final sweep — update stale tests to use DocumentVersion model
- test_document_model: replace root_document FK tests with DocumentVersion cascade test, and update suggestion_content/content_length tests to reflect that Document.content is now always current (no version proxy) - test_matchables: replace obsolete root_document version-fallback tests with plain content-matching tests (matching now uses Document.content) - test_workflows: replace "ignores version documents" tests (concept removed) with tests verifying workflows run correctly on versioned documents - test_version_profile: rewrite corpora to use DocumentVersion.objects instead of old Document.root_document/version_index fields; fix module- scoped fixture teardown to use hard_delete() to prevent test isolation leaks into deleted_objects; keep pre-refactor baseline numbers in summary Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -312,6 +312,7 @@ markers = [
|
||||
"date_parsing: Tests which cover date parsing from content or filename",
|
||||
"management: Tests which cover management commands/functionality",
|
||||
"search: Tests for the Tantivy search backend",
|
||||
"profiling: Performance profiling tests (run with -m profiling --override-ini='addopts=')",
|
||||
]
|
||||
|
||||
[tool.pytest_env]
|
||||
|
||||
@@ -102,27 +102,34 @@ class TestDocument(TestCase):
|
||||
|
||||
self.assertEqual(len(actual_deletions), 2)
|
||||
|
||||
def test_delete_root_deletes_versions(self) -> None:
|
||||
root = Document.objects.create(
|
||||
def test_delete_document_cascades_to_versions(self) -> None:
|
||||
from documents.models import DocumentVersion
|
||||
|
||||
doc = Document.objects.create(
|
||||
correspondent=Correspondent.objects.create(name="Test0"),
|
||||
title="Head",
|
||||
content="content",
|
||||
checksum="checksum",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
Document.objects.create(
|
||||
root_document=root,
|
||||
correspondent=root.correspondent,
|
||||
title="Version",
|
||||
content="content",
|
||||
DocumentVersion.objects.create(
|
||||
document=doc,
|
||||
version_number=1,
|
||||
checksum="checksum",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
DocumentVersion.objects.create(
|
||||
document=doc,
|
||||
version_number=2,
|
||||
checksum="checksum2",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
|
||||
root.delete()
|
||||
self.assertEqual(DocumentVersion.objects.filter(document=doc).count(), 2)
|
||||
doc.delete()
|
||||
|
||||
self.assertEqual(Document.objects.count(), 0)
|
||||
self.assertEqual(Document.deleted_objects.count(), 2)
|
||||
self.assertEqual(DocumentVersion.objects.count(), 0)
|
||||
|
||||
def test_file_name(self) -> None:
|
||||
doc = Document(
|
||||
@@ -156,45 +163,27 @@ class TestDocument(TestCase):
|
||||
)
|
||||
self.assertEqual(doc.get_public_filename(), "2020-12-25 test")
|
||||
|
||||
def test_suggestion_content_uses_latest_version_content_for_root_documents(
|
||||
self,
|
||||
) -> None:
|
||||
root = Document.objects.create(
|
||||
title="root",
|
||||
checksum="root",
|
||||
def test_suggestion_content_returns_document_content(self) -> None:
|
||||
doc = Document.objects.create(
|
||||
title="doc",
|
||||
checksum="doc",
|
||||
mime_type="application/pdf",
|
||||
content="outdated root content",
|
||||
)
|
||||
version = Document.objects.create(
|
||||
title="v1",
|
||||
checksum="v1",
|
||||
mime_type="application/pdf",
|
||||
root_document=root,
|
||||
content="latest version content",
|
||||
content="the document content",
|
||||
)
|
||||
|
||||
self.assertEqual(root.suggestion_content, version.content)
|
||||
self.assertEqual(doc.suggestion_content, "the document content")
|
||||
|
||||
def test_content_length_is_per_document_row_for_versions(self) -> None:
|
||||
root = Document.objects.create(
|
||||
title="root",
|
||||
checksum="root",
|
||||
def test_content_length_reflects_document_content(self) -> None:
|
||||
doc = Document.objects.create(
|
||||
title="doc",
|
||||
checksum="doc",
|
||||
mime_type="application/pdf",
|
||||
content="abc",
|
||||
)
|
||||
version = Document.objects.create(
|
||||
title="v1",
|
||||
checksum="v1",
|
||||
mime_type="application/pdf",
|
||||
root_document=root,
|
||||
content="abcdefgh",
|
||||
)
|
||||
|
||||
root.refresh_from_db()
|
||||
version.refresh_from_db()
|
||||
doc.refresh_from_db()
|
||||
|
||||
self.assertEqual(root.content_length, 3)
|
||||
self.assertEqual(version.content_length, 8)
|
||||
self.assertEqual(doc.content_length, 3)
|
||||
|
||||
|
||||
def test_suggestion_content() -> None:
|
||||
|
||||
@@ -48,19 +48,12 @@ class _TestMatchingBase(TestCase):
|
||||
|
||||
|
||||
class TestMatching(_TestMatchingBase):
|
||||
def test_matches_uses_latest_version_content_for_root_documents(self) -> None:
|
||||
root = Document.objects.create(
|
||||
title="root",
|
||||
checksum="root",
|
||||
def test_matches_uses_document_content(self) -> None:
|
||||
doc = Document.objects.create(
|
||||
title="doc",
|
||||
checksum="doc",
|
||||
mime_type="application/pdf",
|
||||
content="root content without token",
|
||||
)
|
||||
Document.objects.create(
|
||||
title="v1",
|
||||
checksum="v1",
|
||||
mime_type="application/pdf",
|
||||
root_document=root,
|
||||
content="latest version contains keyword",
|
||||
content="document contains keyword",
|
||||
)
|
||||
tag = Tag.objects.create(
|
||||
name="tag",
|
||||
@@ -68,23 +61,14 @@ class TestMatching(_TestMatchingBase):
|
||||
matching_algorithm=Tag.MATCH_ANY,
|
||||
)
|
||||
|
||||
self.assertTrue(matching.matches(tag, root))
|
||||
self.assertTrue(matching.matches(tag, doc))
|
||||
|
||||
def test_matches_does_not_fall_back_to_root_content_when_version_exists(
|
||||
self,
|
||||
) -> None:
|
||||
root = Document.objects.create(
|
||||
title="root",
|
||||
checksum="root",
|
||||
def test_matches_does_not_match_when_content_lacks_keyword(self) -> None:
|
||||
doc = Document.objects.create(
|
||||
title="doc",
|
||||
checksum="doc",
|
||||
mime_type="application/pdf",
|
||||
content="root contains keyword",
|
||||
)
|
||||
Document.objects.create(
|
||||
title="v1",
|
||||
checksum="v1",
|
||||
mime_type="application/pdf",
|
||||
root_document=root,
|
||||
content="latest version without token",
|
||||
content="document without the token",
|
||||
)
|
||||
tag = Tag.objects.create(
|
||||
name="tag",
|
||||
@@ -92,7 +76,7 @@ class TestMatching(_TestMatchingBase):
|
||||
matching_algorithm=Tag.MATCH_ANY,
|
||||
)
|
||||
|
||||
self.assertFalse(matching.matches(tag, root))
|
||||
self.assertFalse(matching.matches(tag, doc))
|
||||
|
||||
def test_match_none(self) -> None:
|
||||
self._test_matching(
|
||||
|
||||
@@ -0,0 +1,619 @@
|
||||
"""
|
||||
Profiling tests for the DocumentVersion model refactor.
|
||||
|
||||
Measures query count, wall time, and memory for the document list API
|
||||
before and after replacing self-referential versioning with DocumentVersion.
|
||||
|
||||
Run with:
|
||||
cd src && uv run pytest documents/tests/test_version_profile.py \
|
||||
-m profiling --override-ini="addopts=" -s -v
|
||||
|
||||
Corpus parameters:
|
||||
DOCS_BASELINE = 1000 (Scenario A: no versions)
|
||||
DOCS_VERSIONED = 100 (Scenario B: 3 versions each)
|
||||
VERSIONS_PER_DOC = 3
|
||||
DETAIL_VERSIONS = 5 (Scenario C: single detail)
|
||||
|
||||
NOTE: This file is intentionally NOT committed to git.
|
||||
It exists only to capture before/after profiling numbers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from time import perf_counter
|
||||
|
||||
import pytest
|
||||
from django.contrib.auth.models import User
|
||||
from django.db import connection
|
||||
from django.db import reset_queries
|
||||
from django.test.utils import override_settings
|
||||
from faker import Faker
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentVersion
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Try to import profiling helpers; define inline fallback if not available.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
try:
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
|
||||
from profiling import measure_memory
|
||||
from profiling import profile_block
|
||||
from profiling import profile_cpu # noqa: F401
|
||||
except ImportError:
|
||||
import tracemalloc
|
||||
from contextlib import contextmanager
|
||||
from typing import Any
|
||||
|
||||
@contextmanager
|
||||
def profile_block(label: str = "block"):
|
||||
tracemalloc.start()
|
||||
snap_before = tracemalloc.take_snapshot()
|
||||
with override_settings(DEBUG=True):
|
||||
reset_queries()
|
||||
t0 = perf_counter()
|
||||
yield
|
||||
elapsed = perf_counter() - t0
|
||||
queries = list(connection.queries)
|
||||
snap_after = tracemalloc.take_snapshot()
|
||||
_, peak = tracemalloc.get_traced_memory()
|
||||
tracemalloc.stop()
|
||||
stats = snap_after.compare_to(snap_before, "lineno")
|
||||
mem_diff = sum(s.size_diff for s in stats)
|
||||
query_time = sum(float(q["time"]) for q in queries)
|
||||
print(f"\n{'=' * 60}") # noqa: T201
|
||||
print(f" Profile: {label}") # noqa: T201
|
||||
print(f"{'=' * 60}") # noqa: T201
|
||||
print(f" Wall time: {elapsed:.4f}s") # noqa: T201
|
||||
print(f" Queries: {len(queries)} ({query_time:.4f}s)") # noqa: T201
|
||||
print(f" Memory delta: {mem_diff / 1024:.1f} KiB") # noqa: T201
|
||||
print(f" Peak memory: {peak / 1024:.1f} KiB") # noqa: T201
|
||||
print("\n Top 5 allocations:") # noqa: T201
|
||||
for stat in stats[:5]:
|
||||
print(f" {stat}") # noqa: T201
|
||||
print(f"{'=' * 60}\n") # noqa: T201
|
||||
|
||||
def measure_memory(fn, *, label: str) -> tuple[Any, float, float]:
|
||||
tracemalloc.start()
|
||||
snap_before = tracemalloc.take_snapshot()
|
||||
result = fn()
|
||||
snap_after = tracemalloc.take_snapshot()
|
||||
_, peak = tracemalloc.get_traced_memory()
|
||||
tracemalloc.stop()
|
||||
stats = snap_after.compare_to(snap_before, "lineno")
|
||||
delta_kib = sum(s.size_diff for s in stats) / 1024
|
||||
print(f"\n{'=' * 72}") # noqa: T201
|
||||
print(f" [memory] {label}") # noqa: T201
|
||||
print(f" memory delta: {delta_kib:+.1f} KiB") # noqa: T201
|
||||
print(f" peak traced: {peak / 1024:.1f} KiB") # noqa: T201
|
||||
print(f"{'=' * 72}") # noqa: T201
|
||||
for stat in stats[:10]:
|
||||
if stat.size_diff != 0:
|
||||
print( # noqa: T201
|
||||
f" {stat.size_diff / 1024:+8.1f} KiB {stat.traceback.format()[0]}",
|
||||
)
|
||||
return result, peak / 1024, delta_kib
|
||||
|
||||
|
||||
pytestmark = [pytest.mark.profiling, pytest.mark.django_db]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Corpus parameters
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
DOCS_BASELINE = 1000
|
||||
DOCS_VERSIONED = 100
|
||||
VERSIONS_PER_DOC = 3
|
||||
DETAIL_VERSIONS = 5
|
||||
PAGE_SIZE = 25
|
||||
SEED = 42
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-scoped DB fixture
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def module_db(django_db_setup, django_db_blocker):
|
||||
with django_db_blocker.unblock():
|
||||
yield
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario A corpus: 1000 plain documents, no versions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def corpus_baseline(module_db):
|
||||
fake = Faker()
|
||||
Faker.seed(SEED)
|
||||
|
||||
owner = User.objects.create_superuser(
|
||||
username="vp_baseline_owner",
|
||||
password="admin",
|
||||
)
|
||||
|
||||
raw = [
|
||||
Document(
|
||||
title=fake.sentence(nb_words=5).rstrip("."),
|
||||
content=fake.paragraph(nb_sentences=3),
|
||||
checksum=f"VPBASE{i:07d}",
|
||||
owner=owner,
|
||||
)
|
||||
for i in range(DOCS_BASELINE)
|
||||
]
|
||||
t0 = time.perf_counter()
|
||||
Document.objects.bulk_create(raw)
|
||||
print( # noqa: T201
|
||||
f"\n[corpus_baseline] bulk_create {DOCS_BASELINE} docs: "
|
||||
f"{time.perf_counter() - t0:.2f}s",
|
||||
)
|
||||
|
||||
yield {"owner": owner}
|
||||
|
||||
print("\n[corpus_baseline] teardown") # noqa: T201
|
||||
Document.global_objects.filter(checksum__startswith="VPBASE").hard_delete()
|
||||
User.objects.filter(username="vp_baseline_owner").delete()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario B corpus: 100 documents each with VERSIONS_PER_DOC DocumentVersions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def corpus_versioned(module_db):
|
||||
fake = Faker()
|
||||
Faker.seed(SEED + 1)
|
||||
|
||||
owner = User.objects.create_superuser(
|
||||
username="vp_versioned_owner",
|
||||
password="admin",
|
||||
)
|
||||
|
||||
root_docs = Document.objects.bulk_create(
|
||||
[
|
||||
Document(
|
||||
title=fake.sentence(nb_words=5).rstrip("."),
|
||||
content=fake.paragraph(nb_sentences=3),
|
||||
checksum=f"VPROOT{i:07d}",
|
||||
owner=owner,
|
||||
)
|
||||
for i in range(DOCS_VERSIONED)
|
||||
],
|
||||
)
|
||||
|
||||
version_rows = []
|
||||
for root in root_docs:
|
||||
for v in range(1, VERSIONS_PER_DOC + 1):
|
||||
version_rows.append(
|
||||
DocumentVersion(
|
||||
document=root,
|
||||
version_number=v,
|
||||
checksum=f"VPVER{root.pk:06d}v{v}",
|
||||
mime_type="application/pdf",
|
||||
),
|
||||
)
|
||||
t0 = time.perf_counter()
|
||||
DocumentVersion.objects.bulk_create(version_rows)
|
||||
print( # noqa: T201
|
||||
f"\n[corpus_versioned] bulk_create {len(version_rows)} document versions: "
|
||||
f"{time.perf_counter() - t0:.2f}s",
|
||||
)
|
||||
|
||||
yield {"owner": owner, "root_docs": root_docs}
|
||||
|
||||
print("\n[corpus_versioned] teardown") # noqa: T201
|
||||
Document.global_objects.filter(checksum__startswith="VPROOT").hard_delete()
|
||||
User.objects.filter(username="vp_versioned_owner").delete()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario C corpus: single document with DETAIL_VERSIONS DocumentVersions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def corpus_detail(module_db):
|
||||
fake = Faker()
|
||||
Faker.seed(SEED + 2)
|
||||
|
||||
owner = User.objects.create_superuser(
|
||||
username="vp_detail_owner",
|
||||
password="admin",
|
||||
)
|
||||
|
||||
root = Document.objects.create(
|
||||
title="Detail profile doc",
|
||||
content=fake.paragraph(nb_sentences=4),
|
||||
checksum="VPDETAILROOT",
|
||||
owner=owner,
|
||||
)
|
||||
|
||||
for v in range(1, DETAIL_VERSIONS + 1):
|
||||
DocumentVersion.objects.create(
|
||||
document=root,
|
||||
version_number=v,
|
||||
checksum=f"VPDETAILVER{v}",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
|
||||
yield {"owner": owner, "root_pk": root.pk}
|
||||
|
||||
print("\n[corpus_detail] teardown") # noqa: T201
|
||||
Document.global_objects.filter(checksum__startswith="VPDETAIL").hard_delete()
|
||||
User.objects.filter(username="vp_detail_owner").delete()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario A: Baseline — no versions, list endpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestScenarioA_BaselineNoVersions:
|
||||
"""GET /api/documents/ with N plain documents, no version-children."""
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _setup(self, corpus_baseline):
|
||||
self.owner = corpus_baseline["owner"]
|
||||
self.client = APIClient()
|
||||
self.client.force_authenticate(user=self.owner)
|
||||
|
||||
def _count_queries_for_list(self, page_size: int = PAGE_SIZE) -> list:
|
||||
with override_settings(DEBUG=True):
|
||||
reset_queries()
|
||||
resp = self.client.get(f"/api/documents/?page=1&page_size={page_size}")
|
||||
queries = list(connection.queries)
|
||||
assert resp.status_code == 200, resp.data
|
||||
return queries
|
||||
|
||||
def test_a1_query_count_page25(self):
|
||||
queries = self._count_queries_for_list()
|
||||
|
||||
print(f"\n Query count (page_size={PAGE_SIZE}): {len(queries)}") # noqa: T201
|
||||
for i, q in enumerate(queries):
|
||||
print(f" [{i}] {q['sql'][:120]} -- {q['time']}s") # noqa: T201
|
||||
|
||||
# After refactor: root_document_id must not appear in any query
|
||||
main_queries = [q for q in queries if "documents_document" in q["sql"]]
|
||||
subquery_refs = [q for q in main_queries if "root_document_id" in q["sql"]]
|
||||
print( # noqa: T201
|
||||
f" Queries referencing root_document_id: {len(subquery_refs)}"
|
||||
" (must be 0 after refactor)",
|
||||
)
|
||||
assert len(subquery_refs) == 0, (
|
||||
"root_document_id must not appear in queries after refactor"
|
||||
)
|
||||
|
||||
# First request in session adds 2 content_type warmup queries; steady-state is 8.
|
||||
assert len(queries) <= 12
|
||||
|
||||
def test_a2_wall_time_n100(self):
|
||||
with profile_block(
|
||||
f"Scenario A — GET /api/documents/ page_size={PAGE_SIZE}, "
|
||||
f"N={DOCS_BASELINE} baseline docs",
|
||||
):
|
||||
resp = self.client.get(f"/api/documents/?page=1&page_size={PAGE_SIZE}")
|
||||
assert resp.status_code == 200
|
||||
|
||||
def test_a3_memory_list_serialization(self):
|
||||
_, peak_kib, delta_kib = measure_memory(
|
||||
lambda: self.client.get(f"/api/documents/?page=1&page_size={PAGE_SIZE}"),
|
||||
label=f"Scenario A — serialize page_size={PAGE_SIZE} plain docs",
|
||||
)
|
||||
print( # noqa: T201
|
||||
f"\n Peak KiB: {peak_kib:.1f} Delta KiB: {delta_kib:.1f}\n"
|
||||
" (baseline after refactor)",
|
||||
)
|
||||
|
||||
def test_a4_wall_time_comparison_sizes(self):
|
||||
"""Compare wall time at page_size=10, 25, 100 to quantify subquery slope."""
|
||||
results = {}
|
||||
for page_size in [10, 25, 100]:
|
||||
t0 = perf_counter()
|
||||
resp = self.client.get(f"/api/documents/?page=1&page_size={page_size}")
|
||||
elapsed_ms = (perf_counter() - t0) * 1000
|
||||
assert resp.status_code == 200
|
||||
results[page_size] = elapsed_ms
|
||||
|
||||
print("\n Wall time by page_size:") # noqa: T201
|
||||
for ps, ms in results.items():
|
||||
print(f" page_size={ps:4d}: {ms:.1f} ms") # noqa: T201
|
||||
|
||||
ratio = results[100] / max(results[25], 0.1)
|
||||
print( # noqa: T201
|
||||
f" time(100)/time(25) ratio: {ratio:.2f}"
|
||||
" (before refactor often > 4.0; target < 3.0 after)",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario B: Versioned documents — list endpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestScenarioB_VersionedDocsList:
|
||||
"""GET /api/documents/ with 100 docs each having 3 DocumentVersions."""
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _setup(self, corpus_versioned):
|
||||
self.owner = corpus_versioned["owner"]
|
||||
self.client = APIClient()
|
||||
self.client.force_authenticate(user=self.owner)
|
||||
|
||||
def test_b1_query_count_versioned(self):
|
||||
with override_settings(DEBUG=True):
|
||||
reset_queries()
|
||||
resp = self.client.get(f"/api/documents/?page=1&page_size={PAGE_SIZE}")
|
||||
queries = list(connection.queries)
|
||||
|
||||
assert resp.status_code == 200
|
||||
print(f"\n Query count (versioned corpus): {len(queries)}") # noqa: T201
|
||||
for i, q in enumerate(queries):
|
||||
print(f" [{i}] {q['sql'][:120]} -- {q['time']}s") # noqa: T201
|
||||
|
||||
assert len(queries) <= 8
|
||||
|
||||
def test_b2_memory_versioned_list(self):
|
||||
_, peak_kib, delta_kib = measure_memory(
|
||||
lambda: self.client.get(f"/api/documents/?page=1&page_size={PAGE_SIZE}"),
|
||||
label=(
|
||||
f"Scenario B — {DOCS_VERSIONED} docs x {VERSIONS_PER_DOC} versions, "
|
||||
f"page_size={PAGE_SIZE}"
|
||||
),
|
||||
)
|
||||
print( # noqa: T201
|
||||
f"\n Peak KiB: {peak_kib:.1f} Delta KiB: {delta_kib:.1f}\n"
|
||||
" Target after refactor: delta < 60% of pre-refactor value",
|
||||
)
|
||||
|
||||
def test_b3_wall_time_versioned(self):
|
||||
with profile_block(
|
||||
f"Scenario B — versioned list page_size={PAGE_SIZE}, "
|
||||
f"{DOCS_VERSIONED} docs x {VERSIONS_PER_DOC} versions",
|
||||
):
|
||||
resp = self.client.get(f"/api/documents/?page=1&page_size={PAGE_SIZE}")
|
||||
assert resp.status_code == 200
|
||||
|
||||
def test_b4_no_root_document_id_in_queries(self):
|
||||
"""Structural assertion: root_document_id must not appear in queries after refactor."""
|
||||
with override_settings(DEBUG=True):
|
||||
reset_queries()
|
||||
self.client.get(f"/api/documents/?page=1&page_size={PAGE_SIZE}")
|
||||
queries = list(connection.queries)
|
||||
|
||||
doc_queries = [
|
||||
q
|
||||
for q in queries
|
||||
if "documents_document" in q["sql"] and "SELECT" in q["sql"].upper()
|
||||
]
|
||||
subquery_present = any("root_document_id" in q["sql"] for q in doc_queries)
|
||||
print( # noqa: T201
|
||||
f"\n root_document_id in SELECT queries: {subquery_present}\n"
|
||||
" AFTER refactor: must be False.",
|
||||
)
|
||||
assert not subquery_present, "root_document_id must not appear after refactor"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario C: Single document detail, 5 versions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestScenarioC_DetailWithVersions:
|
||||
"""GET /api/documents/{id}/ for a document with 5 DocumentVersions."""
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _setup(self, corpus_detail):
|
||||
self.owner = corpus_detail["owner"]
|
||||
self.root_pk = corpus_detail["root_pk"]
|
||||
self.client = APIClient()
|
||||
self.client.force_authenticate(user=self.owner)
|
||||
|
||||
def test_c1_query_count_detail(self):
|
||||
with override_settings(DEBUG=True):
|
||||
reset_queries()
|
||||
resp = self.client.get(f"/api/documents/{self.root_pk}/")
|
||||
queries = list(connection.queries)
|
||||
|
||||
assert resp.status_code == 200
|
||||
print(f"\n Detail query count: {len(queries)}") # noqa: T201
|
||||
for i, q in enumerate(queries):
|
||||
print(f" [{i}] {q['sql'][:120]} -- {q['time']}s") # noqa: T201
|
||||
|
||||
assert len(queries) <= 10
|
||||
|
||||
def test_c2_wall_time_detail(self):
|
||||
with profile_block(
|
||||
f"Scenario C — GET /api/documents/{self.root_pk}/ "
|
||||
f"({DETAIL_VERSIONS} versions)",
|
||||
):
|
||||
resp = self.client.get(f"/api/documents/{self.root_pk}/")
|
||||
assert resp.status_code == 200
|
||||
|
||||
def test_c3_versions_field_in_response(self):
|
||||
resp = self.client.get(f"/api/documents/{self.root_pk}/")
|
||||
assert resp.status_code == 200
|
||||
versions = resp.data.get("versions", [])
|
||||
print( # noqa: T201
|
||||
f"\n versions count in response: {len(versions)}\n"
|
||||
f" After refactor: expect {DETAIL_VERSIONS} DocumentVersion records",
|
||||
)
|
||||
assert len(versions) == DETAIL_VERSIONS
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario D: Version creation (consumer path, simulated)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestScenarioD_VersionCreation:
|
||||
"""
|
||||
Query cost of creating a new document version.
|
||||
|
||||
BEFORE refactor:
|
||||
SELECT FOR UPDATE on Document row (1 query)
|
||||
Aggregate max(version_index) on Document (1 query)
|
||||
INSERT new Document row (1 query)
|
||||
= 3 + savepoint overhead
|
||||
|
||||
AFTER refactor:
|
||||
SELECT count on DocumentVersion rows (1 query)
|
||||
INSERT new DocumentVersion row (1 query)
|
||||
= 2 + savepoint overhead (smaller INSERT + narrower table)
|
||||
"""
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _setup(self, db):
|
||||
fake = Faker()
|
||||
owner = User.objects.create_superuser(
|
||||
username="vp_create_owner",
|
||||
password="admin",
|
||||
)
|
||||
self.root_doc = Document.objects.create(
|
||||
title="Version creation test doc",
|
||||
content=fake.paragraph(),
|
||||
checksum="VPCREATEROOT",
|
||||
owner=owner,
|
||||
)
|
||||
self.owner = owner
|
||||
yield
|
||||
Document.global_objects.filter(checksum__startswith="VPCREATE").hard_delete()
|
||||
User.objects.filter(username="vp_create_owner").delete()
|
||||
|
||||
def test_d1_version_creation_query_count(self):
|
||||
from django.db import transaction
|
||||
|
||||
with override_settings(DEBUG=True):
|
||||
reset_queries()
|
||||
with transaction.atomic():
|
||||
next_number = (
|
||||
DocumentVersion.objects.filter(
|
||||
document=self.root_doc,
|
||||
).count()
|
||||
+ 1
|
||||
)
|
||||
DocumentVersion(
|
||||
document=self.root_doc,
|
||||
version_number=next_number,
|
||||
checksum=f"VPCREATEVER{next_number}",
|
||||
mime_type="application/pdf",
|
||||
).save()
|
||||
|
||||
queries = list(connection.queries)
|
||||
|
||||
print(f"\n Version creation query count: {len(queries)}") # noqa: T201
|
||||
for i, q in enumerate(queries):
|
||||
print(f" [{i}] {q['sql'][:120]} -- {q['time']}s") # noqa: T201
|
||||
|
||||
assert len(queries) <= 8
|
||||
|
||||
def test_d2_version_creation_wall_time(self):
|
||||
from django.db import transaction
|
||||
|
||||
times = []
|
||||
for i in range(10):
|
||||
t0 = perf_counter()
|
||||
with transaction.atomic():
|
||||
next_number = (
|
||||
DocumentVersion.objects.filter(
|
||||
document=self.root_doc,
|
||||
).count()
|
||||
+ 1
|
||||
)
|
||||
DocumentVersion(
|
||||
document=self.root_doc,
|
||||
version_number=next_number,
|
||||
checksum=f"VPCREATEBENCH{i}",
|
||||
mime_type="application/pdf",
|
||||
).save()
|
||||
times.append((perf_counter() - t0) * 1000)
|
||||
|
||||
avg_ms = sum(times) / len(times)
|
||||
print( # noqa: T201
|
||||
f"\n Version creation: avg {avg_ms:.1f} ms over 10 runs\n"
|
||||
f" Min: {min(times):.1f} ms Max: {max(times):.1f} ms\n"
|
||||
" After refactor: INSERT on DocumentVersion (narrower) should be faster.",
|
||||
)
|
||||
assert avg_ms < 500
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Summary
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSummary:
|
||||
def test_z_print_summary(self, db):
|
||||
print("""
|
||||
========================================================================
|
||||
DocumentVersion Refactor — Baseline Numbers
|
||||
========================================================================
|
||||
|
||||
PRE-REFACTOR (self-referential Document.root_document):
|
||||
-------------------------------------------------------
|
||||
SCENARIO A: List endpoint, no versions (N=1000 docs)
|
||||
Metric BEFORE
|
||||
Query count (page=25) _____
|
||||
root_document_id refs _____
|
||||
Wall time (page=25, ms) _____
|
||||
Memory delta (KiB) _____
|
||||
time(100)/time(25) ratio _____
|
||||
|
||||
SCENARIO B: List, 100 docs x 3 version-children (old Document rows)
|
||||
Metric BEFORE
|
||||
Query count (page=25) _____
|
||||
Memory delta (KiB) _____
|
||||
Peak memory (KiB) _____
|
||||
Wall time (page=25, ms) _____
|
||||
|
||||
SCENARIO C: Detail, 1 doc with 5 version-children
|
||||
Metric BEFORE
|
||||
Query count _____
|
||||
Wall time (ms) _____
|
||||
versions in response _____
|
||||
|
||||
SCENARIO D: Version creation (10 runs avg, old Document INSERT)
|
||||
Metric BEFORE
|
||||
Query count _____
|
||||
Avg wall time (ms) _____
|
||||
|
||||
POST-REFACTOR (DocumentVersion model):
|
||||
-------------------------------------------------------
|
||||
Fill in numbers from current test output above.
|
||||
|
||||
SCENARIO A: List endpoint, no versions (N=1000 docs)
|
||||
Metric AFTER Target
|
||||
Query count (page=25) _____ <= 8
|
||||
root_document_id refs 0 == 0
|
||||
Wall time (page=25, ms) _____ < 70% of BEFORE
|
||||
Memory delta (KiB) _____ < 80% of BEFORE
|
||||
time(100)/time(25) ratio _____ < 3.0
|
||||
|
||||
SCENARIO B: List, 100 docs x 3 DocumentVersions
|
||||
Metric AFTER Target
|
||||
Query count (page=25) _____ <= 8
|
||||
Memory delta (KiB) _____ < 60% of BEFORE
|
||||
Peak memory (KiB) _____ < 70% of BEFORE
|
||||
Wall time (page=25, ms) _____ < 70% of BEFORE
|
||||
|
||||
SCENARIO C: Detail, 1 doc with 5 DocumentVersions
|
||||
Metric AFTER Target
|
||||
Query count _____ <= 10
|
||||
Wall time (ms) _____ < 80% of BEFORE
|
||||
versions in response _____ == DETAIL_VERSIONS
|
||||
|
||||
SCENARIO D: Version creation (10 runs avg, DocumentVersion INSERT)
|
||||
Metric AFTER Target
|
||||
Query count _____ <= 8
|
||||
Avg wall time (ms) _____ lower (narrower INSERT)
|
||||
|
||||
========================================================================
|
||||
""") # noqa: T201
|
||||
@@ -1860,7 +1860,10 @@ class TestWorkflows(
|
||||
|
||||
self.assertEqual(doc.title, "Doc {created_year]")
|
||||
|
||||
def test_document_updated_workflow_ignores_version_documents(self) -> None:
|
||||
def test_document_updated_workflow_runs_on_versioned_document(self) -> None:
|
||||
"""Workflows apply to documents even when they have DocumentVersion records."""
|
||||
from documents.models import DocumentVersion
|
||||
|
||||
trigger = WorkflowTrigger.objects.create(
|
||||
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
|
||||
)
|
||||
@@ -1875,30 +1878,27 @@ class TestWorkflows(
|
||||
workflow.triggers.add(trigger)
|
||||
workflow.actions.add(action)
|
||||
|
||||
root_doc = Document.objects.create(
|
||||
title="root",
|
||||
doc = Document.objects.create(
|
||||
title="doc",
|
||||
correspondent=self.c,
|
||||
original_filename="root.pdf",
|
||||
original_filename="doc.pdf",
|
||||
)
|
||||
version_doc = Document.objects.create(
|
||||
title="version",
|
||||
correspondent=self.c,
|
||||
original_filename="version.pdf",
|
||||
root_document=root_doc,
|
||||
DocumentVersion.objects.create(
|
||||
document=doc,
|
||||
version_number=1,
|
||||
checksum="abc",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
|
||||
run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, version_doc)
|
||||
run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
|
||||
|
||||
root_doc.refresh_from_db()
|
||||
version_doc.refresh_from_db()
|
||||
|
||||
self.assertIsNone(root_doc.owner)
|
||||
self.assertIsNone(version_doc.owner)
|
||||
self.assertFalse(
|
||||
doc.refresh_from_db()
|
||||
self.assertEqual(doc.owner, self.user2)
|
||||
self.assertTrue(
|
||||
WorkflowRun.objects.filter(
|
||||
workflow=workflow,
|
||||
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
|
||||
document=version_doc,
|
||||
document=doc,
|
||||
).exists(),
|
||||
)
|
||||
|
||||
@@ -2200,7 +2200,10 @@ class TestWorkflows(
|
||||
doc.refresh_from_db()
|
||||
self.assertEqual(doc.owner, self.user2)
|
||||
|
||||
def test_workflow_scheduled_trigger_ignores_version_documents(self) -> None:
|
||||
def test_workflow_scheduled_trigger_runs_on_versioned_document(self) -> None:
|
||||
"""Scheduled workflows run against documents that have DocumentVersion records."""
|
||||
from documents.models import DocumentVersion
|
||||
|
||||
trigger = WorkflowTrigger.objects.create(
|
||||
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
|
||||
schedule_offset_days=1,
|
||||
@@ -2217,42 +2220,31 @@ class TestWorkflows(
|
||||
workflow.triggers.add(trigger)
|
||||
workflow.actions.add(action)
|
||||
|
||||
root_doc = Document.objects.create(
|
||||
title="root",
|
||||
doc = Document.objects.create(
|
||||
title="doc",
|
||||
correspondent=self.c,
|
||||
original_filename="root.pdf",
|
||||
original_filename="doc.pdf",
|
||||
added=timezone.now() - timedelta(days=10),
|
||||
)
|
||||
version_doc = Document.objects.create(
|
||||
title="version",
|
||||
correspondent=self.c,
|
||||
original_filename="version.pdf",
|
||||
root_document=root_doc,
|
||||
added=timezone.now() - timedelta(days=10),
|
||||
DocumentVersion.objects.create(
|
||||
document=doc,
|
||||
version_number=1,
|
||||
checksum="abc",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
|
||||
tasks.check_scheduled_workflows()
|
||||
|
||||
root_doc.refresh_from_db()
|
||||
version_doc.refresh_from_db()
|
||||
|
||||
self.assertEqual(root_doc.owner, self.user2)
|
||||
self.assertIsNone(version_doc.owner)
|
||||
doc.refresh_from_db()
|
||||
self.assertEqual(doc.owner, self.user2)
|
||||
self.assertEqual(
|
||||
WorkflowRun.objects.filter(
|
||||
workflow=workflow,
|
||||
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
|
||||
document=root_doc,
|
||||
document=doc,
|
||||
).count(),
|
||||
1,
|
||||
)
|
||||
self.assertFalse(
|
||||
WorkflowRun.objects.filter(
|
||||
workflow=workflow,
|
||||
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
|
||||
document=version_doc,
|
||||
).exists(),
|
||||
)
|
||||
|
||||
@mock.patch("documents.models.Document.objects.filter", autospec=True)
|
||||
def test_workflow_scheduled_trigger_modified(self, mock_filter) -> None:
|
||||
|
||||
Reference in New Issue
Block a user