"""
Document list API profiling — no search, pure ORM path.

Run with:
    uv run pytest ../test_doclist_profile.py \
        -m profiling --override-ini="addopts=" -s -v

Corpus: 5 000 documents, 30 correspondents, 20 doc types, 80 tags,
        ~500 notes (10 %), 10 custom fields with instances on ~50 % of docs.

Scenarios
---------
TestDocListProfile
  - test_list_default_ordering      GET /api/documents/ created desc, page 1, page_size=25
  - test_list_title_ordering        same with ordering=title
  - test_list_page_size_comparison  page_size=10 / 25 / 100 in sequence
  - test_list_detail_fields         GET /api/documents/{id}/ — single document serializer cost
  - test_list_cpu_profile           cProfile of one list request

TestSelectionDataProfile
  - test_selection_data_unfiltered  _get_selection_data_for_queryset(all docs) in isolation
  - test_selection_data_via_api     GET /api/documents/?include_selection_data=true
  - test_selection_data_filtered    filtered vs unfiltered COUNT query comparison
"""

from __future__ import annotations

import datetime
import random
import time

import pytest
from django.contrib.auth.models import User
from faker import Faker
from profiling import profile_block
from profiling import profile_cpu
from rest_framework.test import APIClient

from documents.models import Correspondent
from documents.models import CustomField
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import DocumentType
from documents.models import Note
from documents.models import Tag
from documents.views import DocumentViewSet

# Every test in this module carries the "profiling" marker (selected with
# ``-m profiling``) and the ``django_db`` marker.
pytestmark = [pytest.mark.profiling, pytest.mark.django_db]

# ---------------------------------------------------------------------------
# Corpus parameters
# ---------------------------------------------------------------------------

NUM_DOCS = 5_000  # number of Document rows built by the corpus fixture
NUM_CORRESPONDENTS = 30
NUM_DOC_TYPES = 20
NUM_TAGS = 80
NOTE_FRACTION = 0.10  # share of documents sampled to receive one note
CUSTOM_FIELD_COUNT = 10  # number of CustomField definitions created
CUSTOM_FIELD_FRACTION = 0.50  # share of documents sampled to receive one custom field instance
PAGE_SIZE = 25  # page_size used by the list requests unless a test overrides it
SEED = 42  # seeds both Faker and random.Random in the corpus fixture
# ---------------------------------------------------------------------------
# Module-scoped corpus fixture
# ---------------------------------------------------------------------------


@pytest.fixture(scope="module")
def module_db(django_db_setup, django_db_blocker):
    """Unlock the DB for the whole module (module-scoped).

    Keeps ``django_db_blocker.unblock()`` open until module teardown so that
    other module-scoped fixtures can use the ORM.
    """
    with django_db_blocker.unblock():
        yield


@pytest.fixture(scope="module")
def doclist_corpus(module_db):
    """
    Build a 5 000-document corpus with tags, notes, custom fields,
    correspondents, and doc types.  All objects are deleted on teardown.

    Yields:
        dict with keys ``owner`` (the superuser that owns ~80 % of the
        documents), ``first_doc_pk`` (pk of the first bulk-created document)
        and ``tags`` (list of the created ``Tag`` objects).

    The whole build is wrapped in ``try/finally``: pytest does not run the
    code after ``yield`` when a fixture raises *before* yielding, and this
    fixture writes through a module-scoped unblocked connection, so without
    the guard a failure partway through setup would leave the partial corpus
    behind for every later test module.

    Note that cleanup calls ``.all().delete()`` on each model, i.e. it is not
    restricted to the rows created here.
    """
    fake = Faker()
    Faker.seed(SEED)
    rng = random.Random(SEED)

    try:
        print(f"\n[setup] Creating {NUM_CORRESPONDENTS} correspondents...")  # noqa: T201
        correspondents = [
            Correspondent.objects.create(name=f"dlcorp-{i}-{fake.company()}"[:128])
            for i in range(NUM_CORRESPONDENTS)
        ]

        print(f"[setup] Creating {NUM_DOC_TYPES} doc types...")  # noqa: T201
        doc_types = [
            DocumentType.objects.create(name=f"dltype-{i}-{fake.word()}"[:128])
            for i in range(NUM_DOC_TYPES)
        ]

        print(f"[setup] Creating {NUM_TAGS} tags...")  # noqa: T201
        tags = [
            Tag.objects.create(name=f"dltag-{i}-{fake.word()}"[:100])
            for i in range(NUM_TAGS)
        ]

        print(f"[setup] Creating {CUSTOM_FIELD_COUNT} custom fields...")  # noqa: T201
        custom_fields = [
            CustomField.objects.create(
                name=f"Field {i}",
                data_type=CustomField.FieldDataType.STRING,
            )
            for i in range(CUSTOM_FIELD_COUNT)
        ]

        note_user = User.objects.create_user(username="doclistnoteuser", password="x")
        owner = User.objects.create_superuser(username="doclistowner", password="admin")

        print(f"[setup] Building {NUM_DOCS} document rows...")  # noqa: T201
        base_date = datetime.date(2018, 1, 1)

        # Choice pools are loop-invariant, so build them once.  The None
        # padding leaves a share of documents without a correspondent /
        # document type.
        correspondent_pool = correspondents + [None] * 5
        doc_type_pool = doc_types + [None] * 4

        raw_docs = []
        for i in range(NUM_DOCS):
            # The order of rng / fake calls below determines the generated
            # corpus for a given SEED; keep it stable.
            day_offset = rng.randint(0, 6 * 365)
            raw_docs.append(
                Document(
                    title=fake.sentence(nb_words=rng.randint(3, 8)).rstrip("."),
                    content="\n\n".join(
                        fake.paragraph(nb_sentences=rng.randint(2, 5))
                        for _ in range(rng.randint(1, 3))
                    ),
                    checksum=f"DL{i:07d}",
                    correspondent=rng.choice(correspondent_pool),
                    document_type=rng.choice(doc_type_pool),
                    created=base_date + datetime.timedelta(days=day_offset),
                    owner=owner if rng.random() < 0.8 else None,
                ),
            )

        t0 = time.perf_counter()
        documents = Document.objects.bulk_create(raw_docs)
        print(f"[setup] bulk_create {NUM_DOCS} docs: {time.perf_counter() - t0:.2f}s")  # noqa: T201

        t0 = time.perf_counter()
        for doc in documents:
            # 0–5 tags per document; k == 0 leaves the document untagged.
            k = rng.randint(0, 5)
            if k:
                doc.tags.add(*rng.sample(tags, k))
        print(f"[setup] tag M2M assignments: {time.perf_counter() - t0:.2f}s")  # noqa: T201

        note_docs = rng.sample(documents, int(NUM_DOCS * NOTE_FRACTION))
        Note.objects.bulk_create(
            [
                Note(
                    document=doc,
                    note=fake.sentence(nb_words=rng.randint(4, 15)),
                    user=note_user,
                )
                for doc in note_docs
            ],
        )

        cf_docs = rng.sample(documents, int(NUM_DOCS * CUSTOM_FIELD_FRACTION))
        CustomFieldInstance.objects.bulk_create(
            [
                CustomFieldInstance(
                    document=doc,
                    field=rng.choice(custom_fields),
                    value_text=fake.word(),
                )
                for doc in cf_docs
            ],
        )

        first_doc_pk = documents[0].pk

        yield {"owner": owner, "first_doc_pk": first_doc_pk, "tags": tags}
    finally:
        # Runs after the module's tests finish AND when setup above fails
        # partway, so no partial corpus is left behind.
        print("\n[teardown] Removing doclist corpus...")  # noqa: T201
        Document.objects.all().delete()
        Correspondent.objects.all().delete()
        DocumentType.objects.all().delete()
        Tag.objects.all().delete()
        CustomField.objects.all().delete()
        User.objects.filter(username__in=["doclistnoteuser", "doclistowner"]).delete()


# ---------------------------------------------------------------------------
# TestDocListProfile
# ---------------------------------------------------------------------------


class TestDocListProfile:
    """Profile GET /api/documents/ — pure ORM path, no Tantivy."""

    @pytest.fixture(autouse=True)
    def _client(self, doclist_corpus):
        """Attach an API client authenticated as the corpus owner to the test instance."""
        owner = doclist_corpus["owner"]
        self.client = APIClient()
        self.client.force_authenticate(user=owner)
        self.first_doc_pk = doclist_corpus["first_doc_pk"]

    def test_list_default_ordering(self):
        """GET /api/documents/ default ordering (-created), page 1, page_size=25."""
        with profile_block(
            f"GET /api/documents/ default ordering [page_size={PAGE_SIZE}]",
        ):
            response = self.client.get(
                f"/api/documents/?page=1&page_size={PAGE_SIZE}",
            )
        assert response.status_code == 200

    def test_list_title_ordering(self):
        """GET /api/documents/ ordered by title — tests ORM sort path."""
        with profile_block(
            f"GET /api/documents/?ordering=title [page_size={PAGE_SIZE}]",
        ):
            response = self.client.get(
                f"/api/documents/?ordering=title&page=1&page_size={PAGE_SIZE}",
            )
        assert response.status_code == 200

    def test_list_page_size_comparison(self):
        """Compare serializer cost at page_size=10, 25, 100."""
        for page_size in [10, 25, 100]:
            with profile_block(f"GET /api/documents/ [page_size={page_size}]"):
                response = self.client.get(
                    f"/api/documents/?page=1&page_size={page_size}",
                )
            # Checked per iteration so a failure at any page size is caught,
            # not only at the last one.
            assert response.status_code == 200

    def test_list_detail_fields(self):
        """GET /api/documents/{id}/ — per-doc serializer cost with all relations."""
        pk = self.first_doc_pk
        with profile_block(f"GET /api/documents/{pk}/ — single doc serializer"):
            response = self.client.get(f"/api/documents/{pk}/")
        assert response.status_code == 200

    def test_list_cpu_profile(self):
        """cProfile of one list request — surfaces hot frames in serializer."""
        profile_cpu(
            lambda: self.client.get(
                f"/api/documents/?page=1&page_size={PAGE_SIZE}",
            ),
            label=f"GET /api/documents/ cProfile [page_size={PAGE_SIZE}]",
            top=30,
        )


# ---------------------------------------------------------------------------
# TestSelectionDataProfile
# ---------------------------------------------------------------------------


class TestSelectionDataProfile:
    """Profile _get_selection_data_for_queryset — the 5+ COUNT queries per request."""

    @pytest.fixture(autouse=True)
    def _setup(self, doclist_corpus):
        """Attach an authenticated API client and the corpus tags to the test instance."""
        owner = doclist_corpus["owner"]
        self.client = APIClient()
        self.client.force_authenticate(user=owner)
        self.tags = doclist_corpus["tags"]

    def test_selection_data_unfiltered(self):
        """Call _get_selection_data_for_queryset(all docs) directly — COUNT queries in isolation."""
        viewset = DocumentViewSet()
        qs = Document.objects.all()

        with profile_block("_get_selection_data_for_queryset(all docs) — direct call"):
            viewset._get_selection_data_for_queryset(qs)

    def test_selection_data_via_api(self):
        """Full API round-trip with include_selection_data=true."""
        with profile_block(
            f"GET /api/documents/?include_selection_data=true [page_size={PAGE_SIZE}]",
        ):
            response = self.client.get(
                f"/api/documents/?page=1&page_size={PAGE_SIZE}&include_selection_data=true",
            )
        assert response.status_code == 200
        assert "selection_data" in response.data

    def test_selection_data_filtered(self):
        """selection_data on a tag-filtered queryset — filtered COUNT vs unfiltered."""
        tag = self.tags[0]
        viewset = DocumentViewSet()
        filtered_qs = Document.objects.filter(tags=tag)
        unfiltered_qs = Document.objects.all()

        print(f"\n Tag '{tag.name}' matches {filtered_qs.count()} docs")  # noqa: T201

        with profile_block("_get_selection_data_for_queryset(unfiltered)"):
            viewset._get_selection_data_for_queryset(unfiltered_qs)

        with profile_block("_get_selection_data_for_queryset(filtered by tag)"):
            viewset._get_selection_data_for_queryset(filtered_qs)