From 7c50e0077c307e8f6aed0a6471c82b86e1a3eceb Mon Sep 17 00:00:00 2001 From: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Fri, 3 Apr 2026 15:53:55 -0700 Subject: [PATCH] chore: remove temporary profiling infrastructure Profiling tests and helper served their purpose during the search performance optimization work. Baseline and post-implementation data captured in docs/superpowers/plans/. Co-Authored-By: Claude Opus 4.6 --- src/documents/profiling.py | 71 ------------ src/documents/tests/test_search_profiling.py | 111 ------------------- 2 files changed, 182 deletions(-) delete mode 100644 src/documents/profiling.py delete mode 100644 src/documents/tests/test_search_profiling.py diff --git a/src/documents/profiling.py b/src/documents/profiling.py deleted file mode 100644 index 0c938e6dc..000000000 --- a/src/documents/profiling.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -Temporary profiling utilities for comparing implementations. - -Usage in a management command or shell:: - - from documents.profiling import profile_block - - with profile_block("new check_sanity"): - messages = check_sanity() - - with profile_block("old check_sanity"): - messages = check_sanity_old() - -Drop this file when done. -""" - -from __future__ import annotations - -import tracemalloc -from contextlib import contextmanager -from time import perf_counter -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from collections.abc import Generator - -from django.db import connection -from django.db import reset_queries -from django.test.utils import override_settings - - -@contextmanager -def profile_block(label: str = "block") -> Generator[None, None, None]: - """Profile memory, wall time, and DB queries for a code block. - - Prints a summary to stdout on exit. Requires no external packages. - Enables DEBUG temporarily to capture Django's query log. - """ - tracemalloc.start() - snapshot_before = tracemalloc.take_snapshot() - - with override_settings(DEBUG=True): - reset_queries() - start = perf_counter() - - yield - - elapsed = perf_counter() - start - queries = list(connection.queries) - - snapshot_after = tracemalloc.take_snapshot() - _, peak = tracemalloc.get_traced_memory() - tracemalloc.stop() - - # Compare snapshots for top allocations - stats = snapshot_after.compare_to(snapshot_before, "lineno") - - query_time = sum(float(q["time"]) for q in queries) - mem_diff = sum(s.size_diff for s in stats) - - print(f"\n{'=' * 60}") # noqa: T201 - print(f" Profile: {label}") # noqa: T201 - print(f"{'=' * 60}") # noqa: T201 - print(f" Wall time: {elapsed:.4f}s") # noqa: T201 - print(f" Queries: {len(queries)} ({query_time:.4f}s)") # noqa: T201 - print(f" Memory delta: {mem_diff / 1024:.1f} KiB") # noqa: T201 - print(f" Peak memory: {peak / 1024:.1f} KiB") # noqa: T201 - print("\n Top 5 allocations:") # noqa: T201 - for stat in stats[:5]: - print(f" {stat}") # noqa: T201 - print(f"{'=' * 60}\n") # noqa: T201 diff --git a/src/documents/tests/test_search_profiling.py b/src/documents/tests/test_search_profiling.py deleted file mode 100644 index 5d1cf2393..000000000 --- a/src/documents/tests/test_search_profiling.py +++ /dev/null @@ -1,111 +0,0 @@ -""" -Temporary profiling tests for search performance. - -Run with: uv run pytest src/documents/tests/test_search_profiling.py -v -s -p no:xdist -The -s flag is required to see profile_block() output on stdout. -The -p no:xdist flag disables parallel execution so profiling data is accurate. - -Delete this file when profiling is complete. -""" - -import pytest -from django.contrib.auth.models import User -from rest_framework.test import APIClient - -from documents.models import Document -from documents.profiling import profile_block -from documents.search import get_backend -from documents.search import reset_backend -from documents.tests.utils import DirectoriesMixin - -pytestmark = [pytest.mark.search, pytest.mark.django_db] - -DOC_COUNT = 200 # Enough to exercise pagination and overfetch behavior - - -class TestSearchProfilingBaseline(DirectoriesMixin): - """Baseline profiling of the CURRENT search implementation. - - Run BEFORE making changes, record the output, then compare with Task 6. - """ - - @pytest.fixture(autouse=True) - def _setup(self): - reset_backend() - self.user = User.objects.create_superuser(username="profiler") - self.client = APIClient() - self.client.force_authenticate(user=self.user) - - backend = get_backend() - for i in range(DOC_COUNT): - doc = Document.objects.create( - title=f"Profiling document number {i}", - content=f"This is searchable content for document {i} with keyword profiling", - checksum=f"PROF{i:04d}", - archive_serial_number=i + 1, - ) - backend.add_or_update(doc) - yield - reset_backend() - - def test_profile_relevance_search(self): - """Profile: relevance-ranked search, no ordering, page 1 default page_size.""" - with profile_block("BEFORE — relevance search (no ordering)"): - response = self.client.get("/api/documents/?query=profiling") - assert response.status_code == 200 - assert response.data["count"] == DOC_COUNT - - def test_profile_sorted_search(self): - """Profile: search with ORM-based ordering (created field).""" - with profile_block("BEFORE — sorted search (ordering=created)"): - response = self.client.get( - "/api/documents/?query=profiling&ordering=created", - ) - assert response.status_code == 200 - assert response.data["count"] == DOC_COUNT - - def test_profile_paginated_search(self): - """Profile: search requesting page 2 with explicit page_size.""" - with profile_block("BEFORE — paginated search (page=2, page_size=25)"): - response = self.client.get( - "/api/documents/?query=profiling&page=2&page_size=25", - ) - assert response.status_code == 200 - assert len(response.data["results"]) == 25 - - def test_profile_search_with_selection_data(self): - """Profile: search with include_selection_data=true.""" - with profile_block("BEFORE — search with selection_data"): - response = self.client.get( - "/api/documents/?query=profiling&include_selection_data=true", - ) - assert response.status_code == 200 - assert "selection_data" in response.data - - def test_profile_backend_search_only(self): - """Profile: raw backend.search() call to isolate Tantivy cost from DRF.""" - backend = get_backend() - with profile_block("BEFORE — backend.search(page_size=10000, all highlights)"): - results = backend.search( - "profiling", - user=None, - page=1, - page_size=10000, - sort_field=None, - sort_reverse=False, - ) - assert results.total == DOC_COUNT - - def test_profile_backend_search_single_page(self): - """Profile: raw backend.search() with real page size to compare.""" - backend = get_backend() - with profile_block("BEFORE — backend.search(page_size=25)"): - results = backend.search( - "profiling", - user=None, - page=1, - page_size=25, - sort_field=None, - sort_reverse=False, - ) - assert len(results.hits) == 25