mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-04-07 00:28:54 +00:00
chore: remove temporary profiling infrastructure
Profiling tests and helper served their purpose during the search performance optimization work. Baseline and post-implementation data captured in docs/superpowers/plans/. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,71 +0,0 @@
|
||||
"""
|
||||
Temporary profiling utilities for comparing implementations.
|
||||
|
||||
Usage in a management command or shell::
|
||||
|
||||
from documents.profiling import profile_block
|
||||
|
||||
with profile_block("new check_sanity"):
|
||||
messages = check_sanity()
|
||||
|
||||
with profile_block("old check_sanity"):
|
||||
messages = check_sanity_old()
|
||||
|
||||
Drop this file when done.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import tracemalloc
|
||||
from contextlib import contextmanager
|
||||
from time import perf_counter
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Generator
|
||||
|
||||
from django.db import connection
|
||||
from django.db import reset_queries
|
||||
from django.test.utils import override_settings
|
||||
|
||||
|
||||
@contextmanager
def profile_block(label: str = "block") -> Generator[None, None, None]:
    """Profile memory, wall time, and DB queries for a code block.

    Prints a summary to stdout on exit. Requires no external packages.
    Enables DEBUG temporarily to capture Django's query log.

    Args:
        label: Heading printed at the top of the summary.

    Yields:
        None. The body of the ``with`` statement is the profiled region.

    If the profiled body raises, the exception propagates unchanged and no
    summary is printed, but tracemalloc is still returned to the state it was
    in before this context manager was entered.
    """
    # Only start (and later stop) tracemalloc if nobody else is tracing.
    # Stopping unconditionally would discard the traces of an enclosing
    # profile_block (or of the caller) and make its next snapshot fail.
    owns_tracing = not tracemalloc.is_tracing()
    if owns_tracing:
        tracemalloc.start()

    try:
        snapshot_before = tracemalloc.take_snapshot()

        # Django's connection.queries is populated while DEBUG is enabled.
        with override_settings(DEBUG=True):
            reset_queries()
            start = perf_counter()

            yield

            elapsed = perf_counter() - start
            # Copy the log before leaving the DEBUG override.
            queries = list(connection.queries)

        snapshot_after = tracemalloc.take_snapshot()
        # NOTE: the peak is measured since tracing started, which predates
        # this block when tracing was already active on entry.
        _, peak = tracemalloc.get_traced_memory()
    finally:
        # Runs on success and on failure so tracing never leaks out of here.
        if owns_tracing:
            tracemalloc.stop()

    # Compare snapshots for top allocations
    stats = snapshot_after.compare_to(snapshot_before, "lineno")

    query_time = sum(float(q["time"]) for q in queries)
    mem_diff = sum(s.size_diff for s in stats)

    print(f"\n{'=' * 60}")  # noqa: T201
    print(f" Profile: {label}")  # noqa: T201
    print(f"{'=' * 60}")  # noqa: T201
    print(f" Wall time: {elapsed:.4f}s")  # noqa: T201
    print(f" Queries: {len(queries)} ({query_time:.4f}s)")  # noqa: T201
    print(f" Memory delta: {mem_diff / 1024:.1f} KiB")  # noqa: T201
    print(f" Peak memory: {peak / 1024:.1f} KiB")  # noqa: T201
    print("\n Top 5 allocations:")  # noqa: T201
    for stat in stats[:5]:
        print(f" {stat}")  # noqa: T201
    print(f"{'=' * 60}\n")  # noqa: T201
|
||||
@@ -1,111 +0,0 @@
|
||||
"""
|
||||
Temporary profiling tests for search performance.
|
||||
|
||||
Run with: uv run pytest src/documents/tests/test_search_profiling.py -v -s -p no:xdist
|
||||
The -s flag is required to see profile_block() output on stdout.
|
||||
The -p no:xdist flag disables parallel execution so profiling data is accurate.
|
||||
|
||||
Delete this file when profiling is complete.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from django.contrib.auth.models import User
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from documents.models import Document
|
||||
from documents.profiling import profile_block
|
||||
from documents.search import get_backend
|
||||
from documents.search import reset_backend
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
# Module-wide pytest marks: every test here carries the ``search`` and
# ``django_db`` markers.
pytestmark = [pytest.mark.search, pytest.mark.django_db]

# Number of documents created and indexed by the fixture for each test.
DOC_COUNT = 200  # Enough to exercise pagination and overfetch behavior
|
||||
|
||||
|
||||
class TestSearchProfilingBaseline(DirectoriesMixin):
    """Collect baseline profiles of the CURRENT search implementation.

    Run BEFORE making changes, record the output, then compare with Task 6.
    """

    @pytest.fixture(autouse=True)
    def _setup(self):
        """Create an authenticated API client and index DOC_COUNT documents.

        The search backend is reset both before the data is created and
        again after the test has finished.
        """
        reset_backend()

        self.client = APIClient()
        self.user = User.objects.create_superuser(username="profiler")
        self.client.force_authenticate(user=self.user)

        search_backend = get_backend()
        for index in range(DOC_COUNT):
            search_backend.add_or_update(
                Document.objects.create(
                    title=f"Profiling document number {index}",
                    content=f"This is searchable content for document {index} with keyword profiling",
                    checksum=f"PROF{index:04d}",
                    archive_serial_number=index + 1,
                ),
            )

        yield

        reset_backend()

    def test_profile_relevance_search(self):
        """Profile: relevance-ranked search, no ordering, page 1 default page_size."""
        url = "/api/documents/?query=profiling"
        with profile_block("BEFORE — relevance search (no ordering)"):
            resp = self.client.get(url)

        assert resp.status_code == 200
        assert resp.data["count"] == DOC_COUNT

    def test_profile_sorted_search(self):
        """Profile: search with ORM-based ordering (created field)."""
        url = "/api/documents/?query=profiling&ordering=created"
        with profile_block("BEFORE — sorted search (ordering=created)"):
            resp = self.client.get(url)

        assert resp.status_code == 200
        assert resp.data["count"] == DOC_COUNT

    def test_profile_paginated_search(self):
        """Profile: search requesting page 2 with explicit page_size."""
        url = "/api/documents/?query=profiling&page=2&page_size=25"
        with profile_block("BEFORE — paginated search (page=2, page_size=25)"):
            resp = self.client.get(url)

        assert resp.status_code == 200
        assert len(resp.data["results"]) == 25

    def test_profile_search_with_selection_data(self):
        """Profile: search with include_selection_data=true."""
        url = "/api/documents/?query=profiling&include_selection_data=true"
        with profile_block("BEFORE — search with selection_data"):
            resp = self.client.get(url)

        assert resp.status_code == 200
        assert "selection_data" in resp.data

    def test_profile_backend_search_only(self):
        """Profile: raw backend.search() call to isolate Tantivy cost from DRF."""
        search_backend = get_backend()
        search_kwargs = {
            "user": None,
            "page": 1,
            "page_size": 10000,
            "sort_field": None,
            "sort_reverse": False,
        }
        with profile_block("BEFORE — backend.search(page_size=10000, all highlights)"):
            found = search_backend.search("profiling", **search_kwargs)

        assert found.total == DOC_COUNT

    def test_profile_backend_search_single_page(self):
        """Profile: raw backend.search() with real page size to compare."""
        search_backend = get_backend()
        search_kwargs = {
            "user": None,
            "page": 1,
            "page_size": 25,
            "sort_field": None,
            "sort_reverse": False,
        }
        with profile_block("BEFORE — backend.search(page_size=25)"):
            found = search_backend.search("profiling", **search_kwargs)

        assert len(found.hits) == 25
|
||||
Reference in New Issue
Block a user