diff --git a/docs/superpowers/plans/profiling-baseline.txt b/docs/superpowers/plans/profiling-baseline.txt new file mode 100644 index 000000000..07a66a4b5 --- /dev/null +++ b/docs/superpowers/plans/profiling-baseline.txt @@ -0,0 +1,121 @@ +============================= test session starts ============================== +platform linux -- Python 3.14.3, pytest-9.0.2, pluggy-1.6.0 -- /home/trenton/Documents/projects/paperless-ngx/.venv/bin/python +cachedir: .pytest_cache +django: version: 5.2.12, settings: paperless.settings (from ini) +rootdir: /home/trenton/Documents/projects/paperless-ngx +configfile: pyproject.toml +plugins: sugar-1.1.1, xdist-3.8.0, cov-7.0.0, httpx-0.36.0, django-4.12.0, Faker-40.8.0, env-1.5.0, time-machine-3.2.0, mock-3.15.1, anyio-4.12.1, rerunfailures-16.1 +collecting ... collected 6 items + +src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_relevance_search Creating test database for alias 'default'... + +============================================================ + Profile: BEFORE — relevance search (no ordering) +============================================================ + Wall time: 0.9622s + Queries: 33 (0.0000s) + Memory delta: 16557.2 KiB + Peak memory: 16584.0 KiB + + Top 5 allocations: + :511: size=5480 KiB (+5480 KiB), count=45642 (+45642), average=123 B + /home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/fido2/rpid.py:47: size=518 KiB (+518 KiB), count=9769 (+9769), average=54 B + :106: size=432 KiB (+432 KiB), count=1480 (+1480), average=299 B + /home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/langdetect/utils/ngram.py:257: size=391 KiB (+391 KiB), count=6667 (+6667), average=60 B + :491: size=284 KiB (+284 KiB), count=2543 (+2543), average=114 B +============================================================ + +PASSED +src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_sorted_search +============================================================ + Profile: BEFORE — sorted search (ordering=created) +============================================================ + Wall time: 0.1320s + Queries: 32 (0.0010s) + Memory delta: 880.8 KiB + Peak memory: 906.8 KiB + + Top 5 allocations: + /home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:575: size=50.1 KiB (+50.1 KiB), count=521 (+521), average=99 B + /home/trenton/.local/share/uv/python/cpython-3.14.3-linux-x86_64-gnu/lib/python3.14/copyreg.py:104: size=49.7 KiB (+49.7 KiB), count=315 (+315), average=162 B + /home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django/db/models/sql/query.py:386: size=38.0 KiB (+38.0 KiB), count=160 (+160), average=243 B + /home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django_filters/filterset.py:209: size=32.0 KiB (+32.0 KiB), count=82 (+82), average=400 B + /home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django_filters/filters.py:158: size=21.4 KiB (+21.4 KiB), count=104 (+104), average=210 B +============================================================ + +PASSED +src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_paginated_search +============================================================ + Profile: BEFORE — paginated search (page=2, page_size=25) +============================================================ + Wall time: 0.1395s + Queries: 32 (0.0000s) + Memory delta: 868.1 KiB + Peak memory: 893.5 KiB + + Top 5 allocations: + /home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:575: size=50.1 KiB (+50.1 KiB), count=521 (+521), average=99 B + /home/trenton/.local/share/uv/python/cpython-3.14.3-linux-x86_64-gnu/lib/python3.14/copyreg.py:104: size=49.2 KiB (+49.2 KiB), count=315 (+315), average=160 B + /home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django/db/models/sql/query.py:386: size=38.1 KiB (+38.1 KiB), count=161 (+161), average=242 B + /home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django_filters/filterset.py:209: size=32.0 KiB (+32.0 KiB), count=82 (+82), average=400 B + /home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django_filters/filters.py:158: size=21.3 KiB (+21.3 KiB), count=104 (+104), average=209 B +============================================================ + +PASSED +src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_search_with_selection_data +============================================================ + Profile: BEFORE — search with selection_data +============================================================ + Wall time: 0.1656s + Queries: 37 (0.0020s) + Memory delta: 926.9 KiB + Peak memory: 1084.3 KiB + + Top 5 allocations: + /home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:575: size=50.1 KiB (+50.1 KiB), count=521 (+521), average=99 B + /home/trenton/.local/share/uv/python/cpython-3.14.3-linux-x86_64-gnu/lib/python3.14/copyreg.py:104: size=49.6 KiB (+49.6 KiB), count=327 (+327), average=155 B + /home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django/db/models/sql/query.py:386: size=38.1 KiB (+38.1 KiB), count=161 (+161), average=242 B + /home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django_filters/filterset.py:209: size=32.0 KiB (+32.0 KiB), count=82 (+82), average=400 B + /home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django/db/backends/sqlite3/operations.py:193: size=27.1 KiB (+27.1 KiB), count=37 (+37), average=751 B +============================================================ + +PASSED +src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_backend_search_only +============================================================ + Profile: BEFORE — backend.search(page_size=10000, all highlights) +============================================================ + Wall time: 0.0175s + Queries: 0 (0.0000s) + Memory delta: 88.6 KiB + Peak memory: 100.3 KiB + + Top 5 allocations: + /home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:575: size=51.2 KiB (+51.2 KiB), count=530 (+530), average=99 B + /home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:557: size=17.8 KiB (+17.8 KiB), count=200 (+200), average=91 B + /home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:542: size=8576 B (+8576 B), count=134 (+134), average=64 B + /home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:522: size=4800 B (+4800 B), count=200 (+200), average=24 B + /home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:515: size=2376 B (+2376 B), count=99 (+99), average=24 B +============================================================ + +PASSED +src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_backend_search_single_page +============================================================ + Profile: BEFORE — backend.search(page_size=25) +============================================================ + Wall time: 0.0070s + Queries: 0 (0.0000s) + Memory delta: 5.9 KiB + Peak memory: 11.3 KiB + + Top 5 allocations: + /home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:557: size=2275 B (+2275 B), count=25 (+25), average=91 B + /home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:575: size=1600 B (+1600 B), count=25 (+25), average=64 B + /home/trenton/.local/share/uv/python/cpython-3.14.3-linux-x86_64-gnu/lib/python3.14/weakref.py:73: size=1280 B (+1280 B), count=20 (+20), average=64 B + /home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:574: size=256 B (+256 B), count=1 (+1), average=256 B + /home/trenton/.local/share/uv/python/cpython-3.14.3-linux-x86_64-gnu/lib/python3.14/tracemalloc.py:560: size=240 B (+240 B), count=1 (+1), average=240 B +============================================================ + +PASSEDDestroying test database for alias 'default'... + + +======================== 6 passed in 241.83s (0:04:01) ========================= diff --git a/src/documents/profiling.py b/src/documents/profiling.py new file mode 100644 index 000000000..0c938e6dc --- /dev/null +++ b/src/documents/profiling.py @@ -0,0 +1,71 @@ +""" +Temporary profiling utilities for comparing implementations. + +Usage in a management command or shell:: + + from documents.profiling import profile_block + + with profile_block("new check_sanity"): + messages = check_sanity() + + with profile_block("old check_sanity"): + messages = check_sanity_old() + +Drop this file when done. +""" + +from __future__ import annotations + +import tracemalloc +from contextlib import contextmanager +from time import perf_counter +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Generator + +from django.db import connection +from django.db import reset_queries +from django.test.utils import override_settings + + +@contextmanager +def profile_block(label: str = "block") -> Generator[None, None, None]: + """Profile memory, wall time, and DB queries for a code block. + + Prints a summary to stdout on exit. Requires no external packages. + Enables DEBUG temporarily to capture Django's query log. + """ + tracemalloc.start() + snapshot_before = tracemalloc.take_snapshot() + + with override_settings(DEBUG=True): + reset_queries() + start = perf_counter() + + yield + + elapsed = perf_counter() - start + queries = list(connection.queries) + + snapshot_after = tracemalloc.take_snapshot() + _, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() + + # Compare snapshots for top allocations + stats = snapshot_after.compare_to(snapshot_before, "lineno") + + query_time = sum(float(q["time"]) for q in queries) + mem_diff = sum(s.size_diff for s in stats) + + print(f"\n{'=' * 60}") # noqa: T201 + print(f" Profile: {label}") # noqa: T201 + print(f"{'=' * 60}") # noqa: T201 + print(f" Wall time: {elapsed:.4f}s") # noqa: T201 + print(f" Queries: {len(queries)} ({query_time:.4f}s)") # noqa: T201 + print(f" Memory delta: {mem_diff / 1024:.1f} KiB") # noqa: T201 + print(f" Peak memory: {peak / 1024:.1f} KiB") # noqa: T201 + print("\n Top 5 allocations:") # noqa: T201 + for stat in stats[:5]: + print(f" {stat}") # noqa: T201 + print(f"{'=' * 60}\n") # noqa: T201 diff --git a/src/documents/tests/test_search_profiling.py b/src/documents/tests/test_search_profiling.py new file mode 100644 index 000000000..5d1cf2393 --- /dev/null +++ b/src/documents/tests/test_search_profiling.py @@ -0,0 +1,111 @@ +""" +Temporary profiling tests for search performance. + +Run with: uv run pytest src/documents/tests/test_search_profiling.py -v -s -p no:xdist +The -s flag is required to see profile_block() output on stdout. +The -p no:xdist flag disables parallel execution so profiling data is accurate. + +Delete this file when profiling is complete. +""" + +import pytest +from django.contrib.auth.models import User +from rest_framework.test import APIClient + +from documents.models import Document +from documents.profiling import profile_block +from documents.search import get_backend +from documents.search import reset_backend +from documents.tests.utils import DirectoriesMixin + +pytestmark = [pytest.mark.search, pytest.mark.django_db] + +DOC_COUNT = 200 # Enough to exercise pagination and overfetch behavior + + +class TestSearchProfilingBaseline(DirectoriesMixin): + """Baseline profiling of the CURRENT search implementation. + + Run BEFORE making changes, record the output, then compare with Task 6. + """ + + @pytest.fixture(autouse=True) + def _setup(self): + reset_backend() + self.user = User.objects.create_superuser(username="profiler") + self.client = APIClient() + self.client.force_authenticate(user=self.user) + + backend = get_backend() + for i in range(DOC_COUNT): + doc = Document.objects.create( + title=f"Profiling document number {i}", + content=f"This is searchable content for document {i} with keyword profiling", + checksum=f"PROF{i:04d}", + archive_serial_number=i + 1, + ) + backend.add_or_update(doc) + yield + reset_backend() + + def test_profile_relevance_search(self): + """Profile: relevance-ranked search, no ordering, page 1 default page_size.""" + with profile_block("BEFORE — relevance search (no ordering)"): + response = self.client.get("/api/documents/?query=profiling") + assert response.status_code == 200 + assert response.data["count"] == DOC_COUNT + + def test_profile_sorted_search(self): + """Profile: search with ORM-based ordering (created field).""" + with profile_block("BEFORE — sorted search (ordering=created)"): + response = self.client.get( + "/api/documents/?query=profiling&ordering=created", + ) + assert response.status_code == 200 + assert response.data["count"] == DOC_COUNT + + def test_profile_paginated_search(self): + """Profile: search requesting page 2 with explicit page_size.""" + with profile_block("BEFORE — paginated search (page=2, page_size=25)"): + response = self.client.get( + "/api/documents/?query=profiling&page=2&page_size=25", + ) + assert response.status_code == 200 + assert len(response.data["results"]) == 25 + + def test_profile_search_with_selection_data(self): + """Profile: search with include_selection_data=true.""" + with profile_block("BEFORE — search with selection_data"): + response = self.client.get( + "/api/documents/?query=profiling&include_selection_data=true", + ) + assert response.status_code == 200 + assert "selection_data" in response.data + + def test_profile_backend_search_only(self): + """Profile: raw backend.search() call to isolate Tantivy cost from DRF.""" + backend = get_backend() + with profile_block("BEFORE — backend.search(page_size=10000, all highlights)"): + results = backend.search( + "profiling", + user=None, + page=1, + page_size=10000, + sort_field=None, + sort_reverse=False, + ) + assert results.total == DOC_COUNT + + def test_profile_backend_search_single_page(self): + """Profile: raw backend.search() with real page size to compare.""" + backend = get_backend() + with profile_block("BEFORE — backend.search(page_size=25)"): + results = backend.search( + "profiling", + user=None, + page=1, + page_size=25, + sort_field=None, + sort_reverse=False, + ) + assert len(results.hits) == 25