mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-04-06 08:08:51 +00:00
Compare commits
12 Commits
feature-ar
...
feature-se
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0b5b6fdad5 | ||
|
|
d98dbd50f4 | ||
|
|
7649e4a6b1 | ||
|
|
610ba27891 | ||
|
|
7c50e0077c | ||
|
|
288740ea62 | ||
|
|
d998d3fbaf | ||
|
|
6cf01dd383 | ||
|
|
0d915c58a4 | ||
|
|
46008d2da7 | ||
|
|
6768c1e6f8 | ||
|
|
5a94291b79 |
1176
docs/superpowers/plans/2026-04-03-search-performance.md
Normal file
1176
docs/superpowers/plans/2026-04-03-search-performance.md
Normal file
File diff suppressed because it is too large
Load Diff
121
docs/superpowers/plans/profiling-baseline.txt
Normal file
121
docs/superpowers/plans/profiling-baseline.txt
Normal file
@@ -0,0 +1,121 @@
|
||||
============================= test session starts ==============================
|
||||
platform linux -- Python 3.14.3, pytest-9.0.2, pluggy-1.6.0 -- /home/trenton/Documents/projects/paperless-ngx/.venv/bin/python
|
||||
cachedir: .pytest_cache
|
||||
django: version: 5.2.12, settings: paperless.settings (from ini)
|
||||
rootdir: /home/trenton/Documents/projects/paperless-ngx
|
||||
configfile: pyproject.toml
|
||||
plugins: sugar-1.1.1, xdist-3.8.0, cov-7.0.0, httpx-0.36.0, django-4.12.0, Faker-40.8.0, env-1.5.0, time-machine-3.2.0, mock-3.15.1, anyio-4.12.1, rerunfailures-16.1
|
||||
collecting ... collected 6 items
|
||||
|
||||
src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_relevance_search Creating test database for alias 'default'...
|
||||
|
||||
============================================================
|
||||
Profile: BEFORE — relevance search (no ordering)
|
||||
============================================================
|
||||
Wall time: 0.9622s
|
||||
Queries: 33 (0.0000s)
|
||||
Memory delta: 16557.2 KiB
|
||||
Peak memory: 16584.0 KiB
|
||||
|
||||
Top 5 allocations:
|
||||
<frozen importlib._bootstrap_external>:511: size=5480 KiB (+5480 KiB), count=45642 (+45642), average=123 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/fido2/rpid.py:47: size=518 KiB (+518 KiB), count=9769 (+9769), average=54 B
|
||||
<frozen abc>:106: size=432 KiB (+432 KiB), count=1480 (+1480), average=299 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/langdetect/utils/ngram.py:257: size=391 KiB (+391 KiB), count=6667 (+6667), average=60 B
|
||||
<frozen importlib._bootstrap>:491: size=284 KiB (+284 KiB), count=2543 (+2543), average=114 B
|
||||
============================================================
|
||||
|
||||
PASSED
|
||||
src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_sorted_search
|
||||
============================================================
|
||||
Profile: BEFORE — sorted search (ordering=created)
|
||||
============================================================
|
||||
Wall time: 0.1320s
|
||||
Queries: 32 (0.0010s)
|
||||
Memory delta: 880.8 KiB
|
||||
Peak memory: 906.8 KiB
|
||||
|
||||
Top 5 allocations:
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:575: size=50.1 KiB (+50.1 KiB), count=521 (+521), average=99 B
|
||||
/home/trenton/.local/share/uv/python/cpython-3.14.3-linux-x86_64-gnu/lib/python3.14/copyreg.py:104: size=49.7 KiB (+49.7 KiB), count=315 (+315), average=162 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django/db/models/sql/query.py:386: size=38.0 KiB (+38.0 KiB), count=160 (+160), average=243 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django_filters/filterset.py:209: size=32.0 KiB (+32.0 KiB), count=82 (+82), average=400 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django_filters/filters.py:158: size=21.4 KiB (+21.4 KiB), count=104 (+104), average=210 B
|
||||
============================================================
|
||||
|
||||
PASSED
|
||||
src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_paginated_search
|
||||
============================================================
|
||||
Profile: BEFORE — paginated search (page=2, page_size=25)
|
||||
============================================================
|
||||
Wall time: 0.1395s
|
||||
Queries: 32 (0.0000s)
|
||||
Memory delta: 868.1 KiB
|
||||
Peak memory: 893.5 KiB
|
||||
|
||||
Top 5 allocations:
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:575: size=50.1 KiB (+50.1 KiB), count=521 (+521), average=99 B
|
||||
/home/trenton/.local/share/uv/python/cpython-3.14.3-linux-x86_64-gnu/lib/python3.14/copyreg.py:104: size=49.2 KiB (+49.2 KiB), count=315 (+315), average=160 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django/db/models/sql/query.py:386: size=38.1 KiB (+38.1 KiB), count=161 (+161), average=242 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django_filters/filterset.py:209: size=32.0 KiB (+32.0 KiB), count=82 (+82), average=400 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django_filters/filters.py:158: size=21.3 KiB (+21.3 KiB), count=104 (+104), average=209 B
|
||||
============================================================
|
||||
|
||||
PASSED
|
||||
src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_search_with_selection_data
|
||||
============================================================
|
||||
Profile: BEFORE — search with selection_data
|
||||
============================================================
|
||||
Wall time: 0.1656s
|
||||
Queries: 37 (0.0020s)
|
||||
Memory delta: 926.9 KiB
|
||||
Peak memory: 1084.3 KiB
|
||||
|
||||
Top 5 allocations:
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:575: size=50.1 KiB (+50.1 KiB), count=521 (+521), average=99 B
|
||||
/home/trenton/.local/share/uv/python/cpython-3.14.3-linux-x86_64-gnu/lib/python3.14/copyreg.py:104: size=49.6 KiB (+49.6 KiB), count=327 (+327), average=155 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django/db/models/sql/query.py:386: size=38.1 KiB (+38.1 KiB), count=161 (+161), average=242 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django_filters/filterset.py:209: size=32.0 KiB (+32.0 KiB), count=82 (+82), average=400 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django/db/backends/sqlite3/operations.py:193: size=27.1 KiB (+27.1 KiB), count=37 (+37), average=751 B
|
||||
============================================================
|
||||
|
||||
PASSED
|
||||
src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_backend_search_only
|
||||
============================================================
|
||||
Profile: BEFORE — backend.search(page_size=10000, all highlights)
|
||||
============================================================
|
||||
Wall time: 0.0175s
|
||||
Queries: 0 (0.0000s)
|
||||
Memory delta: 88.6 KiB
|
||||
Peak memory: 100.3 KiB
|
||||
|
||||
Top 5 allocations:
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:575: size=51.2 KiB (+51.2 KiB), count=530 (+530), average=99 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:557: size=17.8 KiB (+17.8 KiB), count=200 (+200), average=91 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:542: size=8576 B (+8576 B), count=134 (+134), average=64 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:522: size=4800 B (+4800 B), count=200 (+200), average=24 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:515: size=2376 B (+2376 B), count=99 (+99), average=24 B
|
||||
============================================================
|
||||
|
||||
PASSED
|
||||
src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_backend_search_single_page
|
||||
============================================================
|
||||
Profile: BEFORE — backend.search(page_size=25)
|
||||
============================================================
|
||||
Wall time: 0.0070s
|
||||
Queries: 0 (0.0000s)
|
||||
Memory delta: 5.9 KiB
|
||||
Peak memory: 11.3 KiB
|
||||
|
||||
Top 5 allocations:
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:557: size=2275 B (+2275 B), count=25 (+25), average=91 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:575: size=1600 B (+1600 B), count=25 (+25), average=64 B
|
||||
/home/trenton/.local/share/uv/python/cpython-3.14.3-linux-x86_64-gnu/lib/python3.14/weakref.py:73: size=1280 B (+1280 B), count=20 (+20), average=64 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:574: size=256 B (+256 B), count=1 (+1), average=256 B
|
||||
/home/trenton/.local/share/uv/python/cpython-3.14.3-linux-x86_64-gnu/lib/python3.14/tracemalloc.py:560: size=240 B (+240 B), count=1 (+1), average=240 B
|
||||
============================================================
|
||||
|
||||
PASSEDDestroying test database for alias 'default'...
|
||||
|
||||
|
||||
======================== 6 passed in 241.83s (0:04:01) =========================
|
||||
@@ -1,3 +1,4 @@
|
||||
from documents.search._backend import SearchHit
|
||||
from documents.search._backend import SearchIndexLockError
|
||||
from documents.search._backend import SearchMode
|
||||
from documents.search._backend import SearchResults
|
||||
@@ -10,6 +11,7 @@ from documents.search._schema import needs_rebuild
|
||||
from documents.search._schema import wipe_index
|
||||
|
||||
__all__ = [
|
||||
"SearchHit",
|
||||
"SearchIndexLockError",
|
||||
"SearchMode",
|
||||
"SearchResults",
|
||||
|
||||
@@ -106,27 +106,61 @@ class SearchResults:
|
||||
|
||||
class TantivyRelevanceList:
|
||||
"""
|
||||
DRF-compatible list wrapper for Tantivy search hits.
|
||||
DRF-compatible list wrapper for Tantivy search results.
|
||||
|
||||
Provides paginated access to search results while storing all hits in memory
|
||||
for efficient ID retrieval. Used by Django REST framework for pagination.
|
||||
Holds a lightweight ordered list of IDs (for pagination count and
|
||||
``selection_data``) together with a small page of rich ``SearchHit``
|
||||
dicts (for serialization). DRF's ``PageNumberPagination`` calls
|
||||
``__len__`` to compute the total page count and ``__getitem__`` to
|
||||
slice the displayed page.
|
||||
|
||||
Methods:
|
||||
__len__: Returns total hit count for pagination calculations
|
||||
__getitem__: Slices the hit list for page-specific results
|
||||
|
||||
Note: Stores ALL post-filter hits so get_all_result_ids() can return
|
||||
every matching document ID without requiring a second search query.
|
||||
Args:
|
||||
ordered_ids: All matching document IDs in display order.
|
||||
page_hits: Rich SearchHit dicts for the requested DRF page only.
|
||||
page_offset: Index into *ordered_ids* where *page_hits* starts.
|
||||
"""
|
||||
|
||||
def __init__(self, hits: list[SearchHit]) -> None:
|
||||
self._hits = hits
|
||||
def __init__(
|
||||
self,
|
||||
ordered_ids: list[int],
|
||||
page_hits: list[SearchHit],
|
||||
page_offset: int = 0,
|
||||
) -> None:
|
||||
self._ordered_ids = ordered_ids
|
||||
self._page_hits = page_hits
|
||||
self._page_offset = page_offset
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self._hits)
|
||||
return len(self._ordered_ids)
|
||||
|
||||
def __getitem__(self, key: slice) -> list[SearchHit]:
|
||||
return self._hits[key]
|
||||
def __getitem__(self, key: int | slice) -> SearchHit | list[SearchHit]:
|
||||
if isinstance(key, int):
|
||||
idx = key if key >= 0 else len(self._ordered_ids) + key
|
||||
if self._page_offset <= idx < self._page_offset + len(self._page_hits):
|
||||
return self._page_hits[idx - self._page_offset]
|
||||
return SearchHit(
|
||||
id=self._ordered_ids[key],
|
||||
score=0.0,
|
||||
rank=idx + 1,
|
||||
highlights={},
|
||||
)
|
||||
start = key.start or 0
|
||||
stop = key.stop or len(self._ordered_ids)
|
||||
# DRF slices to extract the current page. If the slice aligns
|
||||
# with our pre-fetched page_hits, return them directly.
|
||||
if start == self._page_offset and stop <= self._page_offset + len(
|
||||
self._page_hits,
|
||||
):
|
||||
return self._page_hits[: stop - start]
|
||||
# Fallback: return stub dicts (no highlights).
|
||||
return [
|
||||
SearchHit(id=doc_id, score=0.0, rank=start + i + 1, highlights={})
|
||||
for i, doc_id in enumerate(self._ordered_ids[key])
|
||||
]
|
||||
|
||||
def get_all_ids(self) -> list[int]:
|
||||
"""Return all matching document IDs in display order."""
|
||||
return self._ordered_ids
|
||||
|
||||
|
||||
class SearchIndexLockError(Exception):
|
||||
@@ -234,6 +268,34 @@ class TantivyBackend:
|
||||
the underlying index directory changes (e.g., during test isolation).
|
||||
"""
|
||||
|
||||
# Maps DRF ordering field names to Tantivy index field names.
|
||||
SORT_FIELD_MAP: dict[str, str] = {
|
||||
"title": "title_sort",
|
||||
"correspondent__name": "correspondent_sort",
|
||||
"document_type__name": "type_sort",
|
||||
"created": "created",
|
||||
"added": "added",
|
||||
"modified": "modified",
|
||||
"archive_serial_number": "asn",
|
||||
"page_count": "page_count",
|
||||
"num_notes": "num_notes",
|
||||
}
|
||||
|
||||
# Fields where Tantivy's sort order matches the ORM's sort order.
|
||||
# Text-based fields (title, correspondent__name, document_type__name)
|
||||
# are excluded because Tantivy's tokenized fast fields produce different
|
||||
# ordering than the ORM's collation-based ordering.
|
||||
SORTABLE_FIELDS: frozenset[str] = frozenset(
|
||||
{
|
||||
"created",
|
||||
"added",
|
||||
"modified",
|
||||
"archive_serial_number",
|
||||
"page_count",
|
||||
"num_notes",
|
||||
},
|
||||
)
|
||||
|
||||
def __init__(self, path: Path | None = None):
|
||||
# path=None → in-memory index (for tests)
|
||||
# path=some_dir → on-disk index (for production)
|
||||
@@ -272,6 +334,36 @@ class TantivyBackend:
|
||||
if self._index is None:
|
||||
self.open() # pragma: no cover
|
||||
|
||||
def _parse_query(
|
||||
self,
|
||||
query: str,
|
||||
search_mode: SearchMode,
|
||||
) -> tantivy.Query:
|
||||
"""Parse a user query string into a Tantivy Query object."""
|
||||
tz = get_current_timezone()
|
||||
if search_mode is SearchMode.TEXT:
|
||||
return parse_simple_text_query(self._index, query)
|
||||
elif search_mode is SearchMode.TITLE:
|
||||
return parse_simple_title_query(self._index, query)
|
||||
else:
|
||||
return parse_user_query(self._index, query, tz)
|
||||
|
||||
def _apply_permission_filter(
|
||||
self,
|
||||
query: tantivy.Query,
|
||||
user: AbstractBaseUser | None,
|
||||
) -> tantivy.Query:
|
||||
"""Wrap a query with a permission filter if the user is not a superuser."""
|
||||
if user is not None:
|
||||
permission_filter = build_permission_filter(self._schema, user)
|
||||
return tantivy.Query.boolean_query(
|
||||
[
|
||||
(tantivy.Occur.Must, query),
|
||||
(tantivy.Occur.Must, permission_filter),
|
||||
],
|
||||
)
|
||||
return query
|
||||
|
||||
def _build_tantivy_doc(
|
||||
self,
|
||||
document: Document,
|
||||
@@ -435,6 +527,8 @@ class TantivyBackend:
|
||||
*,
|
||||
sort_reverse: bool,
|
||||
search_mode: SearchMode = SearchMode.QUERY,
|
||||
highlight_page: int | None = None,
|
||||
highlight_page_size: int | None = None,
|
||||
) -> SearchResults:
|
||||
"""
|
||||
Execute a search query against the document index.
|
||||
@@ -462,45 +556,15 @@ class TantivyBackend:
|
||||
SearchResults with hits, total count, and processed query
|
||||
"""
|
||||
self._ensure_open()
|
||||
tz = get_current_timezone()
|
||||
if search_mode is SearchMode.TEXT:
|
||||
user_query = parse_simple_text_query(self._index, query)
|
||||
elif search_mode is SearchMode.TITLE:
|
||||
user_query = parse_simple_title_query(self._index, query)
|
||||
else:
|
||||
user_query = parse_user_query(self._index, query, tz)
|
||||
|
||||
# Apply permission filter if user is not None (not superuser)
|
||||
if user is not None:
|
||||
permission_filter = build_permission_filter(self._schema, user)
|
||||
final_query = tantivy.Query.boolean_query(
|
||||
[
|
||||
(tantivy.Occur.Must, user_query),
|
||||
(tantivy.Occur.Must, permission_filter),
|
||||
],
|
||||
)
|
||||
else:
|
||||
final_query = user_query
|
||||
user_query = self._parse_query(query, search_mode)
|
||||
final_query = self._apply_permission_filter(user_query, user)
|
||||
|
||||
searcher = self._index.searcher()
|
||||
offset = (page - 1) * page_size
|
||||
|
||||
# Map sort fields
|
||||
sort_field_map = {
|
||||
"title": "title_sort",
|
||||
"correspondent__name": "correspondent_sort",
|
||||
"document_type__name": "type_sort",
|
||||
"created": "created",
|
||||
"added": "added",
|
||||
"modified": "modified",
|
||||
"archive_serial_number": "asn",
|
||||
"page_count": "page_count",
|
||||
"num_notes": "num_notes",
|
||||
}
|
||||
|
||||
# Perform search
|
||||
if sort_field and sort_field in sort_field_map:
|
||||
mapped_field = sort_field_map[sort_field]
|
||||
if sort_field and sort_field in self.SORT_FIELD_MAP:
|
||||
mapped_field = self.SORT_FIELD_MAP[sort_field]
|
||||
results = searcher.search(
|
||||
final_query,
|
||||
limit=offset + page_size,
|
||||
@@ -534,6 +598,15 @@ class TantivyBackend:
|
||||
snippet_generator = None
|
||||
notes_snippet_generator = None
|
||||
|
||||
# Determine which hits need highlights
|
||||
if highlight_page is not None and highlight_page_size is not None:
|
||||
hl_start = (highlight_page - 1) * highlight_page_size
|
||||
hl_end = hl_start + highlight_page_size
|
||||
else:
|
||||
# Highlight all hits (backward-compatible default)
|
||||
hl_start = 0
|
||||
hl_end = len(page_hits)
|
||||
|
||||
for rank, (doc_address, score) in enumerate(page_hits, start=offset + 1):
|
||||
# Get the actual document from the searcher using the doc address
|
||||
actual_doc = searcher.doc(doc_address)
|
||||
@@ -542,8 +615,9 @@ class TantivyBackend:
|
||||
|
||||
highlights: dict[str, str] = {}
|
||||
|
||||
# Generate highlights if score > 0
|
||||
if score > 0:
|
||||
# Generate highlights if score > 0 and hit is in the highlight window
|
||||
hit_index = rank - offset - 1 # 0-based index within page_hits
|
||||
if score > 0 and hl_start <= hit_index < hl_end:
|
||||
try:
|
||||
if snippet_generator is None:
|
||||
snippet_generator = tantivy.SnippetGenerator.create(
|
||||
@@ -590,6 +664,160 @@ class TantivyBackend:
|
||||
query=query,
|
||||
)
|
||||
|
||||
def highlight_hits(
|
||||
self,
|
||||
query: str,
|
||||
doc_ids: list[int],
|
||||
*,
|
||||
search_mode: SearchMode = SearchMode.QUERY,
|
||||
) -> list[SearchHit]:
|
||||
"""
|
||||
Generate SearchHit dicts with highlights for specific document IDs.
|
||||
|
||||
Unlike search(), this does not execute a ranked query — it looks up
|
||||
each document by ID and generates snippets against the provided query.
|
||||
Use this when you already know which documents to display (from
|
||||
search_ids + ORM filtering) and just need highlight data.
|
||||
|
||||
Note: Each doc_id requires an individual index lookup because tantivy-py
|
||||
does not expose a batch doc-address-by-ID API. This is acceptable for
|
||||
page-sized batches (typically 25 docs) but should not be called with
|
||||
thousands of IDs.
|
||||
|
||||
Args:
|
||||
query: The search query (used for snippet generation)
|
||||
doc_ids: Ordered list of document IDs to generate hits for
|
||||
search_mode: Query parsing mode (for building the snippet query)
|
||||
|
||||
Returns:
|
||||
List of SearchHit dicts in the same order as doc_ids
|
||||
"""
|
||||
if not doc_ids:
|
||||
return []
|
||||
|
||||
self._ensure_open()
|
||||
user_query = self._parse_query(query, search_mode)
|
||||
|
||||
searcher = self._index.searcher()
|
||||
snippet_generator = None
|
||||
hits: list[SearchHit] = []
|
||||
|
||||
for rank, doc_id in enumerate(doc_ids, start=1):
|
||||
# Look up document by ID
|
||||
id_query = tantivy.Query.range_query(
|
||||
self._schema,
|
||||
"id",
|
||||
tantivy.FieldType.Unsigned,
|
||||
doc_id,
|
||||
doc_id,
|
||||
)
|
||||
results = searcher.search(id_query, limit=1)
|
||||
|
||||
if not results.hits:
|
||||
continue
|
||||
|
||||
doc_address = results.hits[0][1]
|
||||
actual_doc = searcher.doc(doc_address)
|
||||
doc_dict = actual_doc.to_dict()
|
||||
|
||||
highlights: dict[str, str] = {}
|
||||
try:
|
||||
if snippet_generator is None:
|
||||
snippet_generator = tantivy.SnippetGenerator.create(
|
||||
searcher,
|
||||
user_query,
|
||||
self._schema,
|
||||
"content",
|
||||
)
|
||||
|
||||
content_snippet = snippet_generator.snippet_from_doc(actual_doc)
|
||||
if content_snippet:
|
||||
highlights["content"] = str(content_snippet)
|
||||
|
||||
if "notes" in doc_dict:
|
||||
notes_generator = tantivy.SnippetGenerator.create(
|
||||
searcher,
|
||||
user_query,
|
||||
self._schema,
|
||||
"notes",
|
||||
)
|
||||
notes_snippet = notes_generator.snippet_from_doc(actual_doc)
|
||||
if notes_snippet:
|
||||
highlights["notes"] = str(notes_snippet)
|
||||
|
||||
except Exception: # pragma: no cover
|
||||
logger.debug("Failed to generate highlights for doc %s", doc_id)
|
||||
|
||||
hits.append(
|
||||
SearchHit(
|
||||
id=doc_id,
|
||||
score=0.0,
|
||||
rank=rank,
|
||||
highlights=highlights,
|
||||
),
|
||||
)
|
||||
|
||||
return hits
|
||||
|
||||
def search_ids(
|
||||
self,
|
||||
query: str,
|
||||
user: AbstractBaseUser | None,
|
||||
*,
|
||||
sort_field: str | None = None,
|
||||
sort_reverse: bool = False,
|
||||
search_mode: SearchMode = SearchMode.QUERY,
|
||||
limit: int | None = None,
|
||||
) -> list[int]:
|
||||
"""
|
||||
Return document IDs matching a query — no highlights or scores.
|
||||
|
||||
This is the lightweight companion to search(). Use it when you need the
|
||||
full set of matching IDs (e.g. for ``selection_data``) but don't need
|
||||
scores, ranks, or highlights.
|
||||
|
||||
Args:
|
||||
query: User's search query
|
||||
user: User for permission filtering (None for superuser/no filtering)
|
||||
sort_field: Field to sort by (None for relevance ranking)
|
||||
sort_reverse: Whether to reverse the sort order
|
||||
search_mode: Query parsing mode (QUERY, TEXT, or TITLE)
|
||||
limit: Maximum number of IDs to return (None = all matching docs)
|
||||
|
||||
Returns:
|
||||
List of document IDs in the requested order
|
||||
"""
|
||||
self._ensure_open()
|
||||
user_query = self._parse_query(query, search_mode)
|
||||
final_query = self._apply_permission_filter(user_query, user)
|
||||
|
||||
searcher = self._index.searcher()
|
||||
effective_limit = limit if limit is not None else searcher.num_docs
|
||||
|
||||
if sort_field and sort_field in self.SORT_FIELD_MAP:
|
||||
mapped_field = self.SORT_FIELD_MAP[sort_field]
|
||||
results = searcher.search(
|
||||
final_query,
|
||||
limit=effective_limit,
|
||||
order_by_field=mapped_field,
|
||||
order=tantivy.Order.Desc if sort_reverse else tantivy.Order.Asc,
|
||||
)
|
||||
all_hits = [(hit[1],) for hit in results.hits]
|
||||
else:
|
||||
results = searcher.search(final_query, limit=effective_limit)
|
||||
all_hits = [(hit[1], hit[0]) for hit in results.hits]
|
||||
|
||||
# Normalize scores and apply threshold (relevance search only)
|
||||
if all_hits:
|
||||
max_score = max(hit[1] for hit in all_hits) or 1.0
|
||||
all_hits = [(hit[0], hit[1] / max_score) for hit in all_hits]
|
||||
|
||||
threshold = settings.ADVANCED_FUZZY_SEARCH_THRESHOLD
|
||||
if threshold is not None:
|
||||
all_hits = [hit for hit in all_hits if hit[1] >= threshold]
|
||||
|
||||
return [searcher.doc(doc_addr).to_dict()["id"][0] for doc_addr, *_ in all_hits]
|
||||
|
||||
def autocomplete(
|
||||
self,
|
||||
term: str,
|
||||
@@ -623,7 +851,7 @@ class TantivyBackend:
|
||||
else:
|
||||
base_query = tantivy.Query.all_query()
|
||||
|
||||
results = searcher.search(base_query, limit=10000)
|
||||
results = searcher.search(base_query, limit=searcher.num_docs)
|
||||
|
||||
# Count how many visible documents each word appears in.
|
||||
# Using Counter (not set) preserves per-word document frequency so
|
||||
@@ -699,17 +927,7 @@ class TantivyBackend:
|
||||
boost_factor=None,
|
||||
)
|
||||
|
||||
# Apply permission filter
|
||||
if user is not None:
|
||||
permission_filter = build_permission_filter(self._schema, user)
|
||||
final_query = tantivy.Query.boolean_query(
|
||||
[
|
||||
(tantivy.Occur.Must, mlt_query),
|
||||
(tantivy.Occur.Must, permission_filter),
|
||||
],
|
||||
)
|
||||
else:
|
||||
final_query = mlt_query
|
||||
final_query = self._apply_permission_filter(mlt_query, user)
|
||||
|
||||
# Search
|
||||
offset = (page - 1) * page_size
|
||||
@@ -753,6 +971,66 @@ class TantivyBackend:
|
||||
query=f"more_like:{doc_id}",
|
||||
)
|
||||
|
||||
def more_like_this_ids(
|
||||
self,
|
||||
doc_id: int,
|
||||
user: AbstractBaseUser | None,
|
||||
*,
|
||||
limit: int | None = None,
|
||||
) -> list[int]:
|
||||
"""
|
||||
Return IDs of documents similar to the given document — no highlights.
|
||||
|
||||
Lightweight companion to more_like_this(). The original document is
|
||||
excluded from results.
|
||||
|
||||
Args:
|
||||
doc_id: Primary key of the reference document
|
||||
user: User for permission filtering (None for no filtering)
|
||||
limit: Maximum number of IDs to return (None = all matching docs)
|
||||
|
||||
Returns:
|
||||
List of similar document IDs (excluding the original)
|
||||
"""
|
||||
self._ensure_open()
|
||||
searcher = self._index.searcher()
|
||||
|
||||
id_query = tantivy.Query.range_query(
|
||||
self._schema,
|
||||
"id",
|
||||
tantivy.FieldType.Unsigned,
|
||||
doc_id,
|
||||
doc_id,
|
||||
)
|
||||
results = searcher.search(id_query, limit=1)
|
||||
|
||||
if not results.hits:
|
||||
return []
|
||||
|
||||
doc_address = results.hits[0][1]
|
||||
mlt_query = tantivy.Query.more_like_this_query(
|
||||
doc_address,
|
||||
min_doc_frequency=1,
|
||||
max_doc_frequency=None,
|
||||
min_term_frequency=1,
|
||||
max_query_terms=12,
|
||||
min_word_length=None,
|
||||
max_word_length=None,
|
||||
boost_factor=None,
|
||||
)
|
||||
|
||||
final_query = self._apply_permission_filter(mlt_query, user)
|
||||
|
||||
effective_limit = limit if limit is not None else searcher.num_docs
|
||||
results = searcher.search(final_query, limit=effective_limit)
|
||||
|
||||
ids = []
|
||||
for _score, doc_address in results.hits:
|
||||
result_doc_id = searcher.doc(doc_address).to_dict()["id"][0]
|
||||
if result_doc_id != doc_id:
|
||||
ids.append(result_doc_id)
|
||||
return ids
|
||||
|
||||
def batch_update(self, lock_timeout: float = 30.0) -> WriteBatch:
|
||||
"""
|
||||
Get a batch context manager for bulk index operations.
|
||||
|
||||
@@ -428,6 +428,162 @@ class TestSearch:
|
||||
== 0
|
||||
)
|
||||
|
||||
def test_highlight_page_only_highlights_requested_slice(
|
||||
self,
|
||||
backend: TantivyBackend,
|
||||
):
|
||||
"""Only hits in the highlight_page slice should have non-empty highlights."""
|
||||
for i in range(6):
|
||||
doc = Document.objects.create(
|
||||
title=f"highlight doc {i}",
|
||||
content=f"searchable highlight content number {i}",
|
||||
checksum=f"HP{i}",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
r = backend.search(
|
||||
"searchable",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10000,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
highlight_page=1,
|
||||
highlight_page_size=3,
|
||||
)
|
||||
assert r.total == 6
|
||||
assert len(r.hits) == 6
|
||||
highlighted = [h for h in r.hits if h["highlights"]]
|
||||
not_highlighted = [h for h in r.hits if not h["highlights"]]
|
||||
assert len(highlighted) == 3
|
||||
assert len(not_highlighted) == 3
|
||||
|
||||
def test_highlight_page_2_highlights_correct_slice(self, backend: TantivyBackend):
|
||||
"""highlight_page=2 should highlight only the second page of results."""
|
||||
for i in range(6):
|
||||
doc = Document.objects.create(
|
||||
title=f"page2 doc {i}",
|
||||
content=f"searchable page2 content number {i}",
|
||||
checksum=f"HP2{i}",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
r = backend.search(
|
||||
"searchable",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10000,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
highlight_page=2,
|
||||
highlight_page_size=2,
|
||||
)
|
||||
assert r.total == 6
|
||||
assert len(r.hits) == 6
|
||||
highlighted = [h for h in r.hits if h["highlights"]]
|
||||
not_highlighted = [h for h in r.hits if not h["highlights"]]
|
||||
# Only 2 hits (the second page) should have highlights
|
||||
assert len(highlighted) == 2
|
||||
assert len(not_highlighted) == 4
|
||||
|
||||
def test_no_highlight_page_highlights_all(self, backend: TantivyBackend):
|
||||
"""When highlight_page is not specified, all hits get highlights (backward compat)."""
|
||||
for i in range(3):
|
||||
doc = Document.objects.create(
|
||||
title=f"compat doc {i}",
|
||||
content=f"searchable compat content {i}",
|
||||
checksum=f"HC{i}",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
r = backend.search(
|
||||
"searchable",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10000,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
)
|
||||
assert len(r.hits) == 3
|
||||
for hit in r.hits:
|
||||
assert "content" in hit["highlights"]
|
||||
|
||||
|
||||
class TestSearchIds:
|
||||
"""Test lightweight ID-only search."""
|
||||
|
||||
def test_returns_matching_ids(self, backend: TantivyBackend):
|
||||
"""search_ids must return IDs of all matching documents."""
|
||||
docs = []
|
||||
for i in range(5):
|
||||
doc = Document.objects.create(
|
||||
title=f"findable doc {i}",
|
||||
content="common keyword",
|
||||
checksum=f"SI{i}",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
docs.append(doc)
|
||||
other = Document.objects.create(
|
||||
title="unrelated",
|
||||
content="nothing here",
|
||||
checksum="SI_other",
|
||||
)
|
||||
backend.add_or_update(other)
|
||||
|
||||
ids = backend.search_ids(
|
||||
"common keyword",
|
||||
user=None,
|
||||
search_mode=SearchMode.QUERY,
|
||||
)
|
||||
assert set(ids) == {d.pk for d in docs}
|
||||
assert other.pk not in ids
|
||||
|
||||
def test_respects_permission_filter(self, backend: TantivyBackend):
|
||||
"""search_ids must respect user permission filtering."""
|
||||
owner = User.objects.create_user("ids_owner")
|
||||
other = User.objects.create_user("ids_other")
|
||||
doc = Document.objects.create(
|
||||
title="private doc",
|
||||
content="secret keyword",
|
||||
checksum="SIP1",
|
||||
owner=owner,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
assert backend.search_ids(
|
||||
"secret",
|
||||
user=owner,
|
||||
search_mode=SearchMode.QUERY,
|
||||
) == [doc.pk]
|
||||
assert (
|
||||
backend.search_ids("secret", user=other, search_mode=SearchMode.QUERY) == []
|
||||
)
|
||||
|
||||
def test_respects_fuzzy_threshold(self, backend: TantivyBackend, settings):
|
||||
"""search_ids must apply the same fuzzy threshold as search()."""
|
||||
doc = Document.objects.create(
|
||||
title="threshold test",
|
||||
content="unique term",
|
||||
checksum="SIT1",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
settings.ADVANCED_FUZZY_SEARCH_THRESHOLD = 1.1
|
||||
ids = backend.search_ids("unique", user=None, search_mode=SearchMode.QUERY)
|
||||
assert ids == []
|
||||
|
||||
def test_returns_ids_for_text_mode(self, backend: TantivyBackend):
|
||||
"""search_ids must work with TEXT search mode."""
|
||||
doc = Document.objects.create(
|
||||
title="text mode doc",
|
||||
content="findable phrase",
|
||||
checksum="SIM1",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
ids = backend.search_ids("findable", user=None, search_mode=SearchMode.TEXT)
|
||||
assert ids == [doc.pk]
|
||||
|
||||
|
||||
class TestRebuild:
|
||||
"""Test index rebuilding functionality."""
|
||||
@@ -542,6 +698,27 @@ class TestMoreLikeThis:
|
||||
assert results.hits == []
|
||||
assert results.total == 0
|
||||
|
||||
def test_more_like_this_ids_excludes_original(self, backend: TantivyBackend):
|
||||
"""more_like_this_ids must return IDs of similar documents, excluding the original."""
|
||||
doc1 = Document.objects.create(
|
||||
title="Important document",
|
||||
content="financial information report",
|
||||
checksum="MLTI1",
|
||||
pk=150,
|
||||
)
|
||||
doc2 = Document.objects.create(
|
||||
title="Another document",
|
||||
content="financial information report",
|
||||
checksum="MLTI2",
|
||||
pk=151,
|
||||
)
|
||||
backend.add_or_update(doc1)
|
||||
backend.add_or_update(doc2)
|
||||
|
||||
ids = backend.more_like_this_ids(doc_id=150, user=None)
|
||||
assert 150 not in ids
|
||||
assert 151 in ids
|
||||
|
||||
|
||||
class TestSingleton:
|
||||
"""Test get_backend() and reset_backend() singleton lifecycle."""
|
||||
|
||||
@@ -1503,6 +1503,89 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
[d2.id, d1.id, d3.id],
|
||||
)
|
||||
|
||||
def test_search_with_tantivy_native_sort(self) -> None:
|
||||
"""When ordering by a Tantivy-sortable field, results must be correctly sorted."""
|
||||
backend = get_backend()
|
||||
for i, asn in enumerate([30, 10, 20]):
|
||||
doc = Document.objects.create(
|
||||
title=f"sortable doc {i}",
|
||||
content="searchable content",
|
||||
checksum=f"TNS{i}",
|
||||
archive_serial_number=asn,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=searchable&ordering=archive_serial_number",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
asns = [doc["archive_serial_number"] for doc in response.data["results"]]
|
||||
self.assertEqual(asns, [10, 20, 30])
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=searchable&ordering=-archive_serial_number",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
asns = [doc["archive_serial_number"] for doc in response.data["results"]]
|
||||
self.assertEqual(asns, [30, 20, 10])
|
||||
|
||||
def test_search_page_2_returns_correct_slice(self) -> None:
|
||||
"""Page 2 must return the second slice, not overlap with page 1."""
|
||||
backend = get_backend()
|
||||
for i in range(10):
|
||||
doc = Document.objects.create(
|
||||
title=f"doc {i}",
|
||||
content="paginated content",
|
||||
checksum=f"PG2{i}",
|
||||
archive_serial_number=i + 1,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=paginated&ordering=archive_serial_number&page=1&page_size=3",
|
||||
)
|
||||
page1_ids = [r["id"] for r in response.data["results"]]
|
||||
self.assertEqual(len(page1_ids), 3)
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=paginated&ordering=archive_serial_number&page=2&page_size=3",
|
||||
)
|
||||
page2_ids = [r["id"] for r in response.data["results"]]
|
||||
self.assertEqual(len(page2_ids), 3)
|
||||
|
||||
# No overlap between pages
|
||||
self.assertEqual(set(page1_ids) & set(page2_ids), set())
|
||||
# Page 2 ASNs are higher than page 1
|
||||
page1_asns = [
|
||||
Document.objects.get(pk=pk).archive_serial_number for pk in page1_ids
|
||||
]
|
||||
page2_asns = [
|
||||
Document.objects.get(pk=pk).archive_serial_number for pk in page2_ids
|
||||
]
|
||||
self.assertTrue(max(page1_asns) < min(page2_asns))
|
||||
|
||||
def test_search_all_field_contains_all_ids_when_paginated(self) -> None:
|
||||
"""The 'all' field must contain every matching ID, even when paginated."""
|
||||
backend = get_backend()
|
||||
doc_ids = []
|
||||
for i in range(10):
|
||||
doc = Document.objects.create(
|
||||
title=f"all field doc {i}",
|
||||
content="allfield content",
|
||||
checksum=f"AF{i}",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
doc_ids.append(doc.pk)
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=allfield&page=1&page_size=3",
|
||||
headers={"Accept": "application/json; version=9"},
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(len(response.data["results"]), 3)
|
||||
# "all" must contain ALL 10 matching IDs
|
||||
self.assertCountEqual(response.data["all"], doc_ids)
|
||||
|
||||
@mock.patch("documents.bulk_edit.bulk_update_documents")
|
||||
def test_global_search(self, m) -> None:
|
||||
"""
|
||||
|
||||
@@ -2058,13 +2058,14 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
||||
if not self._is_search_request():
|
||||
return super().list(request)
|
||||
|
||||
from documents.search import SearchHit
|
||||
from documents.search import SearchMode
|
||||
from documents.search import TantivyBackend
|
||||
from documents.search import TantivyRelevanceList
|
||||
from documents.search import get_backend
|
||||
|
||||
try:
|
||||
backend = get_backend()
|
||||
# ORM-filtered queryset: permissions + field filters + ordering (DRF backends applied)
|
||||
filtered_qs = self.filter_queryset(self.get_queryset())
|
||||
|
||||
user = None if request.user.is_superuser else request.user
|
||||
@@ -2079,6 +2080,28 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
||||
},
|
||||
)
|
||||
|
||||
# Parse ordering param
|
||||
ordering_param = request.query_params.get("ordering", "")
|
||||
sort_reverse = ordering_param.startswith("-")
|
||||
sort_field_name = ordering_param.lstrip("-") or None
|
||||
|
||||
use_tantivy_sort = (
|
||||
sort_field_name in TantivyBackend.SORTABLE_FIELDS
|
||||
or sort_field_name is None
|
||||
)
|
||||
|
||||
# Compute the DRF page so we can tell Tantivy which slice to highlight
|
||||
try:
|
||||
requested_page = int(request.query_params.get("page", 1))
|
||||
except (TypeError, ValueError):
|
||||
requested_page = 1
|
||||
try:
|
||||
requested_page_size = int(
|
||||
request.query_params.get("page_size", self.paginator.page_size),
|
||||
)
|
||||
except (TypeError, ValueError):
|
||||
requested_page_size = self.paginator.page_size
|
||||
|
||||
if (
|
||||
"text" in request.query_params
|
||||
or "title_search" in request.query_params
|
||||
@@ -2093,17 +2116,44 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
||||
else:
|
||||
search_mode = SearchMode.QUERY
|
||||
query_str = request.query_params["query"]
|
||||
results = backend.search(
|
||||
|
||||
# Step 1: Get all matching IDs (lightweight, no highlights)
|
||||
all_ids = backend.search_ids(
|
||||
query_str,
|
||||
user=user,
|
||||
page=1,
|
||||
page_size=10000,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
sort_field=sort_field_name if use_tantivy_sort else None,
|
||||
sort_reverse=sort_reverse,
|
||||
search_mode=search_mode,
|
||||
)
|
||||
|
||||
# Step 2: Intersect with ORM-visible IDs (field filters)
|
||||
orm_ids = set(filtered_qs.values_list("pk", flat=True))
|
||||
|
||||
if use_tantivy_sort:
|
||||
# Fast path: Tantivy already ordered the IDs
|
||||
ordered_ids = [doc_id for doc_id in all_ids if doc_id in orm_ids]
|
||||
else:
|
||||
# Slow path: ORM must re-sort
|
||||
id_set = set(all_ids) & orm_ids
|
||||
ordered_ids = list(
|
||||
filtered_qs.filter(id__in=id_set).values_list(
|
||||
"pk",
|
||||
flat=True,
|
||||
),
|
||||
)
|
||||
|
||||
# Step 3: Fetch highlights for the displayed page only
|
||||
page_offset = (requested_page - 1) * requested_page_size
|
||||
page_ids = ordered_ids[page_offset : page_offset + requested_page_size]
|
||||
|
||||
page_hits = backend.highlight_hits(
|
||||
query_str,
|
||||
page_ids,
|
||||
search_mode=search_mode,
|
||||
)
|
||||
|
||||
else:
|
||||
# more_like_id — validate permission on the seed document first
|
||||
# more_like_id path
|
||||
try:
|
||||
more_like_doc_id = int(request.query_params["more_like_id"])
|
||||
more_like_doc = Document.objects.select_related("owner").get(
|
||||
@@ -2119,33 +2169,24 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
||||
):
|
||||
raise PermissionDenied(_("Insufficient permissions."))
|
||||
|
||||
results = backend.more_like_this(
|
||||
# Step 1: Get all matching IDs (lightweight)
|
||||
all_ids = backend.more_like_this_ids(
|
||||
more_like_doc_id,
|
||||
user=user,
|
||||
page=1,
|
||||
page_size=10000,
|
||||
)
|
||||
|
||||
hits_by_id = {h["id"]: h for h in results.hits}
|
||||
|
||||
# Determine sort order: no ordering param -> Tantivy relevance; otherwise -> ORM order
|
||||
ordering_param = request.query_params.get("ordering", "").lstrip("-")
|
||||
if not ordering_param:
|
||||
# Preserve Tantivy relevance order; intersect with ORM-visible IDs
|
||||
orm_ids = set(filtered_qs.values_list("pk", flat=True))
|
||||
ordered_hits = [h for h in results.hits if h["id"] in orm_ids]
|
||||
else:
|
||||
# Use ORM ordering (already applied by DocumentsOrderingFilter)
|
||||
hit_ids = set(hits_by_id.keys())
|
||||
orm_ordered_ids = filtered_qs.filter(id__in=hit_ids).values_list(
|
||||
"pk",
|
||||
flat=True,
|
||||
)
|
||||
ordered_hits = [
|
||||
hits_by_id[pk] for pk in orm_ordered_ids if pk in hits_by_id
|
||||
ordered_ids = [doc_id for doc_id in all_ids if doc_id in orm_ids]
|
||||
|
||||
# Step 2: Build hit dicts for the displayed page
|
||||
# MLT has no text query, so no highlights needed
|
||||
page_offset = (requested_page - 1) * requested_page_size
|
||||
page_ids = ordered_ids[page_offset : page_offset + requested_page_size]
|
||||
page_hits = [
|
||||
SearchHit(id=doc_id, score=0.0, rank=rank, highlights={})
|
||||
for rank, doc_id in enumerate(page_ids, start=page_offset + 1)
|
||||
]
|
||||
|
||||
rl = TantivyRelevanceList(ordered_hits)
|
||||
rl = TantivyRelevanceList(ordered_ids, page_hits, page_offset)
|
||||
page = self.paginate_queryset(rl)
|
||||
|
||||
if page is not None:
|
||||
@@ -2155,15 +2196,14 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
||||
if get_boolean(
|
||||
str(request.query_params.get("include_selection_data", "false")),
|
||||
):
|
||||
all_ids = [h["id"] for h in ordered_hits]
|
||||
response.data["selection_data"] = (
|
||||
self._get_selection_data_for_queryset(
|
||||
filtered_qs.filter(pk__in=all_ids),
|
||||
filtered_qs.filter(pk__in=ordered_ids),
|
||||
)
|
||||
)
|
||||
return response
|
||||
|
||||
serializer = self.get_serializer(ordered_hits, many=True)
|
||||
serializer = self.get_serializer(page_hits, many=True)
|
||||
return Response(serializer.data)
|
||||
|
||||
except NotFound:
|
||||
|
||||
@@ -89,7 +89,7 @@ class StandardPagination(PageNumberPagination):
|
||||
|
||||
query = self.page.paginator.object_list
|
||||
if isinstance(query, TantivyRelevanceList):
|
||||
return [h["id"] for h in query._hits]
|
||||
return query.get_all_ids()
|
||||
return self.page.paginator.object_list.values_list("pk", flat=True)
|
||||
|
||||
def get_paginated_response_schema(self, schema):
|
||||
|
||||
Reference in New Issue
Block a user