Compare commits

..

1 Commits

4 changed files with 53 additions and 105 deletions

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import json
import logging
import shutil
from typing import TYPE_CHECKING
@@ -100,9 +101,9 @@ def needs_rebuild(index_dir: Path) -> bool:
"""
Check if the search index needs rebuilding.
Compares the current schema version and search language configuration
against sentinel files to determine if the index is compatible with
the current paperless-ngx version and settings.
Reads .index_settings.json to compare the stored schema version and
search language against the current configuration. Returns True if the
file is missing, unparsable, or either value mismatches.
Args:
index_dir: Path to the search index directory
@@ -110,24 +111,19 @@ def needs_rebuild(index_dir: Path) -> bool:
Returns:
True if the index needs rebuilding, False if it's up to date
"""
version_file = index_dir / ".schema_version"
if not version_file.exists():
settings_file = index_dir / ".index_settings.json"
if not settings_file.exists():
return True
try:
if int(version_file.read_text().strip()) != SCHEMA_VERSION:
data = json.loads(settings_file.read_text())
if data.get("schema_version") != SCHEMA_VERSION:
logger.info("Search index schema version mismatch - rebuilding.")
return True
if "language" not in data or data["language"] != settings.SEARCH_LANGUAGE:
logger.info("Search index language changed - rebuilding.")
return True
except ValueError:
return True
language_file = index_dir / ".schema_language"
if not language_file.exists():
logger.info("Search index language sentinel missing - rebuilding.")
return True
if language_file.read_text().strip() != (settings.SEARCH_LANGUAGE or ""):
logger.info("Search index language changed - rebuilding.")
return True
return False
@@ -149,9 +145,16 @@ def wipe_index(index_dir: Path) -> None:
def _write_sentinels(index_dir: Path) -> None:
"""Write schema version and language sentinel files so the next index open can skip rebuilding."""
(index_dir / ".schema_version").write_text(str(SCHEMA_VERSION))
(index_dir / ".schema_language").write_text(settings.SEARCH_LANGUAGE or "")
"""Write .index_settings.json so the next index open can skip rebuilding."""
settings_file = index_dir / ".index_settings.json"
settings_file.write_text(
json.dumps(
{
"schema_version": SCHEMA_VERSION,
"language": settings.SEARCH_LANGUAGE,
},
),
)
def open_or_rebuild_index(index_dir: Path | None = None) -> tantivy.Index:

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import json
from typing import TYPE_CHECKING
import pytest
@@ -18,7 +19,7 @@ pytestmark = pytest.mark.search
class TestNeedsRebuild:
"""needs_rebuild covers all sentinel-file states that require a full reindex."""
def test_returns_true_when_version_file_missing(self, index_dir: Path) -> None:
def test_returns_true_when_settings_file_missing(self, index_dir: Path) -> None:
assert needs_rebuild(index_dir) is True
def test_returns_false_when_version_and_language_match(
@@ -27,37 +28,51 @@ class TestNeedsRebuild:
settings: SettingsWrapper,
) -> None:
settings.SEARCH_LANGUAGE = "en"
(index_dir / ".schema_version").write_text(str(SCHEMA_VERSION))
(index_dir / ".schema_language").write_text("en")
(index_dir / ".index_settings.json").write_text(
json.dumps({"schema_version": SCHEMA_VERSION, "language": "en"}),
)
assert needs_rebuild(index_dir) is False
def test_returns_true_on_schema_version_mismatch(self, index_dir: Path) -> None:
(index_dir / ".schema_version").write_text(str(SCHEMA_VERSION - 1))
assert needs_rebuild(index_dir) is True
def test_returns_true_when_version_file_not_an_integer(
def test_returns_true_on_schema_version_mismatch(
self,
index_dir: Path,
settings: SettingsWrapper,
) -> None:
(index_dir / ".schema_version").write_text("not-a-number")
settings.SEARCH_LANGUAGE = None
(index_dir / ".index_settings.json").write_text(
json.dumps({"schema_version": SCHEMA_VERSION - 1, "language": None}),
)
assert needs_rebuild(index_dir) is True
def test_returns_true_when_language_sentinel_missing(
def test_returns_true_when_version_is_not_an_integer(
self,
index_dir: Path,
settings: SettingsWrapper,
) -> None:
settings.SEARCH_LANGUAGE = None
(index_dir / ".index_settings.json").write_text(
json.dumps({"schema_version": "not-a-number", "language": None}),
)
assert needs_rebuild(index_dir) is True
def test_returns_true_when_language_key_missing(
self,
index_dir: Path,
settings: SettingsWrapper,
) -> None:
settings.SEARCH_LANGUAGE = "en"
(index_dir / ".schema_version").write_text(str(SCHEMA_VERSION))
# .schema_language intentionally absent
(index_dir / ".index_settings.json").write_text(
json.dumps({"schema_version": SCHEMA_VERSION}),
)
assert needs_rebuild(index_dir) is True
def test_returns_true_when_language_sentinel_content_differs(
def test_returns_true_when_language_differs(
self,
index_dir: Path,
settings: SettingsWrapper,
) -> None:
settings.SEARCH_LANGUAGE = "de"
(index_dir / ".schema_version").write_text(str(SCHEMA_VERSION))
(index_dir / ".schema_language").write_text("en")
(index_dir / ".index_settings.json").write_text(
json.dumps({"schema_version": SCHEMA_VERSION, "language": "en"}),
)
assert needs_rebuild(index_dir) is True

View File

@@ -578,66 +578,6 @@ class TestSummary:
assert response.status_code == status.HTTP_400_BAD_REQUEST
assert "days" in response.data
def test_days_capped_at_365(self, admin_client: APIClient) -> None:
"""?days= above 365 is silently clamped to 365 so tasks older than a year are excluded."""
old_task = PaperlessTaskFactory(task_type=PaperlessTask.TaskType.CONSUME_FILE)
PaperlessTask.objects.filter(pk=old_task.pk).update(
date_created=timezone.now() - timedelta(days=400),
)
response = admin_client.get(ENDPOINT + "summary/", {"days": 10000})
assert response.status_code == status.HTTP_200_OK
assert len(response.data) == 0
@pytest.mark.django_db()
class TestSummaryPermissions:
def test_monitoring_user_can_access_summary(
self,
user_client: APIClient,
regular_user,
) -> None:
"""A user with view_system_status but no document permissions can access summary/."""
regular_user.user_permissions.add(
Permission.objects.get(codename="view_system_status"),
)
response = user_client.get(ENDPOINT + "summary/")
assert response.status_code == status.HTTP_200_OK
def test_monitoring_user_sees_all_tasks(
self,
user_client: APIClient,
regular_user,
admin_user,
) -> None:
"""Monitoring user sees aggregate data for all tasks, not just unowned ones."""
regular_user.user_permissions.add(
Permission.objects.get(codename="view_system_status"),
)
PaperlessTaskFactory(
owner=admin_user,
task_type=PaperlessTask.TaskType.CONSUME_FILE,
status=PaperlessTask.Status.SUCCESS,
)
response = user_client.get(ENDPOINT + "summary/")
assert response.status_code == status.HTTP_200_OK
total = sum(item["total_count"] for item in response.data)
assert total == 1
def test_unauthenticated_cannot_access_summary(
self,
rest_api_client: APIClient,
) -> None:
"""Unauthenticated requests to summary/ return 401."""
response = rest_api_client.get(ENDPOINT + "summary/")
assert response.status_code == status.HTTP_401_UNAUTHORIZED
@pytest.mark.django_db()
class TestActive:

View File

@@ -3946,28 +3946,18 @@ class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]):
count = tasks.update(acknowledged=True)
return Response({"result": count})
def get_permissions(self):
if self.action == "summary" and has_system_status_permission(
getattr(self.request, "user", None),
):
return [IsAuthenticated()]
return super().get_permissions()
@action(methods=["get"], detail=False)
def summary(self, request):
"""Aggregated task statistics per task_type over the last N days (default 30)."""
try:
days = min(365, max(1, int(request.query_params.get("days", 30))))
days = max(1, int(request.query_params.get("days", 30)))
except (TypeError, ValueError):
return Response(
{"days": "Must be a positive integer."},
status=status.HTTP_400_BAD_REQUEST,
)
cutoff = timezone.now() - timedelta(days=days)
if has_system_status_permission(request.user):
queryset = PaperlessTask.objects.filter(date_created__gte=cutoff)
else:
queryset = self.get_queryset().filter(date_created__gte=cutoff)
queryset = self.get_queryset().filter(date_created__gte=cutoff)
data = queryset.values("task_type").annotate(
total_count=Count("id"),