mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-05-27 08:55:25 +00:00
Performance: Further export memory improvements (#12273)
* Perf: streaming manifest writer for document exporter (Phase 3)

  Replaces the in-memory manifest dict accumulation with a StreamingManifestWriter that writes records to manifest.json incrementally, keeping only one batch resident in memory at a time.

  Key changes:
  - Add StreamingManifestWriter: writes to .tmp atomically, BLAKE2b compare for --compare-json, discard() on exception
  - Add _encrypt_record_inline(): per-record encryption replacing the bulk encrypt_secret_fields() call; crypto setup moved before streaming
  - Add _write_split_manifest(): extracted per-document manifest writing
  - Refactor dump(): non-doc records streamed during transaction, documents accumulated then written after filenames are assigned
  - Upgrade check_and_write_json() from MD5 to BLAKE2b
  - Remove encrypt_secret_fields() and unused itertools.chain import
  - Add profiling marker to pyproject.toml

  Measured improvement (200 docs + 200 CustomFieldInstances, same dump() code path, only writer differs):
  - Peak memory: ~50% reduction
  - Memory delta: ~70% reduction
  - Wall time and query count: unchanged

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* Refactor: O(1) lookup table for CRYPT_FIELDS in per-record encryption

  Add CRYPT_FIELDS_BY_MODEL to CryptMixin, derived from CRYPT_FIELDS at class definition time. _encrypt_record_inline() now does a single dict lookup instead of a linear scan per record, eliminating the loop and break pattern.

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -753,6 +753,31 @@ class TestExportImport(
|
||||
call_command("document_importer", "--no-progress-bar", self.target)
|
||||
self.assertEqual(Document.objects.count(), 4)
|
||||
|
||||
def test_folder_prefix_with_split(self) -> None:
    """
    GIVEN:
        - Request to export documents to directory
    WHEN:
        - Option use_folder_prefix is used
        - Option split manifest is used
    THEN:
        - Documents can be imported again
    """
    media_documents = Path(self.dirs.media_dir) / "documents"
    sample_documents = Path(__file__).parent / "samples" / "documents"

    # Swap whatever is in the media directory for the bundled sample files.
    shutil.rmtree(media_documents)
    shutil.copytree(sample_documents, media_documents)

    # Export with both options under test switched on together.
    self._do_export(use_folder_prefix=True, split_manifest=True)

    with paperless_environment():
        self.assertEqual(Document.objects.count(), 4)

        # Empty the table so the import below has to recreate every row.
        Document.objects.all().delete()
        self.assertEqual(Document.objects.count(), 0)

        call_command("document_importer", "--no-progress-bar", self.target)
        self.assertEqual(Document.objects.count(), 4)
|
||||
|
||||
def test_import_db_transaction_failed(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
|
||||
Reference in New Issue
Block a user