Files
paperless-ngx/src/documents/tests/test_management_importer.py

400 lines
13 KiB
Python

import json
import tempfile
from io import StringIO
from pathlib import Path
from zipfile import ZipFile
import pytest
from django.contrib.auth.models import User
from django.core.management import call_command
from django.core.management.base import CommandError
from django.test import TestCase
from documents.management.commands.document_importer import Command
from documents.models import Document
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from documents.tests.utils import SampleDirMixin
@pytest.mark.management
class TestCommandImport(
    DirectoriesMixin,
    FileSystemAssertsMixin,
    SampleDirMixin,
    TestCase,
):
    """
    Tests for the ``document_importer`` management command.

    Covers validation of the import source and its manifest, and the
    warnings the command writes to stdout before it starts importing.
    """

    def test_check_manifest_exists(self) -> None:
        """
        GIVEN:
            - Source directory exists
            - No manifest.json file exists in the directory
        WHEN:
            - Import is attempted
        THEN:
            - CommandError is raised indicating the issue
        """
        with self.assertRaises(CommandError) as e:
            call_command(
                "document_importer",
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                skip_checks=True,
            )
        # Checked after the context manager exits: statements placed after the
        # raising call inside the ``with`` body would never execute.
        self.assertIn(
            "That directory doesn't appear to contain a manifest.json file.",
            str(e.exception),
        )

    def test_check_manifest_malformed(self) -> None:
        """
        GIVEN:
            - Source directory exists
            - manifest.json file exists in the directory
            - manifest.json is missing the documents exported name
        WHEN:
            - Import is attempted
        THEN:
            - CommandError is raised indicating the issue
        """
        manifest_file = self.dirs.scratch_dir / "manifest.json"
        with manifest_file.open("w") as outfile:
            # A document record without the EXPORTER_FILE_NAME key
            json.dump([{"model": "documents.document"}], outfile)

        with self.assertRaises(CommandError) as e:
            call_command(
                "document_importer",
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                skip_checks=True,
            )
        self.assertIn(
            "The manifest file contains a record which does not refer to an actual document file.",
            str(e.exception),
        )

    def test_check_manifest_file_not_found(self) -> None:
        """
        GIVEN:
            - Source directory exists
            - manifest.json file exists in the directory
            - manifest.json refers to a file which doesn't exist
        WHEN:
            - Import is attempted
        THEN:
            - CommandError is raised indicating the issue
        """
        manifest_file = self.dirs.scratch_dir / "manifest.json"
        with manifest_file.open("w") as outfile:
            json.dump(
                [{"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"}],
                outfile,
            )

        with self.assertRaises(CommandError) as e:
            call_command(
                "document_importer",
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                skip_checks=True,
            )
        self.assertIn('The manifest file refers to "noexist.pdf"', str(e.exception))

    def test_import_permission_error(self) -> None:
        """
        GIVEN:
            - Original file which cannot be read from
            - Archive file which cannot be read from
        WHEN:
            - Import is attempted
        THEN:
            - CommandError is raised indicating the issue
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            source_dir = Path(temp_dir)

            # Create empty files
            original_path = source_dir / "original.pdf"
            archive_path = source_dir / "archive.pdf"
            original_path.touch()
            archive_path.touch()

            # No read permissions
            original_path.chmod(0o222)

            manifest_path = source_dir / "manifest.json"
            manifest_path.write_text(
                json.dumps(
                    [
                        {
                            "model": "documents.document",
                            EXPORTER_FILE_NAME: "original.pdf",
                            EXPORTER_ARCHIVE_NAME: "archive.pdf",
                        },
                    ],
                ),
            )

            # Exercise the validity check directly on a hand-configured
            # command instance rather than going through call_command.
            cmd = Command()
            cmd.source = source_dir
            cmd.manifest_paths = [manifest_path]
            cmd.data_only = False

            with self.assertRaises(CommandError) as cm:
                cmd.check_manifest_validity()
            self.assertIn("Failed to read from original file", str(cm.exception))

            # Swap the roles: the original becomes readable, the archive does not
            original_path.chmod(0o444)
            archive_path.chmod(0o222)

            with self.assertRaises(CommandError) as cm:
                cmd.check_manifest_validity()
            self.assertIn("Failed to read from archive file", str(cm.exception))

    def test_import_source_not_existing(self) -> None:
        """
        GIVEN:
            - Source given doesn't exist
        WHEN:
            - Import is attempted
        THEN:
            - CommandError is raised indicating the issue
        """
        # A child of the per-test scratch directory that is never created;
        # avoids depending on the contents of a shared, hard-coded /tmp path.
        missing_path = self.dirs.scratch_dir / "notapath"

        with self.assertRaises(CommandError) as cm:
            call_command("document_importer", missing_path, skip_checks=True)
        self.assertIn("That path doesn't exist", str(cm.exception))

    def test_import_source_not_readable(self) -> None:
        """
        GIVEN:
            - Source given isn't readable
        WHEN:
            - Import is attempted
        THEN:
            - CommandError is raised indicating the issue
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            path = Path(temp_dir)
            # Strip the read permission from the directory itself
            path.chmod(0o222)

            with self.assertRaises(CommandError) as cm:
                call_command("document_importer", path, skip_checks=True)
            self.assertIn(
                "That path doesn't appear to be readable",
                str(cm.exception),
            )

    def test_import_source_does_not_exist(self) -> None:
        """
        GIVEN:
            - Source directory does not exist
        WHEN:
            - Request to import documents from a directory
        THEN:
            - CommandError is raised indicating the folder doesn't exist
        """
        # Never created, and isolated to this test's scratch directory
        path = self.dirs.scratch_dir / "should-not-exist"
        self.assertIsNotFile(path)

        with self.assertRaises(CommandError) as e:
            call_command(
                "document_importer",
                "--no-progress-bar",
                str(path),
                skip_checks=True,
            )
        self.assertIn("That path doesn't exist", str(e.exception))

    def test_import_files_exist(self) -> None:
        """
        GIVEN:
            - Source directory does exist
            - A file exists in the originals directory
        WHEN:
            - Request to import documents from a directory
        THEN:
            - CommandError is raised
            - A warning about the existing file is output to stdout
        """
        (self.dirs.originals_dir / "temp").mkdir()
        (self.dirs.originals_dir / "temp" / "file.pdf").touch()

        stdout = StringIO()

        with self.assertRaises(CommandError):
            call_command(
                "document_importer",
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                stdout=stdout,
                skip_checks=True,
            )

        self.assertIn(
            "Found file temp/file.pdf, this might indicate a non-empty installation",
            stdout.getvalue(),
        )

    def test_import_with_user_exists(self) -> None:
        """
        GIVEN:
            - Source directory does exist
            - At least 1 User exists in the database
        WHEN:
            - Request to import documents from a directory
        THEN:
            - A warning is output to stdout
        """
        stdout = StringIO()

        User.objects.create()

        # Not creating a manifest, etc, so it errors
        with self.assertRaises(CommandError):
            call_command(
                "document_importer",
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                stdout=stdout,
                skip_checks=True,
            )

        self.assertIn(
            "Found existing user(s), this might indicate a non-empty installation",
            stdout.getvalue(),
        )

    def test_import_with_documents_exists(self) -> None:
        """
        GIVEN:
            - Source directory does exist
            - At least 1 Document exists in the database
        WHEN:
            - Request to import documents from a directory
        THEN:
            - A warning is output to stdout
        """
        stdout = StringIO()

        Document.objects.create(
            content="Content",
            checksum="1093cf6e32adbd16b06969df09215d42c4a3a8938cc18b39455953f08d1ff2ab",
            archive_checksum="706124ecde3c31616992fa979caed17a726b1c9ccdba70e82a4ff796cea97ccf",
            title="wow1",
            filename="0000001.pdf",
            mime_type="application/pdf",
            archive_filename="0000001.pdf",
        )

        # Not creating a manifest, etc, so it errors
        with self.assertRaises(CommandError):
            call_command(
                "document_importer",
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                stdout=stdout,
                skip_checks=True,
            )

        self.assertIn(
            "Found existing documents(s), this might indicate a non-empty installation",
            stdout.getvalue(),
        )

    def test_import_no_metadata_or_version_file(self) -> None:
        """
        GIVEN:
            - A source directory with a manifest file only
        WHEN:
            - An import is attempted
        THEN:
            - Warning about the missing files is output
        """
        stdout = StringIO()

        (self.dirs.scratch_dir / "manifest.json").touch()

        # We're not building a manifest, so it fails, but this test doesn't care
        with self.assertRaises(CommandError):
            call_command(
                "document_importer",
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                stdout=stdout,
                skip_checks=True,
            )

        self.assertIn(
            "No version.json or metadata.json file located",
            stdout.getvalue(),
        )

    def test_import_version_file(self) -> None:
        """
        GIVEN:
            - A source directory with a manifest file and version file
        WHEN:
            - An import is attempted
        THEN:
            - Warning about the version mismatch is output
        """
        stdout = StringIO()

        (self.dirs.scratch_dir / "manifest.json").touch()
        (self.dirs.scratch_dir / "version.json").write_text(
            json.dumps({"version": "2.8.1"}),
        )

        # We're not building a manifest, so it fails, but this test doesn't care
        with self.assertRaises(CommandError):
            call_command(
                "document_importer",
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                stdout=stdout,
                skip_checks=True,
            )

        stdout_str = stdout.getvalue()
        self.assertIn("Version mismatch:", stdout_str)
        self.assertIn("importing 2.8.1", stdout_str)

    def test_import_zipped_export(self) -> None:
        """
        GIVEN:
            - A zip file containing empty manifest.json and version.json files
        WHEN:
            - An import is attempted using the zip file as the source
        THEN:
            - json.decoder.JSONDecodeError is raised (the archived files are
              empty, so they are not valid JSON)
            - Nothing is written to stdout
        """
        stdout = StringIO()
        zip_path = self.dirs.scratch_dir / "export.zip"

        # Create the zip file with two empty members; writestr avoids having
        # to stage the empty files on disk first.
        with ZipFile(zip_path, "w") as zf:
            zf.writestr("manifest.json", "")
            zf.writestr("version.json", "")

        # Try to import from the zip file
        with self.assertRaises(json.decoder.JSONDecodeError):
            call_command(
                "document_importer",
                "--no-progress-bar",
                str(zip_path),
                stdout=stdout,
                skip_checks=True,
            )

        # There should be no error or warnings. Therefore the output should be empty.
        self.assertEqual(stdout.getvalue(), "")