mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-24 09:52:44 +00:00
Compare commits
5 Commits
fix-drop-s
...
chore/plug
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7a192d021f | ||
|
|
1e30490a46 | ||
|
|
bd9e529a63 | ||
|
|
79def8a200 | ||
|
|
701735f6e5 |
3
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
3
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
@@ -21,6 +21,7 @@ body:
|
|||||||
- [The installation instructions](https://docs.paperless-ngx.com/setup/#installation).
|
- [The installation instructions](https://docs.paperless-ngx.com/setup/#installation).
|
||||||
- [Existing issues and discussions](https://github.com/paperless-ngx/paperless-ngx/search?q=&type=issues).
|
- [Existing issues and discussions](https://github.com/paperless-ngx/paperless-ngx/search?q=&type=issues).
|
||||||
- Disable any custom container initialization scripts, if using
|
- Disable any custom container initialization scripts, if using
|
||||||
|
- Remove any third-party parser plugins — issues caused by or requiring changes to a third-party plugin will be closed without investigation.
|
||||||
|
|
||||||
If you encounter issues while installing or configuring Paperless-ngx, please post in the ["Support" section of the discussions](https://github.com/paperless-ngx/paperless-ngx/discussions/new?category=support).
|
If you encounter issues while installing or configuring Paperless-ngx, please post in the ["Support" section of the discussions](https://github.com/paperless-ngx/paperless-ngx/discussions/new?category=support).
|
||||||
- type: textarea
|
- type: textarea
|
||||||
@@ -120,5 +121,7 @@ body:
|
|||||||
required: true
|
required: true
|
||||||
- label: I have already searched for relevant existing issues and discussions before opening this report.
|
- label: I have already searched for relevant existing issues and discussions before opening this report.
|
||||||
required: true
|
required: true
|
||||||
|
- label: I have reproduced this issue with all third-party parser plugins removed. I understand that issues caused by third-party plugins will be closed without investigation.
|
||||||
|
required: true
|
||||||
- label: I have updated the title field above with a concise description.
|
- label: I have updated the title field above with a concise description.
|
||||||
required: true
|
required: true
|
||||||
|
|||||||
@@ -2437,17 +2437,3 @@ src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "Non
|
|||||||
src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "unpaper_clean" [union-attr]
|
src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "unpaper_clean" [union-attr]
|
||||||
src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "unpaper_clean" [union-attr]
|
src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "unpaper_clean" [union-attr]
|
||||||
src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "user_args" [union-attr]
|
src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "user_args" [union-attr]
|
||||||
src/paperless_text/parsers.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
|
|
||||||
src/paperless_text/parsers.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
|
|
||||||
src/paperless_text/parsers.py:0: error: Incompatible types in assignment (expression has type "str", variable has type "None") [assignment]
|
|
||||||
src/paperless_text/signals.py:0: error: Function is missing a type annotation [no-untyped-def]
|
|
||||||
src/paperless_text/signals.py:0: error: Function is missing a type annotation [no-untyped-def]
|
|
||||||
src/paperless_tika/parsers.py:0: error: Argument 1 to "make_thumbnail_from_pdf" has incompatible type "None"; expected "Path" [arg-type]
|
|
||||||
src/paperless_tika/parsers.py:0: error: Function is missing a return type annotation [no-untyped-def]
|
|
||||||
src/paperless_tika/parsers.py:0: error: Function is missing a type annotation [no-untyped-def]
|
|
||||||
src/paperless_tika/parsers.py:0: error: Function is missing a type annotation [no-untyped-def]
|
|
||||||
src/paperless_tika/parsers.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
|
|
||||||
src/paperless_tika/parsers.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
|
|
||||||
src/paperless_tika/parsers.py:0: error: Incompatible types in assignment (expression has type "str | None", variable has type "None") [assignment]
|
|
||||||
src/paperless_tika/signals.py:0: error: Function is missing a type annotation [no-untyped-def]
|
|
||||||
src/paperless_tika/signals.py:0: error: Function is missing a type annotation [no-untyped-def]
|
|
||||||
|
|||||||
@@ -723,6 +723,81 @@ services:
|
|||||||
|
|
||||||
1. Note the `:ro` tag means the folder will be mounted as read only. This is for extra security against changes
|
1. Note the `:ro` tag means the folder will be mounted as read only. This is for extra security against changes
|
||||||
|
|
||||||
|
## Installing third-party parser plugins {#parser-plugins}
|
||||||
|
|
||||||
|
Third-party parser plugins extend Paperless-ngx to support additional file
|
||||||
|
formats. A plugin is a Python package that advertises itself under the
|
||||||
|
`paperless_ngx.parsers` entry point group. Refer to the
|
||||||
|
[developer documentation](development.md#making-custom-parsers) for how to
|
||||||
|
create one.
|
||||||
|
|
||||||
|
!!! warning "Third-party plugins are not officially supported"
|
||||||
|
|
||||||
|
The Paperless-ngx maintainers do not provide support for third-party
|
||||||
|
plugins. Issues caused by or requiring changes to a third-party plugin
|
||||||
|
will be closed without further investigation. Always reproduce problems
|
||||||
|
with all plugins removed before filing a bug report.
|
||||||
|
|
||||||
|
### Docker
|
||||||
|
|
||||||
|
Use a [custom container initialization script](#custom-container-initialization)
|
||||||
|
to install the package before the webserver starts. Create a shell script and
|
||||||
|
mount it into `/custom-cont-init.d`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
#!/bin/bash
|
||||||
|
# /path/to/my/scripts/install-parsers.sh
|
||||||
|
|
||||||
|
pip install my-paperless-parser-package
|
||||||
|
```
|
||||||
|
|
||||||
|
Mount it in your `docker-compose.yml`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
webserver:
|
||||||
|
# ...
|
||||||
|
volumes:
|
||||||
|
- /path/to/my/scripts:/custom-cont-init.d:ro
|
||||||
|
```
|
||||||
|
|
||||||
|
The script runs as `root` before the webserver starts, so the package will be
|
||||||
|
available when Paperless-ngx discovers plugins at startup.
|
||||||
|
|
||||||
|
### Bare metal
|
||||||
|
|
||||||
|
Install the package into the same Python environment that runs Paperless-ngx.
|
||||||
|
If you followed the standard bare-metal install guide, that is the `paperless`
|
||||||
|
user's environment:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo -Hu paperless pip3 install my-paperless-parser-package
|
||||||
|
```
|
||||||
|
|
||||||
|
If you are using `uv` or a virtual environment, activate it first and then run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uv pip install my-paperless-parser-package
|
||||||
|
# or
|
||||||
|
pip install my-paperless-parser-package
|
||||||
|
```
|
||||||
|
|
||||||
|
Restart all Paperless-ngx services after installation so the new plugin is
|
||||||
|
discovered.
|
||||||
|
|
||||||
|
### Verifying installation
|
||||||
|
|
||||||
|
On the next startup, check the application logs for a line confirming
|
||||||
|
discovery:
|
||||||
|
|
||||||
|
```
|
||||||
|
Loaded third-party parser 'My Parser' v1.0.0 by Acme Corp (entrypoint: 'my_parser').
|
||||||
|
```
|
||||||
|
|
||||||
|
If this line does not appear, verify that the package is installed in the
|
||||||
|
correct environment and that its `pyproject.toml` declares the
|
||||||
|
`paperless_ngx.parsers` entry point.
|
||||||
|
|
||||||
## MySQL Caveats {#mysql-caveats}
|
## MySQL Caveats {#mysql-caveats}
|
||||||
|
|
||||||
### Case Sensitivity
|
### Case Sensitivity
|
||||||
|
|||||||
@@ -370,121 +370,363 @@ docker build --file Dockerfile --tag paperless:local .
|
|||||||
|
|
||||||
## Extending Paperless-ngx
|
## Extending Paperless-ngx
|
||||||
|
|
||||||
Paperless-ngx does not have any fancy plugin systems and will probably never
|
Paperless-ngx supports third-party document parsers via a Python entry point
|
||||||
have. However, some parts of the application have been designed to allow
|
plugin system. Plugins are distributed as ordinary Python packages and
|
||||||
easy integration of additional features without any modification to the
|
discovered automatically at startup — no changes to the Paperless-ngx source
|
||||||
base code.
|
are required.
|
||||||
|
|
||||||
|
!!! warning "Third-party plugins are not officially supported"
|
||||||
|
|
||||||
|
The Paperless-ngx maintainers do not provide support for third-party
|
||||||
|
plugins. Issues that are caused by or require changes to a third-party
|
||||||
|
plugin will be closed without further investigation. If you believe you
|
||||||
|
have found a bug in Paperless-ngx itself (not in a plugin), please
|
||||||
|
reproduce it with all third-party plugins removed before filing an issue.
|
||||||
|
|
||||||
### Making custom parsers
|
### Making custom parsers
|
||||||
|
|
||||||
Paperless-ngx uses parsers to add documents. A parser is
|
Paperless-ngx uses parsers to add documents. A parser is responsible for:
|
||||||
responsible for:
|
|
||||||
|
|
||||||
- Retrieving the content from the original
|
- Extracting plain-text content from the document
|
||||||
- Creating a thumbnail
|
- Generating a thumbnail image
|
||||||
- _optional:_ Retrieving a created date from the original
|
- _optional:_ Detecting the document's creation date
|
||||||
- _optional:_ Creating an archived document from the original
|
- _optional:_ Producing a searchable PDF archive copy
|
||||||
|
|
||||||
Custom parsers can be added to Paperless-ngx to support more file types. In
|
Custom parsers are distributed as ordinary Python packages and registered
|
||||||
order to do that, you need to write the parser itself and announce its
|
via a [setuptools entry point](https://setuptools.pypa.io/en/latest/userguide/entry_point.html).
|
||||||
existence to Paperless-ngx.
|
No changes to the Paperless-ngx source are required.
|
||||||
|
|
||||||
The parser itself must extend `documents.parsers.DocumentParser` and
|
#### 1. Implementing the parser class
|
||||||
must implement the methods `parse` and `get_thumbnail`. You can provide
|
|
||||||
your own implementation to `get_date` if you don't want to rely on
|
Your parser must satisfy the `ParserProtocol` structural interface defined in
|
||||||
Paperless-ngx' default date guessing mechanisms.
|
`paperless.parsers`. The simplest approach is to write a plain class — no base
|
||||||
|
class is required, only the right attributes and methods.
|
||||||
|
|
||||||
|
**Class-level identity attributes**
|
||||||
|
|
||||||
|
The registry reads these before instantiating the parser, so they must be
|
||||||
|
plain class attributes (not instance attributes or properties):
|
||||||
|
|
||||||
```python
|
```python
|
||||||
class MyCustomParser(DocumentParser):
|
class MyCustomParser:
|
||||||
|
name = "My Format Parser" # human-readable name shown in logs
|
||||||
def parse(self, document_path, mime_type):
|
version = "1.0.0" # semantic version string
|
||||||
# This method does not return anything. Rather, you should assign
|
author = "Acme Corp" # author / organisation
|
||||||
# whatever you got from the document to the following fields:
|
url = "https://example.com/my-parser" # docs or issue tracker
|
||||||
|
|
||||||
# The content of the document.
|
|
||||||
self.text = "content"
|
|
||||||
|
|
||||||
# Optional: path to a PDF document that you created from the original.
|
|
||||||
self.archive_path = os.path.join(self.tempdir, "archived.pdf")
|
|
||||||
|
|
||||||
# Optional: "created" date of the document.
|
|
||||||
self.date = get_created_from_metadata(document_path)
|
|
||||||
|
|
||||||
def get_thumbnail(self, document_path, mime_type):
|
|
||||||
# This should return the path to a thumbnail you created for this
|
|
||||||
# document.
|
|
||||||
return os.path.join(self.tempdir, "thumb.webp")
|
|
||||||
```
|
```
|
||||||
|
|
||||||
If you encounter any issues during parsing, raise a
|
**Declaring supported MIME types**
|
||||||
`documents.parsers.ParseError`.
|
|
||||||
|
|
||||||
The `self.tempdir` directory is a temporary directory that is guaranteed
|
Return a `dict` mapping MIME type strings to preferred file extensions
|
||||||
to be empty and removed after consumption finished. You can use that
|
(including the leading dot). Paperless-ngx uses the extension when storing
|
||||||
directory to store any intermediate files and also use it to store the
|
archive copies and serving files for download.
|
||||||
thumbnail / archived document.
|
|
||||||
|
|
||||||
After that, you need to announce your parser to Paperless-ngx. You need to
|
|
||||||
connect a handler to the `document_consumer_declaration` signal. Have a
|
|
||||||
look in the file `src/paperless_tesseract/apps.py` on how that's done.
|
|
||||||
The handler is a method that returns information about your parser:
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
def myparser_consumer_declaration(sender, **kwargs):
|
@classmethod
|
||||||
|
def supported_mime_types(cls) -> dict[str, str]:
|
||||||
return {
|
return {
|
||||||
"parser": MyCustomParser,
|
"application/x-my-format": ".myf",
|
||||||
"weight": 0,
|
"application/x-my-format-alt": ".myf",
|
||||||
"mime_types": {
|
|
||||||
"application/pdf": ".pdf",
|
|
||||||
"image/jpeg": ".jpg",
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
- `parser` is a reference to a class that extends `DocumentParser`.
|
**Scoring**
|
||||||
- `weight` is used whenever two or more parsers are able to parse a
|
|
||||||
file: The parser with the higher weight wins. This can be used to
|
|
||||||
override the parsers provided by Paperless-ngx.
|
|
||||||
- `mime_types` is a dictionary. The keys are the mime types your
|
|
||||||
parser supports and the value is the default file extension that
|
|
||||||
Paperless-ngx should use when storing files and serving them for
|
|
||||||
download. We could guess that from the file extensions, but some
|
|
||||||
mime types have many extensions associated with them and the Python
|
|
||||||
methods responsible for guessing the extension do not always return
|
|
||||||
the same value.
|
|
||||||
|
|
||||||
## Using Visual Studio Code devcontainer
|
When more than one parser can handle a file, the registry calls `score()` on
|
||||||
|
each candidate and picks the one with the highest result. Return `None` to
|
||||||
|
decline handling a file even though the MIME type is listed as supported (for
|
||||||
|
example, when a required external service is not configured).
|
||||||
|
|
||||||
Another easy way to get started with development is to use Visual Studio
|
| Score | Meaning |
|
||||||
Code devcontainers. This approach will create a preconfigured development
|
| ------ | ------------------------------------------------- |
|
||||||
environment with all of the required tools and dependencies.
|
| `None` | Decline — do not handle this file |
|
||||||
[Learn more about devcontainers](https://code.visualstudio.com/docs/devcontainers/containers).
|
| `10` | Default priority used by all built-in parsers |
|
||||||
The .devcontainer/vscode/tasks.json and .devcontainer/vscode/launch.json files
|
| `> 10` | Override a built-in parser for the same MIME type |
|
||||||
contain more information about the specific tasks and launch configurations (see the
|
|
||||||
non-standard "description" field).
|
|
||||||
|
|
||||||
To get started:
|
```python
|
||||||
|
@classmethod
|
||||||
|
def score(
|
||||||
|
cls,
|
||||||
|
mime_type: str,
|
||||||
|
filename: str,
|
||||||
|
path: "Path | None" = None,
|
||||||
|
) -> int | None:
|
||||||
|
# Inspect filename or file bytes here if needed.
|
||||||
|
return 10
|
||||||
|
```
|
||||||
|
|
||||||
1. Clone the repository on your machine and open the Paperless-ngx folder in VS Code.
|
**Archive and rendition flags**
|
||||||
|
|
||||||
2. VS Code will prompt you with "Reopen in container". Do so and wait for the environment to start.
|
```python
|
||||||
|
@property
|
||||||
|
def can_produce_archive(self) -> bool:
|
||||||
|
"""True if parse() can produce a searchable PDF archive copy."""
|
||||||
|
return True # or False if your parser doesn't produce PDFs
|
||||||
|
|
||||||
3. In case your host operating system is Windows:
|
@property
|
||||||
- The Source Control view in Visual Studio Code might show: "The detected Git repository is potentially unsafe as the folder is owned by someone other than the current user." Use "Manage Unsafe Repositories" to fix this.
|
def requires_pdf_rendition(self) -> bool:
|
||||||
- Git might have detected modifications for all files, because Windows is using CRLF line endings. Run `git checkout .` in the container's terminal to fix this issue.
|
"""True if the original format cannot be displayed by a browser
|
||||||
|
(e.g. DOCX, ODT) and the PDF output must always be kept."""
|
||||||
|
return False
|
||||||
|
```
|
||||||
|
|
||||||
4. Initialize the project by running the task **Project Setup: Run all Init Tasks**. This
|
**Context manager — temp directory lifecycle**
|
||||||
will initialize the database tables and create a superuser. Then you can compile the front end
|
|
||||||
for production or run the frontend in debug mode.
|
|
||||||
|
|
||||||
5. The project is ready for debugging: start either the fullstack debug or the individual debug
|
Paperless-ngx always uses parsers as context managers. Create a temporary
|
||||||
processes. To spin up the project without debugging, run the task **Project Start: Run all Services**
|
working directory in `__enter__` (or `__init__`) and remove it in `__exit__`
|
||||||
|
regardless of whether an exception occurred. Store intermediate files,
|
||||||
|
thumbnails, and archive PDFs inside this directory.
|
||||||
|
|
||||||
## Developing Date Parser Plugins
|
```python
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Self
|
||||||
|
from types import TracebackType
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
class MyCustomParser:
|
||||||
|
...
|
||||||
|
|
||||||
|
def __init__(self, logging_group: object = None) -> None:
|
||||||
|
settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
self._tempdir = Path(
|
||||||
|
tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
|
||||||
|
)
|
||||||
|
self._text: str | None = None
|
||||||
|
self._archive_path: Path | None = None
|
||||||
|
|
||||||
|
def __enter__(self) -> Self:
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(
|
||||||
|
self,
|
||||||
|
exc_type: type[BaseException] | None,
|
||||||
|
exc_val: BaseException | None,
|
||||||
|
exc_tb: TracebackType | None,
|
||||||
|
) -> None:
|
||||||
|
shutil.rmtree(self._tempdir, ignore_errors=True)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Optional context — `configure()`**
|
||||||
|
|
||||||
|
The consumer calls `configure()` with a `ParserContext` after instantiation
|
||||||
|
and before `parse()`. If your parser doesn't need context, a no-op
|
||||||
|
implementation is fine:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from paperless.parsers import ParserContext
|
||||||
|
|
||||||
|
def configure(self, context: ParserContext) -> None:
|
||||||
|
pass # override if you need context.mailrule_id, etc.
|
||||||
|
```
|
||||||
|
|
||||||
|
**Parsing**
|
||||||
|
|
||||||
|
`parse()` is the core method. It must not return a value; instead, store
|
||||||
|
results in instance attributes and expose them via the accessor methods below.
|
||||||
|
Raise `documents.parsers.ParseError` on any unrecoverable failure.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from documents.parsers import ParseError
|
||||||
|
|
||||||
|
def parse(
|
||||||
|
self,
|
||||||
|
document_path: Path,
|
||||||
|
mime_type: str,
|
||||||
|
*,
|
||||||
|
produce_archive: bool = True,
|
||||||
|
) -> None:
|
||||||
|
try:
|
||||||
|
self._text = extract_text_from_my_format(document_path)
|
||||||
|
except Exception as e:
|
||||||
|
raise ParseError(f"Failed to parse {document_path}: {e}") from e
|
||||||
|
|
||||||
|
if produce_archive and self.can_produce_archive:
|
||||||
|
archive = self._tempdir / "archived.pdf"
|
||||||
|
convert_to_pdf(document_path, archive)
|
||||||
|
self._archive_path = archive
|
||||||
|
```
|
||||||
|
|
||||||
|
**Result accessors**
|
||||||
|
|
||||||
|
```python
|
||||||
|
def get_text(self) -> str | None:
|
||||||
|
return self._text
|
||||||
|
|
||||||
|
def get_date(self) -> "datetime.datetime | None":
|
||||||
|
# Return a datetime extracted from the document, or None to let
|
||||||
|
# Paperless-ngx use its default date-guessing logic.
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_archive_path(self) -> Path | None:
|
||||||
|
return self._archive_path
|
||||||
|
```
|
||||||
|
|
||||||
|
**Thumbnail**
|
||||||
|
|
||||||
|
`get_thumbnail()` may be called independently of `parse()`. Return the path
|
||||||
|
to a WebP image inside `self._tempdir`. The image should be roughly 500 × 700
|
||||||
|
pixels.
|
||||||
|
|
||||||
|
```python
|
||||||
|
def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
|
||||||
|
thumb = self._tempdir / "thumb.webp"
|
||||||
|
render_thumbnail(document_path, thumb)
|
||||||
|
return thumb
|
||||||
|
```
|
||||||
|
|
||||||
|
**Optional methods**
|
||||||
|
|
||||||
|
These are called by the API on demand, not during the consumption pipeline.
|
||||||
|
Implement them if your format supports the information; otherwise return
|
||||||
|
`None` / `[]`.
|
||||||
|
|
||||||
|
```python
|
||||||
|
def get_page_count(self, document_path: Path, mime_type: str) -> int | None:
|
||||||
|
return count_pages(document_path)
|
||||||
|
|
||||||
|
def extract_metadata(
|
||||||
|
self,
|
||||||
|
document_path: Path,
|
||||||
|
mime_type: str,
|
||||||
|
) -> "list[MetadataEntry]":
|
||||||
|
# Must never raise. Return [] if metadata cannot be read.
|
||||||
|
from paperless.parsers import MetadataEntry
|
||||||
|
return [
|
||||||
|
MetadataEntry(
|
||||||
|
namespace="https://example.com/ns/",
|
||||||
|
prefix="ex",
|
||||||
|
key="Author",
|
||||||
|
value="Alice",
|
||||||
|
)
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2. Registering via entry point
|
||||||
|
|
||||||
|
Add the following to your package's `pyproject.toml`. The key (left of `=`)
|
||||||
|
is an arbitrary name used only in log output; the value is the
|
||||||
|
`module:ClassName` import path.
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[project.entry-points."paperless_ngx.parsers"]
|
||||||
|
my_parser = "my_package.parsers:MyCustomParser"
|
||||||
|
```
|
||||||
|
|
||||||
|
Install your package into the same Python environment as Paperless-ngx (or
|
||||||
|
add it to the Docker image), and the parser will be discovered automatically
|
||||||
|
on the next startup. No configuration changes are needed.
|
||||||
|
|
||||||
|
To verify discovery, check the application logs at startup for a line like:
|
||||||
|
|
||||||
|
```
|
||||||
|
Loaded third-party parser 'My Format Parser' v1.0.0 by Acme Corp (entrypoint: 'my_parser').
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3. Utilities
|
||||||
|
|
||||||
|
`paperless.parsers.utils` provides helpers you can import directly:
|
||||||
|
|
||||||
|
| Function | Description |
|
||||||
|
| --------------------------------------- | ---------------------------------------------------------------- |
|
||||||
|
| `read_file_handle_unicode_errors(path)` | Read a file as UTF-8, replacing invalid bytes instead of raising |
|
||||||
|
| `get_page_count_for_pdf(path)` | Count pages in a PDF using pikepdf |
|
||||||
|
| `extract_pdf_metadata(path)` | Extract XMP metadata from a PDF as a `list[MetadataEntry]` |
|
||||||
|
|
||||||
|
#### Minimal example
|
||||||
|
|
||||||
|
A complete, working parser for a hypothetical plain-XML format:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Self
|
||||||
|
from types import TracebackType
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from documents.parsers import ParseError
|
||||||
|
from paperless.parsers import ParserContext
|
||||||
|
|
||||||
|
|
||||||
|
class XmlDocumentParser:
|
||||||
|
name = "XML Parser"
|
||||||
|
version = "1.0.0"
|
||||||
|
author = "Acme Corp"
|
||||||
|
url = "https://example.com/xml-parser"
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def supported_mime_types(cls) -> dict[str, str]:
|
||||||
|
return {"application/xml": ".xml", "text/xml": ".xml"}
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def score(cls, mime_type: str, filename: str, path: Path | None = None) -> int | None:
|
||||||
|
return 10
|
||||||
|
|
||||||
|
@property
|
||||||
|
def can_produce_archive(self) -> bool:
|
||||||
|
return False
|
||||||
|
|
||||||
|
@property
|
||||||
|
def requires_pdf_rendition(self) -> bool:
|
||||||
|
return False
|
||||||
|
|
||||||
|
def __init__(self, logging_group: object = None) -> None:
|
||||||
|
settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
self._tempdir = Path(tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR))
|
||||||
|
self._text: str | None = None
|
||||||
|
|
||||||
|
def __enter__(self) -> Self:
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
|
||||||
|
shutil.rmtree(self._tempdir, ignore_errors=True)
|
||||||
|
|
||||||
|
def configure(self, context: ParserContext) -> None:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def parse(self, document_path: Path, mime_type: str, *, produce_archive: bool = True) -> None:
|
||||||
|
try:
|
||||||
|
tree = ET.parse(document_path)
|
||||||
|
self._text = " ".join(tree.getroot().itertext())
|
||||||
|
except ET.ParseError as e:
|
||||||
|
raise ParseError(f"XML parse error: {e}") from e
|
||||||
|
|
||||||
|
def get_text(self) -> str | None:
|
||||||
|
return self._text
|
||||||
|
|
||||||
|
def get_date(self):
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_archive_path(self) -> Path | None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
|
||||||
|
from PIL import Image, ImageDraw
|
||||||
|
img = Image.new("RGB", (500, 700), color="white")
|
||||||
|
ImageDraw.Draw(img).text((10, 10), "XML Document", fill="black")
|
||||||
|
out = self._tempdir / "thumb.webp"
|
||||||
|
img.save(out, format="WEBP")
|
||||||
|
return out
|
||||||
|
|
||||||
|
def get_page_count(self, document_path: Path, mime_type: str) -> int | None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def extract_metadata(self, document_path: Path, mime_type: str) -> list:
|
||||||
|
return []
|
||||||
|
```
|
||||||
|
|
||||||
|
### Developing date parser plugins
|
||||||
|
|
||||||
Paperless-ngx uses a plugin system for date parsing, allowing you to extend or replace the default date parsing behavior. Plugins are discovered using [Python entry points](https://setuptools.pypa.io/en/latest/userguide/entry_point.html).
|
Paperless-ngx uses a plugin system for date parsing, allowing you to extend or replace the default date parsing behavior. Plugins are discovered using [Python entry points](https://setuptools.pypa.io/en/latest/userguide/entry_point.html).
|
||||||
|
|
||||||
### Creating a Date Parser Plugin
|
#### Creating a Date Parser Plugin
|
||||||
|
|
||||||
To create a custom date parser plugin, you need to:
|
To create a custom date parser plugin, you need to:
|
||||||
|
|
||||||
@@ -492,7 +734,7 @@ To create a custom date parser plugin, you need to:
|
|||||||
2. Implement the required abstract method
|
2. Implement the required abstract method
|
||||||
3. Register your plugin via an entry point
|
3. Register your plugin via an entry point
|
||||||
|
|
||||||
#### 1. Implementing the Parser Class
|
##### 1. Implementing the Parser Class
|
||||||
|
|
||||||
Your parser must extend `documents.plugins.date_parsing.DateParserPluginBase` and implement the `parse` method:
|
Your parser must extend `documents.plugins.date_parsing.DateParserPluginBase` and implement the `parse` method:
|
||||||
|
|
||||||
@@ -532,7 +774,7 @@ class MyDateParserPlugin(DateParserPluginBase):
|
|||||||
yield another_datetime
|
yield another_datetime
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 2. Configuration and Helper Methods
|
##### 2. Configuration and Helper Methods
|
||||||
|
|
||||||
Your parser instance is initialized with a `DateParserConfig` object accessible via `self.config`. This provides:
|
Your parser instance is initialized with a `DateParserConfig` object accessible via `self.config`. This provides:
|
||||||
|
|
||||||
@@ -565,11 +807,11 @@ def _filter_date(
|
|||||||
"""
|
"""
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 3. Resource Management (Optional)
|
##### 3. Resource Management (Optional)
|
||||||
|
|
||||||
If your plugin needs to acquire or release resources (database connections, API clients, etc.), override the context manager methods. Paperless-ngx will always use plugins as context managers, ensuring resources can be released even in the event of errors.
|
If your plugin needs to acquire or release resources (database connections, API clients, etc.), override the context manager methods. Paperless-ngx will always use plugins as context managers, ensuring resources can be released even in the event of errors.
|
||||||
|
|
||||||
#### 4. Registering Your Plugin
|
##### 4. Registering Your Plugin
|
||||||
|
|
||||||
Register your plugin using a setuptools entry point in your package's `pyproject.toml`:
|
Register your plugin using a setuptools entry point in your package's `pyproject.toml`:
|
||||||
|
|
||||||
@@ -580,7 +822,7 @@ my_parser = "my_package.parsers:MyDateParserPlugin"
|
|||||||
|
|
||||||
The entry point name (e.g., `"my_parser"`) is used for sorting when multiple plugins are found. Paperless-ngx will use the first plugin alphabetically by name if multiple plugins are discovered.
|
The entry point name (e.g., `"my_parser"`) is used for sorting when multiple plugins are found. Paperless-ngx will use the first plugin alphabetically by name if multiple plugins are discovered.
|
||||||
|
|
||||||
### Plugin Discovery
|
#### Plugin Discovery
|
||||||
|
|
||||||
Paperless-ngx automatically discovers and loads date parser plugins at runtime. The discovery process:
|
Paperless-ngx automatically discovers and loads date parser plugins at runtime. The discovery process:
|
||||||
|
|
||||||
@@ -591,7 +833,7 @@ Paperless-ngx automatically discovers and loads date parser plugins at runtime.
|
|||||||
|
|
||||||
If multiple plugins are installed, a warning is logged indicating which plugin was selected.
|
If multiple plugins are installed, a warning is logged indicating which plugin was selected.
|
||||||
|
|
||||||
### Example: Simple Date Parser
|
#### Example: Simple Date Parser
|
||||||
|
|
||||||
Here's a minimal example that only looks for ISO 8601 dates:
|
Here's a minimal example that only looks for ISO 8601 dates:
|
||||||
|
|
||||||
@@ -623,3 +865,30 @@ class ISODateParserPlugin(DateParserPluginBase):
|
|||||||
if filtered_date is not None:
|
if filtered_date is not None:
|
||||||
yield filtered_date
|
yield filtered_date
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Using Visual Studio Code devcontainer
|
||||||
|
|
||||||
|
Another easy way to get started with development is to use Visual Studio
|
||||||
|
Code devcontainers. This approach will create a preconfigured development
|
||||||
|
environment with all of the required tools and dependencies.
|
||||||
|
[Learn more about devcontainers](https://code.visualstudio.com/docs/devcontainers/containers).
|
||||||
|
The .devcontainer/vscode/tasks.json and .devcontainer/vscode/launch.json files
|
||||||
|
contain more information about the specific tasks and launch configurations (see the
|
||||||
|
non-standard "description" field).
|
||||||
|
|
||||||
|
To get started:
|
||||||
|
|
||||||
|
1. Clone the repository on your machine and open the Paperless-ngx folder in VS Code.
|
||||||
|
|
||||||
|
2. VS Code will prompt you with "Reopen in container". Do so and wait for the environment to start.
|
||||||
|
|
||||||
|
3. In case your host operating system is Windows:
|
||||||
|
- The Source Control view in Visual Studio Code might show: "The detected Git repository is potentially unsafe as the folder is owned by someone other than the current user." Use "Manage Unsafe Repositories" to fix this.
|
||||||
|
- Git might have detecteded modifications for all files, because Windows is using CRLF line endings. Run `git checkout .` in the containers terminal to fix this issue.
|
||||||
|
|
||||||
|
4. Initialize the project by running the task **Project Setup: Run all Init Tasks**. This
|
||||||
|
will initialize the database tables and create a superuser. Then you can compile the front end
|
||||||
|
for production or run the frontend in debug mode.
|
||||||
|
|
||||||
|
5. The project is ready for debugging, start either run the fullstack debug or individual debug
|
||||||
|
processes. Yo spin up the project without debugging run the task **Project Start: Run all Services**
|
||||||
|
|||||||
@@ -269,10 +269,6 @@ testpaths = [
|
|||||||
"src/documents/tests/",
|
"src/documents/tests/",
|
||||||
"src/paperless/tests/",
|
"src/paperless/tests/",
|
||||||
"src/paperless_mail/tests/",
|
"src/paperless_mail/tests/",
|
||||||
"src/paperless_tesseract/tests/",
|
|
||||||
"src/paperless_tika/tests",
|
|
||||||
"src/paperless_text/tests/",
|
|
||||||
"src/paperless_remote/tests/",
|
|
||||||
"src/paperless_ai/tests",
|
"src/paperless_ai/tests",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -468,7 +468,7 @@
|
|||||||
"time": 0.951,
|
"time": 0.951,
|
||||||
"request": {
|
"request": {
|
||||||
"method": "GET",
|
"method": "GET",
|
||||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__in=9",
|
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__in=9",
|
||||||
"httpVersion": "HTTP/1.1",
|
"httpVersion": "HTTP/1.1",
|
||||||
"cookies": [],
|
"cookies": [],
|
||||||
"headers": [
|
"headers": [
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -534,7 +534,7 @@
|
|||||||
"time": 0.653,
|
"time": 0.653,
|
||||||
"request": {
|
"request": {
|
||||||
"method": "GET",
|
"method": "GET",
|
||||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=10&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=9",
|
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=9",
|
||||||
"httpVersion": "HTTP/1.1",
|
"httpVersion": "HTTP/1.1",
|
||||||
"cookies": [],
|
"cookies": [],
|
||||||
"headers": [
|
"headers": [
|
||||||
|
|||||||
@@ -883,7 +883,7 @@
|
|||||||
"time": 0.93,
|
"time": 0.93,
|
||||||
"request": {
|
"request": {
|
||||||
"method": "GET",
|
"method": "GET",
|
||||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=4",
|
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=4",
|
||||||
"httpVersion": "HTTP/1.1",
|
"httpVersion": "HTTP/1.1",
|
||||||
"cookies": [],
|
"cookies": [],
|
||||||
"headers": [
|
"headers": [
|
||||||
@@ -961,7 +961,7 @@
|
|||||||
"time": -1,
|
"time": -1,
|
||||||
"request": {
|
"request": {
|
||||||
"method": "GET",
|
"method": "GET",
|
||||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=4",
|
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=4",
|
||||||
"httpVersion": "HTTP/1.1",
|
"httpVersion": "HTTP/1.1",
|
||||||
"cookies": [],
|
"cookies": [],
|
||||||
"headers": [
|
"headers": [
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ test('basic filtering', async ({ page }) => {
|
|||||||
await expect(page).toHaveURL(/tags__id__all=9/)
|
await expect(page).toHaveURL(/tags__id__all=9/)
|
||||||
await expect(page.locator('pngx-document-list')).toHaveText(/8 documents/)
|
await expect(page.locator('pngx-document-list')).toHaveText(/8 documents/)
|
||||||
await page.getByRole('button', { name: 'Document type' }).click()
|
await page.getByRole('button', { name: 'Document type' }).click()
|
||||||
await page.getByRole('menuitem', { name: /^Invoice Test/ }).click()
|
await page.getByRole('menuitem', { name: 'Invoice Test 3' }).click()
|
||||||
await expect(page).toHaveURL(/document_type__id__in=1/)
|
await expect(page).toHaveURL(/document_type__id__in=1/)
|
||||||
await expect(page.locator('pngx-document-list')).toHaveText(/3 documents/)
|
await expect(page.locator('pngx-document-list')).toHaveText(/3 documents/)
|
||||||
await page.getByRole('button', { name: 'Reset filters' }).first().click()
|
await page.getByRole('button', { name: 'Reset filters' }).first().click()
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -20,9 +20,9 @@ import { Subject, filter, takeUntil } from 'rxjs'
|
|||||||
import { NEGATIVE_NULL_FILTER_VALUE } from 'src/app/data/filter-rule-type'
|
import { NEGATIVE_NULL_FILTER_VALUE } from 'src/app/data/filter-rule-type'
|
||||||
import { MatchingModel } from 'src/app/data/matching-model'
|
import { MatchingModel } from 'src/app/data/matching-model'
|
||||||
import { ObjectWithPermissions } from 'src/app/data/object-with-permissions'
|
import { ObjectWithPermissions } from 'src/app/data/object-with-permissions'
|
||||||
import { SelectionDataItem } from 'src/app/data/results'
|
|
||||||
import { FilterPipe } from 'src/app/pipes/filter.pipe'
|
import { FilterPipe } from 'src/app/pipes/filter.pipe'
|
||||||
import { HotKeyService } from 'src/app/services/hot-key.service'
|
import { HotKeyService } from 'src/app/services/hot-key.service'
|
||||||
|
import { SelectionDataItem } from 'src/app/services/rest/document.service'
|
||||||
import { pngxPopperOptions } from 'src/app/utils/popper-options'
|
import { pngxPopperOptions } from 'src/app/utils/popper-options'
|
||||||
import { LoadingComponentWithPermissions } from '../../loading-component/loading.component'
|
import { LoadingComponentWithPermissions } from '../../loading-component/loading.component'
|
||||||
import { ClearableBadgeComponent } from '../clearable-badge/clearable-badge.component'
|
import { ClearableBadgeComponent } from '../clearable-badge/clearable-badge.component'
|
||||||
|
|||||||
@@ -300,7 +300,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
parameters: { add_tags: [101], remove_tags: [] },
|
parameters: { add_tags: [101], remove_tags: [] },
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -332,7 +332,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
||||||
.flush(true)
|
.flush(true)
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -423,7 +423,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
parameters: { correspondent: 101 },
|
parameters: { correspondent: 101 },
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -455,7 +455,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
||||||
.flush(true)
|
.flush(true)
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -521,7 +521,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
parameters: { document_type: 101 },
|
parameters: { document_type: 101 },
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -553,7 +553,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
||||||
.flush(true)
|
.flush(true)
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -619,7 +619,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
parameters: { storage_path: 101 },
|
parameters: { storage_path: 101 },
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -651,7 +651,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
||||||
.flush(true)
|
.flush(true)
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -717,7 +717,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
parameters: { add_custom_fields: [101], remove_custom_fields: [102] },
|
parameters: { add_custom_fields: [101], remove_custom_fields: [102] },
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -749,7 +749,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
||||||
.flush(true)
|
.flush(true)
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -858,7 +858,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
documents: [3, 4],
|
documents: [3, 4],
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -951,7 +951,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
documents: [3, 4],
|
documents: [3, 4],
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -986,7 +986,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
source_mode: 'latest_version',
|
source_mode: 'latest_version',
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -1027,7 +1027,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
metadata_document_id: 3,
|
metadata_document_id: 3,
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -1046,7 +1046,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
delete_originals: true,
|
delete_originals: true,
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -1067,7 +1067,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
archive_fallback: true,
|
archive_fallback: true,
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -1153,7 +1153,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
},
|
},
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -1460,7 +1460,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
expect(toastServiceShowInfoSpy).toHaveBeenCalled()
|
expect(toastServiceShowInfoSpy).toHaveBeenCalled()
|
||||||
expect(listReloadSpy).toHaveBeenCalled()
|
expect(listReloadSpy).toHaveBeenCalled()
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
|
|||||||
@@ -16,7 +16,6 @@ import { first, map, Observable, Subject, switchMap, takeUntil } from 'rxjs'
|
|||||||
import { ConfirmDialogComponent } from 'src/app/components/common/confirm-dialog/confirm-dialog.component'
|
import { ConfirmDialogComponent } from 'src/app/components/common/confirm-dialog/confirm-dialog.component'
|
||||||
import { CustomField } from 'src/app/data/custom-field'
|
import { CustomField } from 'src/app/data/custom-field'
|
||||||
import { MatchingModel } from 'src/app/data/matching-model'
|
import { MatchingModel } from 'src/app/data/matching-model'
|
||||||
import { SelectionDataItem } from 'src/app/data/results'
|
|
||||||
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
||||||
import { IfPermissionsDirective } from 'src/app/directives/if-permissions.directive'
|
import { IfPermissionsDirective } from 'src/app/directives/if-permissions.directive'
|
||||||
import { DocumentListViewService } from 'src/app/services/document-list-view.service'
|
import { DocumentListViewService } from 'src/app/services/document-list-view.service'
|
||||||
@@ -33,6 +32,7 @@ import {
|
|||||||
DocumentBulkEditMethod,
|
DocumentBulkEditMethod,
|
||||||
DocumentService,
|
DocumentService,
|
||||||
MergeDocumentsRequest,
|
MergeDocumentsRequest,
|
||||||
|
SelectionDataItem,
|
||||||
} from 'src/app/services/rest/document.service'
|
} from 'src/app/services/rest/document.service'
|
||||||
import { SavedViewService } from 'src/app/services/rest/saved-view.service'
|
import { SavedViewService } from 'src/app/services/rest/saved-view.service'
|
||||||
import { ShareLinkBundleService } from 'src/app/services/rest/share-link-bundle.service'
|
import { ShareLinkBundleService } from 'src/app/services/rest/share-link-bundle.service'
|
||||||
|
|||||||
@@ -76,7 +76,6 @@ import {
|
|||||||
FILTER_TITLE_CONTENT,
|
FILTER_TITLE_CONTENT,
|
||||||
NEGATIVE_NULL_FILTER_VALUE,
|
NEGATIVE_NULL_FILTER_VALUE,
|
||||||
} from 'src/app/data/filter-rule-type'
|
} from 'src/app/data/filter-rule-type'
|
||||||
import { SelectionData, SelectionDataItem } from 'src/app/data/results'
|
|
||||||
import {
|
import {
|
||||||
PermissionAction,
|
PermissionAction,
|
||||||
PermissionType,
|
PermissionType,
|
||||||
@@ -85,7 +84,11 @@ import {
|
|||||||
import { CorrespondentService } from 'src/app/services/rest/correspondent.service'
|
import { CorrespondentService } from 'src/app/services/rest/correspondent.service'
|
||||||
import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service'
|
import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service'
|
||||||
import { DocumentTypeService } from 'src/app/services/rest/document-type.service'
|
import { DocumentTypeService } from 'src/app/services/rest/document-type.service'
|
||||||
import { DocumentService } from 'src/app/services/rest/document.service'
|
import {
|
||||||
|
DocumentService,
|
||||||
|
SelectionData,
|
||||||
|
SelectionDataItem,
|
||||||
|
} from 'src/app/services/rest/document.service'
|
||||||
import { SearchService } from 'src/app/services/rest/search.service'
|
import { SearchService } from 'src/app/services/rest/search.service'
|
||||||
import { StoragePathService } from 'src/app/services/rest/storage-path.service'
|
import { StoragePathService } from 'src/app/services/rest/storage-path.service'
|
||||||
import { TagService } from 'src/app/services/rest/tag.service'
|
import { TagService } from 'src/app/services/rest/tag.service'
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
import { Document } from './document'
|
|
||||||
|
|
||||||
export interface Results<T> {
|
export interface Results<T> {
|
||||||
count: number
|
count: number
|
||||||
|
|
||||||
@@ -7,20 +5,3 @@ export interface Results<T> {
|
|||||||
|
|
||||||
all: number[]
|
all: number[]
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface SelectionDataItem {
|
|
||||||
id: number
|
|
||||||
document_count: number
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface SelectionData {
|
|
||||||
selected_storage_paths: SelectionDataItem[]
|
|
||||||
selected_correspondents: SelectionDataItem[]
|
|
||||||
selected_tags: SelectionDataItem[]
|
|
||||||
selected_document_types: SelectionDataItem[]
|
|
||||||
selected_custom_fields: SelectionDataItem[]
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface DocumentResults extends Results<Document> {
|
|
||||||
selection_data?: SelectionData
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -126,10 +126,13 @@ describe('DocumentListViewService', () => {
|
|||||||
expect(documentListViewService.currentPage).toEqual(1)
|
expect(documentListViewService.currentPage).toEqual(1)
|
||||||
documentListViewService.reload()
|
documentListViewService.reload()
|
||||||
const req = httpTestingController.expectOne(
|
const req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush(full_results)
|
req.flush(full_results)
|
||||||
|
httpTestingController.expectOne(
|
||||||
|
`${environment.apiBaseUrl}documents/selection_data/`
|
||||||
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
expect(documentListViewService.isReloading).toBeFalsy()
|
expect(documentListViewService.isReloading).toBeFalsy()
|
||||||
expect(documentListViewService.activeSavedViewId).toBeNull()
|
expect(documentListViewService.activeSavedViewId).toBeNull()
|
||||||
@@ -141,12 +144,12 @@ describe('DocumentListViewService', () => {
|
|||||||
it('should handle error on page request out of range', () => {
|
it('should handle error on page request out of range', () => {
|
||||||
documentListViewService.currentPage = 50
|
documentListViewService.currentPage = 50
|
||||||
let req = httpTestingController.expectOne(
|
let req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=50&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=50&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush([], { status: 404, statusText: 'Unexpected error' })
|
req.flush([], { status: 404, statusText: 'Unexpected error' })
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
expect(documentListViewService.currentPage).toEqual(1)
|
expect(documentListViewService.currentPage).toEqual(1)
|
||||||
@@ -163,7 +166,7 @@ describe('DocumentListViewService', () => {
|
|||||||
]
|
]
|
||||||
documentListViewService.setFilterRules(filterRulesAny)
|
documentListViewService.setFilterRules(filterRulesAny)
|
||||||
let req = httpTestingController.expectOne(
|
let req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__in=${tags__id__in}`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__in=${tags__id__in}`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush(
|
req.flush(
|
||||||
@@ -171,13 +174,13 @@ describe('DocumentListViewService', () => {
|
|||||||
{ status: 404, statusText: 'Unexpected error' }
|
{ status: 404, statusText: 'Unexpected error' }
|
||||||
)
|
)
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
// reset the list
|
// reset the list
|
||||||
documentListViewService.setFilterRules([])
|
documentListViewService.setFilterRules([])
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -185,7 +188,7 @@ describe('DocumentListViewService', () => {
|
|||||||
documentListViewService.currentPage = 1
|
documentListViewService.currentPage = 1
|
||||||
documentListViewService.sortField = 'custom_field_999'
|
documentListViewService.sortField = 'custom_field_999'
|
||||||
let req = httpTestingController.expectOne(
|
let req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-custom_field_999&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-custom_field_999&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush(
|
req.flush(
|
||||||
@@ -194,7 +197,7 @@ describe('DocumentListViewService', () => {
|
|||||||
)
|
)
|
||||||
// resets itself
|
// resets itself
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -209,7 +212,7 @@ describe('DocumentListViewService', () => {
|
|||||||
]
|
]
|
||||||
documentListViewService.setFilterRules(filterRulesAny)
|
documentListViewService.setFilterRules(filterRulesAny)
|
||||||
let req = httpTestingController.expectOne(
|
let req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__in=${tags__id__in}`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__in=${tags__id__in}`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush('Generic error', { status: 404, statusText: 'Unexpected error' })
|
req.flush('Generic error', { status: 404, statusText: 'Unexpected error' })
|
||||||
@@ -217,7 +220,7 @@ describe('DocumentListViewService', () => {
|
|||||||
// reset the list
|
// reset the list
|
||||||
documentListViewService.setFilterRules([])
|
documentListViewService.setFilterRules([])
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -226,7 +229,7 @@ describe('DocumentListViewService', () => {
|
|||||||
expect(documentListViewService.sortReverse).toBeTruthy()
|
expect(documentListViewService.sortReverse).toBeTruthy()
|
||||||
documentListViewService.setSort('added', false)
|
documentListViewService.setSort('added', false)
|
||||||
let req = httpTestingController.expectOne(
|
let req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=added&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=added&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
expect(documentListViewService.sortField).toEqual('added')
|
expect(documentListViewService.sortField).toEqual('added')
|
||||||
@@ -234,12 +237,12 @@ describe('DocumentListViewService', () => {
|
|||||||
|
|
||||||
documentListViewService.sortField = 'created'
|
documentListViewService.sortField = 'created'
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(documentListViewService.sortField).toEqual('created')
|
expect(documentListViewService.sortField).toEqual('created')
|
||||||
documentListViewService.sortReverse = true
|
documentListViewService.sortReverse = true
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
expect(documentListViewService.sortReverse).toBeTruthy()
|
expect(documentListViewService.sortReverse).toBeTruthy()
|
||||||
@@ -259,7 +262,7 @@ describe('DocumentListViewService', () => {
|
|||||||
const req = httpTestingController.expectOne(
|
const req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=${page}&page_size=${
|
`${environment.apiBaseUrl}documents/?page=${page}&page_size=${
|
||||||
documentListViewService.pageSize
|
documentListViewService.pageSize
|
||||||
}&ordering=${reverse ? '-' : ''}${sort}&truncate_content=true&include_selection_data=true`
|
}&ordering=${reverse ? '-' : ''}${sort}&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
expect(documentListViewService.currentPage).toEqual(page)
|
expect(documentListViewService.currentPage).toEqual(page)
|
||||||
@@ -276,7 +279,7 @@ describe('DocumentListViewService', () => {
|
|||||||
}
|
}
|
||||||
documentListViewService.loadFromQueryParams(convertToParamMap(params))
|
documentListViewService.loadFromQueryParams(convertToParamMap(params))
|
||||||
let req = httpTestingController.expectOne(
|
let req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-added&truncate_content=true&include_selection_data=true&tags__id__all=${tags__id__all}`
|
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-added&truncate_content=true&tags__id__all=${tags__id__all}`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
expect(documentListViewService.filterRules).toEqual([
|
expect(documentListViewService.filterRules).toEqual([
|
||||||
@@ -286,12 +289,15 @@ describe('DocumentListViewService', () => {
|
|||||||
},
|
},
|
||||||
])
|
])
|
||||||
req.flush(full_results)
|
req.flush(full_results)
|
||||||
|
httpTestingController.expectOne(
|
||||||
|
`${environment.apiBaseUrl}documents/selection_data/`
|
||||||
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
it('should use filter rules to update query params', () => {
|
it('should use filter rules to update query params', () => {
|
||||||
documentListViewService.setFilterRules(filterRules)
|
documentListViewService.setFilterRules(filterRules)
|
||||||
const req = httpTestingController.expectOne(
|
const req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=${tags__id__all}`
|
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-created&truncate_content=true&tags__id__all=${tags__id__all}`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
})
|
})
|
||||||
@@ -300,26 +306,34 @@ describe('DocumentListViewService', () => {
|
|||||||
documentListViewService.currentPage = 2
|
documentListViewService.currentPage = 2
|
||||||
let req = httpTestingController.expectOne((request) =>
|
let req = httpTestingController.expectOne((request) =>
|
||||||
request.urlWithParams.startsWith(
|
request.urlWithParams.startsWith(
|
||||||
`${environment.apiBaseUrl}documents/?page=2&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=2&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush(full_results)
|
req.flush(full_results)
|
||||||
|
req = httpTestingController.expectOne(
|
||||||
|
`${environment.apiBaseUrl}documents/selection_data/`
|
||||||
|
)
|
||||||
|
req.flush([])
|
||||||
|
|
||||||
documentListViewService.setFilterRules(filterRules, true)
|
documentListViewService.setFilterRules(filterRules, true)
|
||||||
|
|
||||||
const filteredReqs = httpTestingController.match(
|
const filteredReqs = httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=${tags__id__all}`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=${tags__id__all}`
|
||||||
)
|
)
|
||||||
expect(filteredReqs).toHaveLength(1)
|
expect(filteredReqs).toHaveLength(1)
|
||||||
filteredReqs[0].flush(full_results)
|
filteredReqs[0].flush(full_results)
|
||||||
|
req = httpTestingController.expectOne(
|
||||||
|
`${environment.apiBaseUrl}documents/selection_data/`
|
||||||
|
)
|
||||||
|
req.flush([])
|
||||||
expect(documentListViewService.currentPage).toEqual(1)
|
expect(documentListViewService.currentPage).toEqual(1)
|
||||||
})
|
})
|
||||||
|
|
||||||
it('should support quick filter', () => {
|
it('should support quick filter', () => {
|
||||||
documentListViewService.quickFilter(filterRules)
|
documentListViewService.quickFilter(filterRules)
|
||||||
const req = httpTestingController.expectOne(
|
const req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=${tags__id__all}`
|
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-created&truncate_content=true&tags__id__all=${tags__id__all}`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
})
|
})
|
||||||
@@ -342,21 +356,21 @@ describe('DocumentListViewService', () => {
|
|||||||
convertToParamMap(params)
|
convertToParamMap(params)
|
||||||
)
|
)
|
||||||
let req = httpTestingController.expectOne(
|
let req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=${page}&page_size=${documentListViewService.pageSize}&ordering=-added&truncate_content=true&include_selection_data=true&tags__id__all=${tags__id__all}`
|
`${environment.apiBaseUrl}documents/?page=${page}&page_size=${documentListViewService.pageSize}&ordering=-added&truncate_content=true&tags__id__all=${tags__id__all}`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
// reset the list
|
// reset the list
|
||||||
documentListViewService.currentPage = 1
|
documentListViewService.currentPage = 1
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-added&truncate_content=true&include_selection_data=true&tags__id__all=9`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-added&truncate_content=true&tags__id__all=9`
|
||||||
)
|
)
|
||||||
documentListViewService.setFilterRules([])
|
documentListViewService.setFilterRules([])
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-added&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-added&truncate_content=true`
|
||||||
)
|
)
|
||||||
documentListViewService.sortField = 'created'
|
documentListViewService.sortField = 'created'
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
documentListViewService.activateSavedView(null)
|
documentListViewService.activateSavedView(null)
|
||||||
})
|
})
|
||||||
@@ -364,18 +378,21 @@ describe('DocumentListViewService', () => {
|
|||||||
it('should support navigating next / previous', () => {
|
it('should support navigating next / previous', () => {
|
||||||
documentListViewService.setFilterRules([])
|
documentListViewService.setFilterRules([])
|
||||||
let req = httpTestingController.expectOne(
|
let req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(documentListViewService.currentPage).toEqual(1)
|
expect(documentListViewService.currentPage).toEqual(1)
|
||||||
documentListViewService.pageSize = 3
|
documentListViewService.pageSize = 3
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush({
|
req.flush({
|
||||||
count: 3,
|
count: 3,
|
||||||
results: documents.slice(0, 3),
|
results: documents.slice(0, 3),
|
||||||
})
|
})
|
||||||
|
httpTestingController
|
||||||
|
.expectOne(`${environment.apiBaseUrl}documents/selection_data/`)
|
||||||
|
.flush([])
|
||||||
expect(documentListViewService.hasNext(documents[0].id)).toBeTruthy()
|
expect(documentListViewService.hasNext(documents[0].id)).toBeTruthy()
|
||||||
expect(documentListViewService.hasPrevious(documents[0].id)).toBeFalsy()
|
expect(documentListViewService.hasPrevious(documents[0].id)).toBeFalsy()
|
||||||
documentListViewService.getNext(documents[0].id).subscribe((docId) => {
|
documentListViewService.getNext(documents[0].id).subscribe((docId) => {
|
||||||
@@ -422,7 +439,7 @@ describe('DocumentListViewService', () => {
|
|||||||
expect(documentListViewService.currentPage).toEqual(1)
|
expect(documentListViewService.currentPage).toEqual(1)
|
||||||
documentListViewService.pageSize = 3
|
documentListViewService.pageSize = 3
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
jest
|
jest
|
||||||
.spyOn(documentListViewService, 'getLastPage')
|
.spyOn(documentListViewService, 'getLastPage')
|
||||||
@@ -437,7 +454,7 @@ describe('DocumentListViewService', () => {
|
|||||||
expect(reloadSpy).toHaveBeenCalled()
|
expect(reloadSpy).toHaveBeenCalled()
|
||||||
expect(documentListViewService.currentPage).toEqual(2)
|
expect(documentListViewService.currentPage).toEqual(2)
|
||||||
const reqs = httpTestingController.match(
|
const reqs = httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=2&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=2&page_size=3&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(reqs.length).toBeGreaterThan(0)
|
expect(reqs.length).toBeGreaterThan(0)
|
||||||
})
|
})
|
||||||
@@ -472,11 +489,11 @@ describe('DocumentListViewService', () => {
|
|||||||
.mockReturnValue(documents)
|
.mockReturnValue(documents)
|
||||||
documentListViewService.currentPage = 2
|
documentListViewService.currentPage = 2
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=2&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=2&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
documentListViewService.pageSize = 3
|
documentListViewService.pageSize = 3
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=2&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=2&page_size=3&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
const reloadSpy = jest.spyOn(documentListViewService, 'reload')
|
const reloadSpy = jest.spyOn(documentListViewService, 'reload')
|
||||||
documentListViewService.getPrevious(1).subscribe({
|
documentListViewService.getPrevious(1).subscribe({
|
||||||
@@ -486,7 +503,7 @@ describe('DocumentListViewService', () => {
|
|||||||
expect(reloadSpy).toHaveBeenCalled()
|
expect(reloadSpy).toHaveBeenCalled()
|
||||||
expect(documentListViewService.currentPage).toEqual(1)
|
expect(documentListViewService.currentPage).toEqual(1)
|
||||||
const reqs = httpTestingController.match(
|
const reqs = httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(reqs.length).toBeGreaterThan(0)
|
expect(reqs.length).toBeGreaterThan(0)
|
||||||
})
|
})
|
||||||
@@ -499,10 +516,13 @@ describe('DocumentListViewService', () => {
|
|||||||
it('should support select a document', () => {
|
it('should support select a document', () => {
|
||||||
documentListViewService.reload()
|
documentListViewService.reload()
|
||||||
const req = httpTestingController.expectOne(
|
const req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush(full_results)
|
req.flush(full_results)
|
||||||
|
httpTestingController.expectOne(
|
||||||
|
`${environment.apiBaseUrl}documents/selection_data/`
|
||||||
|
)
|
||||||
documentListViewService.toggleSelected(documents[0])
|
documentListViewService.toggleSelected(documents[0])
|
||||||
expect(documentListViewService.isSelected(documents[0])).toBeTruthy()
|
expect(documentListViewService.isSelected(documents[0])).toBeTruthy()
|
||||||
documentListViewService.toggleSelected(documents[0])
|
documentListViewService.toggleSelected(documents[0])
|
||||||
@@ -524,13 +544,16 @@ describe('DocumentListViewService', () => {
|
|||||||
it('should support select page', () => {
|
it('should support select page', () => {
|
||||||
documentListViewService.pageSize = 3
|
documentListViewService.pageSize = 3
|
||||||
const req = httpTestingController.expectOne(
|
const req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush({
|
req.flush({
|
||||||
count: 3,
|
count: 3,
|
||||||
results: documents.slice(0, 3),
|
results: documents.slice(0, 3),
|
||||||
})
|
})
|
||||||
|
httpTestingController.expectOne(
|
||||||
|
`${environment.apiBaseUrl}documents/selection_data/`
|
||||||
|
)
|
||||||
documentListViewService.selectPage()
|
documentListViewService.selectPage()
|
||||||
expect(documentListViewService.selected.size).toEqual(3)
|
expect(documentListViewService.selected.size).toEqual(3)
|
||||||
expect(documentListViewService.isSelected(documents[5])).toBeFalsy()
|
expect(documentListViewService.isSelected(documents[5])).toBeFalsy()
|
||||||
@@ -539,10 +562,13 @@ describe('DocumentListViewService', () => {
|
|||||||
it('should support select range', () => {
|
it('should support select range', () => {
|
||||||
documentListViewService.reload()
|
documentListViewService.reload()
|
||||||
const req = httpTestingController.expectOne(
|
const req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush(full_results)
|
req.flush(full_results)
|
||||||
|
httpTestingController.expectOne(
|
||||||
|
`${environment.apiBaseUrl}documents/selection_data/`
|
||||||
|
)
|
||||||
documentListViewService.toggleSelected(documents[0])
|
documentListViewService.toggleSelected(documents[0])
|
||||||
expect(documentListViewService.isSelected(documents[0])).toBeTruthy()
|
expect(documentListViewService.isSelected(documents[0])).toBeTruthy()
|
||||||
documentListViewService.selectRangeTo(documents[2])
|
documentListViewService.selectRangeTo(documents[2])
|
||||||
@@ -562,7 +588,7 @@ describe('DocumentListViewService', () => {
|
|||||||
|
|
||||||
documentListViewService.setFilterRules(filterRules)
|
documentListViewService.setFilterRules(filterRules)
|
||||||
httpTestingController.expectOne(
|
httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=9`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=9`
|
||||||
)
|
)
|
||||||
const reqs = httpTestingController.match(
|
const reqs = httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id&tags__id__all=9`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id&tags__id__all=9`
|
||||||
@@ -578,7 +604,7 @@ describe('DocumentListViewService', () => {
|
|||||||
const cancelSpy = jest.spyOn(documentListViewService, 'cancelPending')
|
const cancelSpy = jest.spyOn(documentListViewService, 'cancelPending')
|
||||||
documentListViewService.reload()
|
documentListViewService.reload()
|
||||||
httpTestingController.expectOne(
|
httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=9`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=9`
|
||||||
)
|
)
|
||||||
expect(cancelSpy).toHaveBeenCalled()
|
expect(cancelSpy).toHaveBeenCalled()
|
||||||
})
|
})
|
||||||
@@ -597,7 +623,7 @@ describe('DocumentListViewService', () => {
|
|||||||
documentListViewService.setFilterRules([])
|
documentListViewService.setFilterRules([])
|
||||||
expect(documentListViewService.sortField).toEqual('created')
|
expect(documentListViewService.sortField).toEqual('created')
|
||||||
httpTestingController.expectOne(
|
httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -624,11 +650,11 @@ describe('DocumentListViewService', () => {
|
|||||||
expect(localStorageSpy).toHaveBeenCalled()
|
expect(localStorageSpy).toHaveBeenCalled()
|
||||||
// reload triggered
|
// reload triggered
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
documentListViewService.displayFields = null
|
documentListViewService.displayFields = null
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(documentListViewService.displayFields).toEqual(
|
expect(documentListViewService.displayFields).toEqual(
|
||||||
DEFAULT_DISPLAY_FIELDS.filter((f) => f.id !== DisplayField.ADDED).map(
|
DEFAULT_DISPLAY_FIELDS.filter((f) => f.id !== DisplayField.ADDED).map(
|
||||||
@@ -668,7 +694,7 @@ describe('DocumentListViewService', () => {
|
|||||||
it('should generate quick filter URL preserving default state', () => {
|
it('should generate quick filter URL preserving default state', () => {
|
||||||
documentListViewService.reload()
|
documentListViewService.reload()
|
||||||
httpTestingController.expectOne(
|
httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
const urlTree = documentListViewService.getQuickFilterUrl(filterRules)
|
const urlTree = documentListViewService.getQuickFilterUrl(filterRules)
|
||||||
expect(urlTree).toBeDefined()
|
expect(urlTree).toBeDefined()
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import { Injectable, inject } from '@angular/core'
|
import { Injectable, inject } from '@angular/core'
|
||||||
import { ParamMap, Router, UrlTree } from '@angular/router'
|
import { ParamMap, Router, UrlTree } from '@angular/router'
|
||||||
import { Observable, Subject, takeUntil } from 'rxjs'
|
import { Observable, Subject, first, takeUntil } from 'rxjs'
|
||||||
import {
|
import {
|
||||||
DEFAULT_DISPLAY_FIELDS,
|
DEFAULT_DISPLAY_FIELDS,
|
||||||
DisplayField,
|
DisplayField,
|
||||||
@@ -8,7 +8,6 @@ import {
|
|||||||
Document,
|
Document,
|
||||||
} from '../data/document'
|
} from '../data/document'
|
||||||
import { FilterRule } from '../data/filter-rule'
|
import { FilterRule } from '../data/filter-rule'
|
||||||
import { DocumentResults, SelectionData } from '../data/results'
|
|
||||||
import { SavedView } from '../data/saved-view'
|
import { SavedView } from '../data/saved-view'
|
||||||
import { DOCUMENT_LIST_SERVICE } from '../data/storage-keys'
|
import { DOCUMENT_LIST_SERVICE } from '../data/storage-keys'
|
||||||
import { SETTINGS_KEYS } from '../data/ui-settings'
|
import { SETTINGS_KEYS } from '../data/ui-settings'
|
||||||
@@ -18,7 +17,7 @@ import {
|
|||||||
isFullTextFilterRule,
|
isFullTextFilterRule,
|
||||||
} from '../utils/filter-rules'
|
} from '../utils/filter-rules'
|
||||||
import { paramsFromViewState, paramsToViewState } from '../utils/query-params'
|
import { paramsFromViewState, paramsToViewState } from '../utils/query-params'
|
||||||
import { DocumentService } from './rest/document.service'
|
import { DocumentService, SelectionData } from './rest/document.service'
|
||||||
import { SettingsService } from './settings.service'
|
import { SettingsService } from './settings.service'
|
||||||
|
|
||||||
const LIST_DEFAULT_DISPLAY_FIELDS: DisplayField[] = DEFAULT_DISPLAY_FIELDS.map(
|
const LIST_DEFAULT_DISPLAY_FIELDS: DisplayField[] = DEFAULT_DISPLAY_FIELDS.map(
|
||||||
@@ -261,17 +260,27 @@ export class DocumentListViewService {
|
|||||||
activeListViewState.sortField,
|
activeListViewState.sortField,
|
||||||
activeListViewState.sortReverse,
|
activeListViewState.sortReverse,
|
||||||
activeListViewState.filterRules,
|
activeListViewState.filterRules,
|
||||||
{ truncate_content: true, include_selection_data: true }
|
{ truncate_content: true }
|
||||||
)
|
)
|
||||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||||
.subscribe({
|
.subscribe({
|
||||||
next: (result) => {
|
next: (result) => {
|
||||||
const resultWithSelectionData = result as DocumentResults
|
|
||||||
this.initialized = true
|
this.initialized = true
|
||||||
this.isReloading = false
|
this.isReloading = false
|
||||||
activeListViewState.collectionSize = result.count
|
activeListViewState.collectionSize = result.count
|
||||||
activeListViewState.documents = result.results
|
activeListViewState.documents = result.results
|
||||||
this.selectionData = resultWithSelectionData.selection_data ?? null
|
|
||||||
|
this.documentService
|
||||||
|
.getSelectionData(result.all)
|
||||||
|
.pipe(first())
|
||||||
|
.subscribe({
|
||||||
|
next: (selectionData) => {
|
||||||
|
this.selectionData = selectionData
|
||||||
|
},
|
||||||
|
error: () => {
|
||||||
|
this.selectionData = null
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
if (updateQueryParams && !this._activeSavedViewId) {
|
if (updateQueryParams && !this._activeSavedViewId) {
|
||||||
let base = ['/documents']
|
let base = ['/documents']
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ import {
|
|||||||
import { DocumentMetadata } from 'src/app/data/document-metadata'
|
import { DocumentMetadata } from 'src/app/data/document-metadata'
|
||||||
import { DocumentSuggestions } from 'src/app/data/document-suggestions'
|
import { DocumentSuggestions } from 'src/app/data/document-suggestions'
|
||||||
import { FilterRule } from 'src/app/data/filter-rule'
|
import { FilterRule } from 'src/app/data/filter-rule'
|
||||||
import { Results, SelectionData } from 'src/app/data/results'
|
import { Results } from 'src/app/data/results'
|
||||||
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
||||||
import { queryParamsFromFilterRules } from '../../utils/query-params'
|
import { queryParamsFromFilterRules } from '../../utils/query-params'
|
||||||
import {
|
import {
|
||||||
@@ -24,6 +24,19 @@ import { SettingsService } from '../settings.service'
|
|||||||
import { AbstractPaperlessService } from './abstract-paperless-service'
|
import { AbstractPaperlessService } from './abstract-paperless-service'
|
||||||
import { CustomFieldsService } from './custom-fields.service'
|
import { CustomFieldsService } from './custom-fields.service'
|
||||||
|
|
||||||
|
export interface SelectionDataItem {
|
||||||
|
id: number
|
||||||
|
document_count: number
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface SelectionData {
|
||||||
|
selected_storage_paths: SelectionDataItem[]
|
||||||
|
selected_correspondents: SelectionDataItem[]
|
||||||
|
selected_tags: SelectionDataItem[]
|
||||||
|
selected_document_types: SelectionDataItem[]
|
||||||
|
selected_custom_fields: SelectionDataItem[]
|
||||||
|
}
|
||||||
|
|
||||||
export enum BulkEditSourceMode {
|
export enum BulkEditSourceMode {
|
||||||
LATEST_VERSION = 'latest_version',
|
LATEST_VERSION = 'latest_version',
|
||||||
EXPLICIT_SELECTION = 'explicit_selection',
|
EXPLICIT_SELECTION = 'explicit_selection',
|
||||||
|
|||||||
@@ -3,24 +3,19 @@ from django.core.checks import Error
|
|||||||
from django.core.checks import Warning
|
from django.core.checks import Warning
|
||||||
from django.core.checks import register
|
from django.core.checks import register
|
||||||
|
|
||||||
from documents.signals import document_consumer_declaration
|
|
||||||
from documents.templating.utils import convert_format_str_to_template_format
|
from documents.templating.utils import convert_format_str_to_template_format
|
||||||
|
from paperless.parsers.registry import get_parser_registry
|
||||||
|
|
||||||
|
|
||||||
@register()
|
@register()
|
||||||
def parser_check(app_configs, **kwargs):
|
def parser_check(app_configs, **kwargs):
|
||||||
parsers = []
|
if not get_parser_registry().all_parsers():
|
||||||
for response in document_consumer_declaration.send(None):
|
|
||||||
parsers.append(response[1])
|
|
||||||
|
|
||||||
if len(parsers) == 0:
|
|
||||||
return [
|
return [
|
||||||
Error(
|
Error(
|
||||||
"No parsers found. This is a bug. The consumer won't be "
|
"No parsers found. This is a bug. The consumer won't be "
|
||||||
"able to consume any documents without parsers.",
|
"able to consume any documents without parsers.",
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
else:
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -32,9 +32,7 @@ from documents.models import DocumentType
|
|||||||
from documents.models import StoragePath
|
from documents.models import StoragePath
|
||||||
from documents.models import Tag
|
from documents.models import Tag
|
||||||
from documents.models import WorkflowTrigger
|
from documents.models import WorkflowTrigger
|
||||||
from documents.parsers import DocumentParser
|
|
||||||
from documents.parsers import ParseError
|
from documents.parsers import ParseError
|
||||||
from documents.parsers import get_parser_class_for_mime_type
|
|
||||||
from documents.permissions import set_permissions_for_object
|
from documents.permissions import set_permissions_for_object
|
||||||
from documents.plugins.base import AlwaysRunPluginMixin
|
from documents.plugins.base import AlwaysRunPluginMixin
|
||||||
from documents.plugins.base import ConsumeTaskPlugin
|
from documents.plugins.base import ConsumeTaskPlugin
|
||||||
@@ -52,40 +50,12 @@ from documents.utils import copy_basic_file_stats
|
|||||||
from documents.utils import copy_file_with_basic_stats
|
from documents.utils import copy_file_with_basic_stats
|
||||||
from documents.utils import run_subprocess
|
from documents.utils import run_subprocess
|
||||||
from paperless.parsers import ParserContext
|
from paperless.parsers import ParserContext
|
||||||
from paperless.parsers.mail import MailDocumentParser
|
from paperless.parsers import ParserProtocol
|
||||||
from paperless.parsers.remote import RemoteDocumentParser
|
from paperless.parsers.registry import get_parser_registry
|
||||||
from paperless.parsers.tesseract import RasterisedDocumentParser
|
|
||||||
from paperless.parsers.text import TextDocumentParser
|
|
||||||
from paperless.parsers.tika import TikaDocumentParser
|
|
||||||
|
|
||||||
LOGGING_NAME: Final[str] = "paperless.consumer"
|
LOGGING_NAME: Final[str] = "paperless.consumer"
|
||||||
|
|
||||||
|
|
||||||
def _parser_cleanup(parser: DocumentParser) -> None:
|
|
||||||
"""
|
|
||||||
Call cleanup on a parser, handling the new-style context-manager parsers.
|
|
||||||
|
|
||||||
New-style parsers (e.g. TextDocumentParser) use __exit__ for teardown
|
|
||||||
instead of a cleanup() method. This shim will be removed once all existing parsers
|
|
||||||
have switched to the new style and this consumer is updated to use it
|
|
||||||
|
|
||||||
TODO(stumpylog): Remove me in the future
|
|
||||||
"""
|
|
||||||
if isinstance(
|
|
||||||
parser,
|
|
||||||
(
|
|
||||||
MailDocumentParser,
|
|
||||||
RasterisedDocumentParser,
|
|
||||||
RemoteDocumentParser,
|
|
||||||
TextDocumentParser,
|
|
||||||
TikaDocumentParser,
|
|
||||||
),
|
|
||||||
):
|
|
||||||
parser.__exit__(None, None, None)
|
|
||||||
else:
|
|
||||||
parser.cleanup()
|
|
||||||
|
|
||||||
|
|
||||||
class WorkflowTriggerPlugin(
|
class WorkflowTriggerPlugin(
|
||||||
NoCleanupPluginMixin,
|
NoCleanupPluginMixin,
|
||||||
NoSetupPluginMixin,
|
NoSetupPluginMixin,
|
||||||
@@ -422,8 +392,12 @@ class ConsumerPlugin(
|
|||||||
self.log.error(f"Error attempting to clean PDF: {e}")
|
self.log.error(f"Error attempting to clean PDF: {e}")
|
||||||
|
|
||||||
# Based on the mime type, get the parser for that type
|
# Based on the mime type, get the parser for that type
|
||||||
parser_class: type[DocumentParser] | None = get_parser_class_for_mime_type(
|
parser_class: type[ParserProtocol] | None = (
|
||||||
|
get_parser_registry().get_parser_for_file(
|
||||||
mime_type,
|
mime_type,
|
||||||
|
self.filename,
|
||||||
|
self.working_copy,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
if not parser_class:
|
if not parser_class:
|
||||||
tempdir.cleanup()
|
tempdir.cleanup()
|
||||||
@@ -446,39 +420,13 @@ class ConsumerPlugin(
|
|||||||
tempdir.cleanup()
|
tempdir.cleanup()
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def progress_callback(
|
|
||||||
current_progress,
|
|
||||||
max_progress,
|
|
||||||
) -> None: # pragma: no cover
|
|
||||||
# recalculate progress to be within 20 and 80
|
|
||||||
p = int((current_progress / max_progress) * 50 + 20)
|
|
||||||
self._send_progress(p, 100, ProgressStatusOptions.WORKING)
|
|
||||||
|
|
||||||
# This doesn't parse the document yet, but gives us a parser.
|
# This doesn't parse the document yet, but gives us a parser.
|
||||||
|
with parser_class() as document_parser:
|
||||||
document_parser: DocumentParser = parser_class(
|
document_parser.configure(
|
||||||
self.logging_group,
|
ParserContext(mailrule_id=self.input_doc.mailrule_id),
|
||||||
progress_callback=progress_callback,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
parser_is_new_style = isinstance(
|
self.log.debug(f"Parser: {document_parser.name} v{document_parser.version}")
|
||||||
document_parser,
|
|
||||||
(
|
|
||||||
MailDocumentParser,
|
|
||||||
RasterisedDocumentParser,
|
|
||||||
RemoteDocumentParser,
|
|
||||||
TextDocumentParser,
|
|
||||||
TikaDocumentParser,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
# New-style parsers use __enter__/__exit__ for resource management.
|
|
||||||
# _parser_cleanup (below) handles __exit__; call __enter__ here.
|
|
||||||
# TODO(stumpylog): Remove me in the future
|
|
||||||
if parser_is_new_style:
|
|
||||||
document_parser.__enter__()
|
|
||||||
|
|
||||||
self.log.debug(f"Parser: {type(document_parser).__name__}")
|
|
||||||
|
|
||||||
# Parse the document. This may take some time.
|
# Parse the document. This may take some time.
|
||||||
|
|
||||||
@@ -497,15 +445,7 @@ class ConsumerPlugin(
|
|||||||
)
|
)
|
||||||
self.log.debug(f"Parsing {self.filename}...")
|
self.log.debug(f"Parsing {self.filename}...")
|
||||||
|
|
||||||
# TODO(stumpylog): Remove me in the future when all parsers use new protocol
|
|
||||||
if parser_is_new_style:
|
|
||||||
document_parser.configure(
|
|
||||||
ParserContext(mailrule_id=self.input_doc.mailrule_id),
|
|
||||||
)
|
|
||||||
# TODO(stumpylog): Remove me in the future
|
|
||||||
document_parser.parse(self.working_copy, mime_type)
|
document_parser.parse(self.working_copy, mime_type)
|
||||||
else:
|
|
||||||
document_parser.parse(self.working_copy, mime_type, self.filename)
|
|
||||||
|
|
||||||
self.log.debug(f"Generating thumbnail for {self.filename}...")
|
self.log.debug(f"Generating thumbnail for {self.filename}...")
|
||||||
self._send_progress(
|
self._send_progress(
|
||||||
@@ -514,15 +454,7 @@ class ConsumerPlugin(
|
|||||||
ProgressStatusOptions.WORKING,
|
ProgressStatusOptions.WORKING,
|
||||||
ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
|
ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
|
||||||
)
|
)
|
||||||
# TODO(stumpylog): Remove me in the future when all parsers use new protocol
|
|
||||||
if parser_is_new_style:
|
|
||||||
thumbnail = document_parser.get_thumbnail(self.working_copy, mime_type)
|
thumbnail = document_parser.get_thumbnail(self.working_copy, mime_type)
|
||||||
else:
|
|
||||||
thumbnail = document_parser.get_thumbnail(
|
|
||||||
self.working_copy,
|
|
||||||
mime_type,
|
|
||||||
self.filename,
|
|
||||||
)
|
|
||||||
|
|
||||||
text = document_parser.get_text()
|
text = document_parser.get_text()
|
||||||
date = document_parser.get_date()
|
date = document_parser.get_date()
|
||||||
@@ -536,10 +468,12 @@ class ConsumerPlugin(
|
|||||||
with get_date_parser() as date_parser:
|
with get_date_parser() as date_parser:
|
||||||
date = next(date_parser.parse(self.filename, text), None)
|
date = next(date_parser.parse(self.filename, text), None)
|
||||||
archive_path = document_parser.get_archive_path()
|
archive_path = document_parser.get_archive_path()
|
||||||
page_count = document_parser.get_page_count(self.working_copy, mime_type)
|
page_count = document_parser.get_page_count(
|
||||||
|
self.working_copy,
|
||||||
|
mime_type,
|
||||||
|
)
|
||||||
|
|
||||||
except ParseError as e:
|
except ParseError as e:
|
||||||
_parser_cleanup(document_parser)
|
|
||||||
if tempdir:
|
if tempdir:
|
||||||
tempdir.cleanup()
|
tempdir.cleanup()
|
||||||
self._fail(
|
self._fail(
|
||||||
@@ -549,7 +483,6 @@ class ConsumerPlugin(
|
|||||||
exception=e,
|
exception=e,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
_parser_cleanup(document_parser)
|
|
||||||
if tempdir:
|
if tempdir:
|
||||||
tempdir.cleanup()
|
tempdir.cleanup()
|
||||||
self._fail(
|
self._fail(
|
||||||
@@ -598,7 +531,9 @@ class ConsumerPlugin(
|
|||||||
settings.AUDIT_LOG_ENABLED
|
settings.AUDIT_LOG_ENABLED
|
||||||
and self.metadata.actor_id is not None
|
and self.metadata.actor_id is not None
|
||||||
):
|
):
|
||||||
actor = User.objects.filter(pk=self.metadata.actor_id).first()
|
actor = User.objects.filter(
|
||||||
|
pk=self.metadata.actor_id,
|
||||||
|
).first()
|
||||||
if actor is not None:
|
if actor is not None:
|
||||||
from auditlog.context import ( # type: ignore[import-untyped]
|
from auditlog.context import ( # type: ignore[import-untyped]
|
||||||
set_actor,
|
set_actor,
|
||||||
@@ -722,7 +657,9 @@ class ConsumerPlugin(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Delete the file only if it was successfully consumed
|
# Delete the file only if it was successfully consumed
|
||||||
self.log.debug(f"Deleting original file {self.input_doc.original_file}")
|
self.log.debug(
|
||||||
|
f"Deleting original file {self.input_doc.original_file}",
|
||||||
|
)
|
||||||
self.input_doc.original_file.unlink()
|
self.input_doc.original_file.unlink()
|
||||||
self.log.debug(f"Deleting working copy {self.working_copy}")
|
self.log.debug(f"Deleting working copy {self.working_copy}")
|
||||||
self.working_copy.unlink()
|
self.working_copy.unlink()
|
||||||
@@ -751,7 +688,6 @@ class ConsumerPlugin(
|
|||||||
exception=e,
|
exception=e,
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
_parser_cleanup(document_parser)
|
|
||||||
tempdir.cleanup()
|
tempdir.cleanup()
|
||||||
|
|
||||||
self.run_post_consume_script(document)
|
self.run_post_consume_script(document)
|
||||||
|
|||||||
@@ -3,19 +3,18 @@ import shutil
|
|||||||
|
|
||||||
from documents.management.commands.base import PaperlessCommand
|
from documents.management.commands.base import PaperlessCommand
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.parsers import get_parser_class_for_mime_type
|
from paperless.parsers.registry import get_parser_registry
|
||||||
from paperless.parsers.mail import MailDocumentParser
|
|
||||||
from paperless.parsers.remote import RemoteDocumentParser
|
|
||||||
from paperless.parsers.tesseract import RasterisedDocumentParser
|
|
||||||
from paperless.parsers.text import TextDocumentParser
|
|
||||||
from paperless.parsers.tika import TikaDocumentParser
|
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.management.thumbnails")
|
logger = logging.getLogger("paperless.management.thumbnails")
|
||||||
|
|
||||||
|
|
||||||
def _process_document(doc_id: int) -> None:
|
def _process_document(doc_id: int) -> None:
|
||||||
document: Document = Document.objects.get(id=doc_id)
|
document: Document = Document.objects.get(id=doc_id)
|
||||||
parser_class = get_parser_class_for_mime_type(document.mime_type)
|
parser_class = get_parser_registry().get_parser_for_file(
|
||||||
|
document.mime_type,
|
||||||
|
document.original_filename or "",
|
||||||
|
document.source_path,
|
||||||
|
)
|
||||||
|
|
||||||
if parser_class is None:
|
if parser_class is None:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
@@ -25,40 +24,9 @@ def _process_document(doc_id: int) -> None:
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
parser = parser_class(logging_group=None)
|
with parser_class() as parser:
|
||||||
|
|
||||||
parser_is_new_style = isinstance(
|
|
||||||
parser,
|
|
||||||
(
|
|
||||||
MailDocumentParser,
|
|
||||||
RasterisedDocumentParser,
|
|
||||||
RemoteDocumentParser,
|
|
||||||
TextDocumentParser,
|
|
||||||
TikaDocumentParser,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
# TODO(stumpylog): Remove branch in the future when all parsers use new protocol
|
|
||||||
if parser_is_new_style:
|
|
||||||
parser.__enter__()
|
|
||||||
|
|
||||||
try:
|
|
||||||
# TODO(stumpylog): Remove branch in the future when all parsers use new protocol
|
|
||||||
if parser_is_new_style:
|
|
||||||
thumb = parser.get_thumbnail(document.source_path, document.mime_type)
|
thumb = parser.get_thumbnail(document.source_path, document.mime_type)
|
||||||
else:
|
|
||||||
thumb = parser.get_thumbnail(
|
|
||||||
document.source_path,
|
|
||||||
document.mime_type,
|
|
||||||
document.get_public_filename(),
|
|
||||||
)
|
|
||||||
shutil.move(thumb, document.thumbnail_path)
|
shutil.move(thumb, document.thumbnail_path)
|
||||||
finally:
|
|
||||||
# TODO(stumpylog): Cleanup once all parsers are handled
|
|
||||||
if parser_is_new_style:
|
|
||||||
parser.__exit__(None, None, None)
|
|
||||||
else:
|
|
||||||
parser.cleanup()
|
|
||||||
|
|
||||||
|
|
||||||
class Command(PaperlessCommand):
|
class Command(PaperlessCommand):
|
||||||
|
|||||||
@@ -3,84 +3,47 @@ from __future__ import annotations
|
|||||||
import logging
|
import logging
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
from functools import lru_cache
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
from documents.loggers import LoggingMixin
|
from documents.loggers import LoggingMixin
|
||||||
from documents.signals import document_consumer_declaration
|
|
||||||
from documents.utils import copy_file_with_basic_stats
|
from documents.utils import copy_file_with_basic_stats
|
||||||
from documents.utils import run_subprocess
|
from documents.utils import run_subprocess
|
||||||
|
from paperless.parsers.registry import get_parser_registry
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
# This regular expression will try to find dates in the document at
|
|
||||||
# hand and will match the following formats:
|
|
||||||
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - ZZZZ.XX.YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - ZZZZ/XX/YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - ZZZZ-XX-YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - MONTH ZZZZ, with ZZZZ being 4 digits
|
|
||||||
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
|
|
||||||
# - XX MON ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits. MONTH is 3 letters
|
|
||||||
# - XXPP MONTH ZZZZ with XX being 1 or 2 and PP being 2 letters and ZZZZ being 4 digits
|
|
||||||
|
|
||||||
# TODO: isn't there a date parsing library for this?
|
|
||||||
|
|
||||||
DATE_REGEX = re.compile(
|
|
||||||
r"(\b|(?!=([_-])))(\d{1,2})[\.\/-](\d{1,2})[\.\/-](\d{4}|\d{2})(\b|(?=([_-])))|"
|
|
||||||
r"(\b|(?!=([_-])))(\d{4}|\d{2})[\.\/-](\d{1,2})[\.\/-](\d{1,2})(\b|(?=([_-])))|"
|
|
||||||
r"(\b|(?!=([_-])))(\d{1,2}[\. ]+[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{4}|[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{1,2}, \d{4})(\b|(?=([_-])))|"
|
|
||||||
r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{1,2}, (\d{4}))(\b|(?=([_-])))|"
|
|
||||||
r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{4})(\b|(?=([_-])))|"
|
|
||||||
r"(\b|(?!=([_-])))(\d{1,2}[^ 0-9]{2}[\. ]+[^ ]{3,9}[ \.\/-]\d{4})(\b|(?=([_-])))|"
|
|
||||||
r"(\b|(?!=([_-])))(\b\d{1,2}[ \.\/-][a-zéûäëčžúřěáíóńźçŞğü]{3}[ \.\/-]\d{4})(\b|(?=([_-])))",
|
|
||||||
re.IGNORECASE,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.parsing")
|
logger = logging.getLogger("paperless.parsing")
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=8)
|
|
||||||
def is_mime_type_supported(mime_type: str) -> bool:
|
def is_mime_type_supported(mime_type: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Returns True if the mime type is supported, False otherwise
|
Returns True if the mime type is supported, False otherwise
|
||||||
"""
|
"""
|
||||||
return get_parser_class_for_mime_type(mime_type) is not None
|
return get_parser_registry().get_parser_for_file(mime_type, "") is not None
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=8)
|
|
||||||
def get_default_file_extension(mime_type: str) -> str:
|
def get_default_file_extension(mime_type: str) -> str:
|
||||||
"""
|
"""
|
||||||
Returns the default file extension for a mimetype, or
|
Returns the default file extension for a mimetype, or
|
||||||
an empty string if it could not be determined
|
an empty string if it could not be determined
|
||||||
"""
|
"""
|
||||||
for response in document_consumer_declaration.send(None):
|
parser_class = get_parser_registry().get_parser_for_file(mime_type, "")
|
||||||
parser_declaration = response[1]
|
if parser_class is not None:
|
||||||
supported_mime_types = parser_declaration["mime_types"]
|
supported = parser_class.supported_mime_types()
|
||||||
|
if mime_type in supported:
|
||||||
if mime_type in supported_mime_types:
|
return supported[mime_type]
|
||||||
return supported_mime_types[mime_type]
|
|
||||||
|
|
||||||
ext = mimetypes.guess_extension(mime_type)
|
ext = mimetypes.guess_extension(mime_type)
|
||||||
if ext:
|
return ext if ext else ""
|
||||||
return ext
|
|
||||||
else:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=8)
|
|
||||||
def is_file_ext_supported(ext: str) -> bool:
|
def is_file_ext_supported(ext: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Returns True if the file extension is supported, False otherwise
|
Returns True if the file extension is supported, False otherwise
|
||||||
@@ -94,44 +57,17 @@ def is_file_ext_supported(ext: str) -> bool:
|
|||||||
|
|
||||||
def get_supported_file_extensions() -> set[str]:
|
def get_supported_file_extensions() -> set[str]:
|
||||||
extensions = set()
|
extensions = set()
|
||||||
for response in document_consumer_declaration.send(None):
|
for parser_class in get_parser_registry().all_parsers():
|
||||||
parser_declaration = response[1]
|
for mime_type, ext in parser_class.supported_mime_types().items():
|
||||||
supported_mime_types = parser_declaration["mime_types"]
|
|
||||||
|
|
||||||
for mime_type in supported_mime_types:
|
|
||||||
extensions.update(mimetypes.guess_all_extensions(mime_type))
|
extensions.update(mimetypes.guess_all_extensions(mime_type))
|
||||||
# Python's stdlib might be behind, so also add what the parser
|
# Python's stdlib might be behind, so also add what the parser
|
||||||
# says is the default extension
|
# says is the default extension
|
||||||
# This makes image/webp supported on Python < 3.11
|
# This makes image/webp supported on Python < 3.11
|
||||||
extensions.add(supported_mime_types[mime_type])
|
extensions.add(ext)
|
||||||
|
|
||||||
return extensions
|
return extensions
|
||||||
|
|
||||||
|
|
||||||
def get_parser_class_for_mime_type(mime_type: str) -> type[DocumentParser] | None:
|
|
||||||
"""
|
|
||||||
Returns the best parser (by weight) for the given mimetype or
|
|
||||||
None if no parser exists
|
|
||||||
"""
|
|
||||||
|
|
||||||
options = []
|
|
||||||
|
|
||||||
for response in document_consumer_declaration.send(None):
|
|
||||||
parser_declaration = response[1]
|
|
||||||
supported_mime_types = parser_declaration["mime_types"]
|
|
||||||
|
|
||||||
if mime_type in supported_mime_types:
|
|
||||||
options.append(parser_declaration)
|
|
||||||
|
|
||||||
if not options:
|
|
||||||
return None
|
|
||||||
|
|
||||||
best_parser = sorted(options, key=lambda _: _["weight"], reverse=True)[0]
|
|
||||||
|
|
||||||
# Return the parser with the highest weight.
|
|
||||||
return best_parser["parser"]
|
|
||||||
|
|
||||||
|
|
||||||
def run_convert(
|
def run_convert(
|
||||||
input_file,
|
input_file,
|
||||||
output_file,
|
output_file,
|
||||||
|
|||||||
@@ -2,5 +2,4 @@ from django.dispatch import Signal
|
|||||||
|
|
||||||
document_consumption_started = Signal()
|
document_consumption_started = Signal()
|
||||||
document_consumption_finished = Signal()
|
document_consumption_finished = Signal()
|
||||||
document_consumer_declaration = Signal()
|
|
||||||
document_updated = Signal()
|
document_updated = Signal()
|
||||||
|
|||||||
@@ -52,8 +52,6 @@ from documents.models import StoragePath
|
|||||||
from documents.models import Tag
|
from documents.models import Tag
|
||||||
from documents.models import WorkflowRun
|
from documents.models import WorkflowRun
|
||||||
from documents.models import WorkflowTrigger
|
from documents.models import WorkflowTrigger
|
||||||
from documents.parsers import DocumentParser
|
|
||||||
from documents.parsers import get_parser_class_for_mime_type
|
|
||||||
from documents.plugins.base import ConsumeTaskPlugin
|
from documents.plugins.base import ConsumeTaskPlugin
|
||||||
from documents.plugins.base import ProgressManager
|
from documents.plugins.base import ProgressManager
|
||||||
from documents.plugins.base import StopConsumeTaskError
|
from documents.plugins.base import StopConsumeTaskError
|
||||||
@@ -66,11 +64,7 @@ from documents.signals.handlers import send_websocket_document_updated
|
|||||||
from documents.workflows.utils import get_workflows_for_trigger
|
from documents.workflows.utils import get_workflows_for_trigger
|
||||||
from paperless.config import AIConfig
|
from paperless.config import AIConfig
|
||||||
from paperless.parsers import ParserContext
|
from paperless.parsers import ParserContext
|
||||||
from paperless.parsers.mail import MailDocumentParser
|
from paperless.parsers.registry import get_parser_registry
|
||||||
from paperless.parsers.remote import RemoteDocumentParser
|
|
||||||
from paperless.parsers.tesseract import RasterisedDocumentParser
|
|
||||||
from paperless.parsers.text import TextDocumentParser
|
|
||||||
from paperless.parsers.tika import TikaDocumentParser
|
|
||||||
from paperless_ai.indexing import llm_index_add_or_update_document
|
from paperless_ai.indexing import llm_index_add_or_update_document
|
||||||
from paperless_ai.indexing import llm_index_remove_document
|
from paperless_ai.indexing import llm_index_remove_document
|
||||||
from paperless_ai.indexing import update_llm_index
|
from paperless_ai.indexing import update_llm_index
|
||||||
@@ -310,8 +304,10 @@ def update_document_content_maybe_archive_file(document_id) -> None:
|
|||||||
|
|
||||||
mime_type = document.mime_type
|
mime_type = document.mime_type
|
||||||
|
|
||||||
parser_class: type[DocumentParser] | None = get_parser_class_for_mime_type(
|
parser_class = get_parser_registry().get_parser_for_file(
|
||||||
mime_type,
|
mime_type,
|
||||||
|
document.original_filename or "",
|
||||||
|
document.source_path,
|
||||||
)
|
)
|
||||||
|
|
||||||
if not parser_class:
|
if not parser_class:
|
||||||
@@ -321,44 +317,13 @@ def update_document_content_maybe_archive_file(document_id) -> None:
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
parser: DocumentParser = parser_class(logging_group=uuid.uuid4())
|
with parser_class() as parser:
|
||||||
|
parser.configure(ParserContext())
|
||||||
parser_is_new_style = isinstance(
|
|
||||||
parser,
|
|
||||||
(
|
|
||||||
MailDocumentParser,
|
|
||||||
RasterisedDocumentParser,
|
|
||||||
RemoteDocumentParser,
|
|
||||||
TextDocumentParser,
|
|
||||||
TikaDocumentParser,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
# TODO(stumpylog): Remove branch in the future when all parsers use new protocol
|
|
||||||
if parser_is_new_style:
|
|
||||||
parser.__enter__()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# TODO(stumpylog): Remove branch in the future when all parsers use new protocol
|
|
||||||
if parser_is_new_style:
|
|
||||||
parser.configure(ParserContext())
|
|
||||||
parser.parse(document.source_path, mime_type)
|
parser.parse(document.source_path, mime_type)
|
||||||
else:
|
|
||||||
parser.parse(
|
|
||||||
document.source_path,
|
|
||||||
mime_type,
|
|
||||||
document.get_public_filename(),
|
|
||||||
)
|
|
||||||
|
|
||||||
# TODO(stumpylog): Remove branch in the future when all parsers use new protocol
|
|
||||||
if parser_is_new_style:
|
|
||||||
thumbnail = parser.get_thumbnail(document.source_path, mime_type)
|
thumbnail = parser.get_thumbnail(document.source_path, mime_type)
|
||||||
else:
|
|
||||||
thumbnail = parser.get_thumbnail(
|
|
||||||
document.source_path,
|
|
||||||
mime_type,
|
|
||||||
document.get_public_filename(),
|
|
||||||
)
|
|
||||||
|
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
oldDocument = Document.objects.get(pk=document.pk)
|
oldDocument = Document.objects.get(pk=document.pk)
|
||||||
@@ -438,21 +403,6 @@ def update_document_content_maybe_archive_file(document_id) -> None:
|
|||||||
logger.exception(
|
logger.exception(
|
||||||
f"Error while parsing document {document} (ID: {document_id})",
|
f"Error while parsing document {document} (ID: {document_id})",
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
# TODO(stumpylog): Remove branch in the future when all parsers use new protocol
|
|
||||||
if isinstance(
|
|
||||||
parser,
|
|
||||||
(
|
|
||||||
MailDocumentParser,
|
|
||||||
RasterisedDocumentParser,
|
|
||||||
RemoteDocumentParser,
|
|
||||||
TextDocumentParser,
|
|
||||||
TikaDocumentParser,
|
|
||||||
),
|
|
||||||
):
|
|
||||||
parser.__exit__(None, None, None)
|
|
||||||
else:
|
|
||||||
parser.cleanup()
|
|
||||||
|
|
||||||
|
|
||||||
@shared_task
|
@shared_task
|
||||||
|
|||||||
@@ -1144,56 +1144,6 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
|||||||
self.assertEqual(len(response.data["all"]), 50)
|
self.assertEqual(len(response.data["all"]), 50)
|
||||||
self.assertCountEqual(response.data["all"], [d.id for d in docs])
|
self.assertCountEqual(response.data["all"], [d.id for d in docs])
|
||||||
|
|
||||||
def test_list_with_include_selection_data(self) -> None:
|
|
||||||
correspondent = Correspondent.objects.create(name="c1")
|
|
||||||
doc_type = DocumentType.objects.create(name="dt1")
|
|
||||||
storage_path = StoragePath.objects.create(name="sp1")
|
|
||||||
tag = Tag.objects.create(name="tag")
|
|
||||||
|
|
||||||
matching_doc = Document.objects.create(
|
|
||||||
checksum="A",
|
|
||||||
correspondent=correspondent,
|
|
||||||
document_type=doc_type,
|
|
||||||
storage_path=storage_path,
|
|
||||||
)
|
|
||||||
matching_doc.tags.add(tag)
|
|
||||||
|
|
||||||
non_matching_doc = Document.objects.create(checksum="B")
|
|
||||||
non_matching_doc.tags.add(Tag.objects.create(name="other"))
|
|
||||||
|
|
||||||
response = self.client.get(
|
|
||||||
f"/api/documents/?tags__id__in={tag.id}&include_selection_data=true",
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
|
||||||
self.assertIn("selection_data", response.data)
|
|
||||||
|
|
||||||
selected_correspondent = next(
|
|
||||||
item
|
|
||||||
for item in response.data["selection_data"]["selected_correspondents"]
|
|
||||||
if item["id"] == correspondent.id
|
|
||||||
)
|
|
||||||
selected_tag = next(
|
|
||||||
item
|
|
||||||
for item in response.data["selection_data"]["selected_tags"]
|
|
||||||
if item["id"] == tag.id
|
|
||||||
)
|
|
||||||
selected_type = next(
|
|
||||||
item
|
|
||||||
for item in response.data["selection_data"]["selected_document_types"]
|
|
||||||
if item["id"] == doc_type.id
|
|
||||||
)
|
|
||||||
selected_storage_path = next(
|
|
||||||
item
|
|
||||||
for item in response.data["selection_data"]["selected_storage_paths"]
|
|
||||||
if item["id"] == storage_path.id
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(selected_correspondent["document_count"], 1)
|
|
||||||
self.assertEqual(selected_tag["document_count"], 1)
|
|
||||||
self.assertEqual(selected_type["document_count"], 1)
|
|
||||||
self.assertEqual(selected_storage_path["document_count"], 1)
|
|
||||||
|
|
||||||
def test_statistics(self) -> None:
|
def test_statistics(self) -> None:
|
||||||
doc1 = Document.objects.create(
|
doc1 = Document.objects.create(
|
||||||
title="none1",
|
title="none1",
|
||||||
|
|||||||
@@ -89,46 +89,6 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
|||||||
self.assertEqual(len(results), 0)
|
self.assertEqual(len(results), 0)
|
||||||
self.assertCountEqual(response.data["all"], [])
|
self.assertCountEqual(response.data["all"], [])
|
||||||
|
|
||||||
def test_search_with_include_selection_data(self) -> None:
|
|
||||||
correspondent = Correspondent.objects.create(name="c1")
|
|
||||||
doc_type = DocumentType.objects.create(name="dt1")
|
|
||||||
storage_path = StoragePath.objects.create(name="sp1")
|
|
||||||
tag = Tag.objects.create(name="tag")
|
|
||||||
|
|
||||||
matching_doc = Document.objects.create(
|
|
||||||
title="bank statement",
|
|
||||||
content="bank content",
|
|
||||||
checksum="A",
|
|
||||||
correspondent=correspondent,
|
|
||||||
document_type=doc_type,
|
|
||||||
storage_path=storage_path,
|
|
||||||
)
|
|
||||||
matching_doc.tags.add(tag)
|
|
||||||
|
|
||||||
with AsyncWriter(index.open_index()) as writer:
|
|
||||||
index.update_document(writer, matching_doc)
|
|
||||||
|
|
||||||
response = self.client.get(
|
|
||||||
"/api/documents/?query=bank&include_selection_data=true",
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
|
||||||
self.assertIn("selection_data", response.data)
|
|
||||||
|
|
||||||
selected_correspondent = next(
|
|
||||||
item
|
|
||||||
for item in response.data["selection_data"]["selected_correspondents"]
|
|
||||||
if item["id"] == correspondent.id
|
|
||||||
)
|
|
||||||
selected_tag = next(
|
|
||||||
item
|
|
||||||
for item in response.data["selection_data"]["selected_tags"]
|
|
||||||
if item["id"] == tag.id
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(selected_correspondent["document_count"], 1)
|
|
||||||
self.assertEqual(selected_tag["document_count"], 1)
|
|
||||||
|
|
||||||
def test_search_custom_field_ordering(self) -> None:
|
def test_search_custom_field_ordering(self) -> None:
|
||||||
custom_field = CustomField.objects.create(
|
custom_field = CustomField.objects.create(
|
||||||
name="Sortable field",
|
name="Sortable field",
|
||||||
|
|||||||
@@ -13,8 +13,10 @@ class TestDocumentChecks(TestCase):
|
|||||||
def test_parser_check(self) -> None:
|
def test_parser_check(self) -> None:
|
||||||
self.assertEqual(parser_check(None), [])
|
self.assertEqual(parser_check(None), [])
|
||||||
|
|
||||||
with mock.patch("documents.checks.document_consumer_declaration.send") as m:
|
with mock.patch("documents.checks.get_parser_registry") as mock_registry_fn:
|
||||||
m.return_value = []
|
mock_registry = mock.MagicMock()
|
||||||
|
mock_registry.all_parsers.return_value = []
|
||||||
|
mock_registry_fn.return_value = mock_registry
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
parser_check(None),
|
parser_check(None),
|
||||||
|
|||||||
@@ -27,7 +27,6 @@ from documents.models import Document
|
|||||||
from documents.models import DocumentType
|
from documents.models import DocumentType
|
||||||
from documents.models import StoragePath
|
from documents.models import StoragePath
|
||||||
from documents.models import Tag
|
from documents.models import Tag
|
||||||
from documents.parsers import DocumentParser
|
|
||||||
from documents.parsers import ParseError
|
from documents.parsers import ParseError
|
||||||
from documents.plugins.helpers import ProgressStatusOptions
|
from documents.plugins.helpers import ProgressStatusOptions
|
||||||
from documents.tasks import sanity_check
|
from documents.tasks import sanity_check
|
||||||
@@ -38,62 +37,106 @@ from documents.tests.utils import GetConsumerMixin
|
|||||||
from paperless_mail.models import MailRule
|
from paperless_mail.models import MailRule
|
||||||
|
|
||||||
|
|
||||||
class _BaseTestParser(DocumentParser):
|
class _BaseNewStyleParser:
|
||||||
def get_settings(self) -> None:
|
"""Minimal ParserProtocol implementation for use in consumer tests."""
|
||||||
|
|
||||||
|
name: str = "test-parser"
|
||||||
|
version: str = "0.1"
|
||||||
|
author: str = "test"
|
||||||
|
url: str = "test"
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def supported_mime_types(cls) -> dict:
|
||||||
|
return {
|
||||||
|
"application/pdf": ".pdf",
|
||||||
|
"image/png": ".png",
|
||||||
|
"message/rfc822": ".eml",
|
||||||
|
}
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def score(cls, mime_type: str, filename: str, path=None):
|
||||||
|
return 0 if mime_type in cls.supported_mime_types() else None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def can_produce_archive(self) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
@property
|
||||||
|
def requires_pdf_rendition(self) -> bool:
|
||||||
|
return False
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self._tmpdir: Path | None = None
|
||||||
|
self._text: str | None = None
|
||||||
|
self._archive: Path | None = None
|
||||||
|
self._thumb: Path | None = None
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
self._tmpdir = Path(
|
||||||
|
tempfile.mkdtemp(prefix="paperless-test-", dir=settings.SCRATCH_DIR),
|
||||||
|
)
|
||||||
|
_, thumb = tempfile.mkstemp(suffix=".webp", dir=self._tmpdir)
|
||||||
|
self._thumb = Path(thumb)
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
|
||||||
|
if self._tmpdir and self._tmpdir.exists():
|
||||||
|
shutil.rmtree(self._tmpdir, ignore_errors=True)
|
||||||
|
|
||||||
|
def configure(self, context) -> None:
|
||||||
"""
|
"""
|
||||||
This parser does not implement additional settings yet
|
Test parser doesn't do anything with context
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def parse(self, document_path, mime_type, *, produce_archive: bool = True) -> None:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def get_text(self) -> str | None:
|
||||||
|
return self._text
|
||||||
|
|
||||||
|
def get_date(self):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def get_archive_path(self):
|
||||||
|
return self._archive
|
||||||
|
|
||||||
class DummyParser(_BaseTestParser):
|
def get_thumbnail(self, document_path, mime_type) -> Path:
|
||||||
def __init__(self, logging_group, scratch_dir, archive_path) -> None:
|
return self._thumb
|
||||||
super().__init__(logging_group, None)
|
|
||||||
_, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
|
|
||||||
self.archive_path = archive_path
|
|
||||||
|
|
||||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
def get_page_count(self, document_path, mime_type):
|
||||||
return self.fake_thumb
|
return None
|
||||||
|
|
||||||
def parse(self, document_path, mime_type, file_name=None) -> None:
|
def extract_metadata(self, document_path, mime_type) -> list:
|
||||||
self.text = "The Text"
|
return []
|
||||||
|
|
||||||
|
|
||||||
class CopyParser(_BaseTestParser):
|
class DummyParser(_BaseNewStyleParser):
|
||||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
_ARCHIVE_SRC = (
|
||||||
return self.fake_thumb
|
Path(__file__).parent / "samples" / "documents" / "archive" / "0000001.pdf"
|
||||||
|
)
|
||||||
|
|
||||||
def __init__(self, logging_group, progress_callback=None) -> None:
|
def parse(self, document_path, mime_type, *, produce_archive: bool = True) -> None:
|
||||||
super().__init__(logging_group, progress_callback)
|
self._text = "The Text"
|
||||||
_, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=self.tempdir)
|
if produce_archive and self._tmpdir:
|
||||||
|
self._archive = self._tmpdir / "archive.pdf"
|
||||||
def parse(self, document_path, mime_type, file_name=None) -> None:
|
shutil.copy(self._ARCHIVE_SRC, self._archive)
|
||||||
self.text = "The text"
|
|
||||||
self.archive_path = Path(self.tempdir / "archive.pdf")
|
|
||||||
shutil.copy(document_path, self.archive_path)
|
|
||||||
|
|
||||||
|
|
||||||
class FaultyParser(_BaseTestParser):
|
class CopyParser(_BaseNewStyleParser):
|
||||||
def __init__(self, logging_group, scratch_dir) -> None:
|
def parse(self, document_path, mime_type, *, produce_archive: bool = True) -> None:
|
||||||
super().__init__(logging_group)
|
self._text = "The text"
|
||||||
_, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
|
if produce_archive and self._tmpdir:
|
||||||
|
self._archive = self._tmpdir / "archive.pdf"
|
||||||
|
shutil.copy(document_path, self._archive)
|
||||||
|
|
||||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
|
||||||
return self.fake_thumb
|
|
||||||
|
|
||||||
def parse(self, document_path, mime_type, file_name=None):
|
class FaultyParser(_BaseNewStyleParser):
|
||||||
|
def parse(self, document_path, mime_type, *, produce_archive: bool = True) -> None:
|
||||||
raise ParseError("Does not compute.")
|
raise ParseError("Does not compute.")
|
||||||
|
|
||||||
|
|
||||||
class FaultyGenericExceptionParser(_BaseTestParser):
|
class FaultyGenericExceptionParser(_BaseNewStyleParser):
|
||||||
def __init__(self, logging_group, scratch_dir) -> None:
|
def parse(self, document_path, mime_type, *, produce_archive: bool = True) -> None:
|
||||||
super().__init__(logging_group)
|
|
||||||
_, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
|
|
||||||
|
|
||||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
|
||||||
return self.fake_thumb
|
|
||||||
|
|
||||||
def parse(self, document_path, mime_type, file_name=None):
|
|
||||||
raise Exception("Generic exception.")
|
raise Exception("Generic exception.")
|
||||||
|
|
||||||
|
|
||||||
@@ -147,38 +190,12 @@ class TestConsumer(
|
|||||||
self.assertEqual(payload["data"]["max_progress"], last_progress_max)
|
self.assertEqual(payload["data"]["max_progress"], last_progress_max)
|
||||||
self.assertEqual(payload["data"]["status"], last_status)
|
self.assertEqual(payload["data"]["status"], last_status)
|
||||||
|
|
||||||
def make_dummy_parser(self, logging_group, progress_callback=None):
|
|
||||||
return DummyParser(
|
|
||||||
logging_group,
|
|
||||||
self.dirs.scratch_dir,
|
|
||||||
self.get_test_archive_file(),
|
|
||||||
)
|
|
||||||
|
|
||||||
def make_faulty_parser(self, logging_group, progress_callback=None):
|
|
||||||
return FaultyParser(logging_group, self.dirs.scratch_dir)
|
|
||||||
|
|
||||||
def make_faulty_generic_exception_parser(
|
|
||||||
self,
|
|
||||||
logging_group,
|
|
||||||
progress_callback=None,
|
|
||||||
):
|
|
||||||
return FaultyGenericExceptionParser(logging_group, self.dirs.scratch_dir)
|
|
||||||
|
|
||||||
def setUp(self) -> None:
|
def setUp(self) -> None:
|
||||||
super().setUp()
|
super().setUp()
|
||||||
|
|
||||||
patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
|
patcher = mock.patch("documents.consumer.get_parser_registry")
|
||||||
m = patcher.start()
|
mock_registry = patcher.start()
|
||||||
m.return_value = [
|
mock_registry.return_value.get_parser_for_file.return_value = DummyParser
|
||||||
(
|
|
||||||
None,
|
|
||||||
{
|
|
||||||
"parser": self.make_dummy_parser,
|
|
||||||
"mime_types": {"application/pdf": ".pdf"},
|
|
||||||
"weight": 0,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
]
|
|
||||||
self.addCleanup(patcher.stop)
|
self.addCleanup(patcher.stop)
|
||||||
|
|
||||||
def get_test_file(self):
|
def get_test_file(self):
|
||||||
@@ -547,9 +564,9 @@ class TestConsumer(
|
|||||||
) as consumer:
|
) as consumer:
|
||||||
consumer.run()
|
consumer.run()
|
||||||
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
@mock.patch("documents.consumer.get_parser_registry")
|
||||||
def testNoParsers(self, m) -> None:
|
def testNoParsers(self, m) -> None:
|
||||||
m.return_value = []
|
m.return_value.get_parser_for_file.return_value = None
|
||||||
|
|
||||||
with self.assertRaisesMessage(
|
with self.assertRaisesMessage(
|
||||||
ConsumerError,
|
ConsumerError,
|
||||||
@@ -560,18 +577,9 @@ class TestConsumer(
|
|||||||
|
|
||||||
self._assert_first_last_send_progress(last_status="FAILED")
|
self._assert_first_last_send_progress(last_status="FAILED")
|
||||||
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
@mock.patch("documents.consumer.get_parser_registry")
|
||||||
def testFaultyParser(self, m) -> None:
|
def testFaultyParser(self, m) -> None:
|
||||||
m.return_value = [
|
m.return_value.get_parser_for_file.return_value = FaultyParser
|
||||||
(
|
|
||||||
None,
|
|
||||||
{
|
|
||||||
"parser": self.make_faulty_parser,
|
|
||||||
"mime_types": {"application/pdf": ".pdf"},
|
|
||||||
"weight": 0,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
with self.get_consumer(self.get_test_file()) as consumer:
|
with self.get_consumer(self.get_test_file()) as consumer:
|
||||||
with self.assertRaisesMessage(
|
with self.assertRaisesMessage(
|
||||||
@@ -582,18 +590,9 @@ class TestConsumer(
|
|||||||
|
|
||||||
self._assert_first_last_send_progress(last_status="FAILED")
|
self._assert_first_last_send_progress(last_status="FAILED")
|
||||||
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
@mock.patch("documents.consumer.get_parser_registry")
|
||||||
def testGenericParserException(self, m) -> None:
|
def testGenericParserException(self, m) -> None:
|
||||||
m.return_value = [
|
m.return_value.get_parser_for_file.return_value = FaultyGenericExceptionParser
|
||||||
(
|
|
||||||
None,
|
|
||||||
{
|
|
||||||
"parser": self.make_faulty_generic_exception_parser,
|
|
||||||
"mime_types": {"application/pdf": ".pdf"},
|
|
||||||
"weight": 0,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
with self.get_consumer(self.get_test_file()) as consumer:
|
with self.get_consumer(self.get_test_file()) as consumer:
|
||||||
with self.assertRaisesMessage(
|
with self.assertRaisesMessage(
|
||||||
@@ -1017,7 +1016,7 @@ class TestConsumer(
|
|||||||
self._assert_first_last_send_progress()
|
self._assert_first_last_send_progress()
|
||||||
|
|
||||||
@override_settings(FILENAME_FORMAT="{title}")
|
@override_settings(FILENAME_FORMAT="{title}")
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
@mock.patch("documents.consumer.get_parser_registry")
|
||||||
def test_similar_filenames(self, m) -> None:
|
def test_similar_filenames(self, m) -> None:
|
||||||
shutil.copy(
|
shutil.copy(
|
||||||
Path(__file__).parent / "samples" / "simple.pdf",
|
Path(__file__).parent / "samples" / "simple.pdf",
|
||||||
@@ -1031,16 +1030,7 @@ class TestConsumer(
|
|||||||
Path(__file__).parent / "samples" / "simple-noalpha.png",
|
Path(__file__).parent / "samples" / "simple-noalpha.png",
|
||||||
settings.CONSUMPTION_DIR / "simple.png.pdf",
|
settings.CONSUMPTION_DIR / "simple.png.pdf",
|
||||||
)
|
)
|
||||||
m.return_value = [
|
m.return_value.get_parser_for_file.return_value = CopyParser
|
||||||
(
|
|
||||||
None,
|
|
||||||
{
|
|
||||||
"parser": CopyParser,
|
|
||||||
"mime_types": {"application/pdf": ".pdf", "image/png": ".png"},
|
|
||||||
"weight": 0,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
with self.get_consumer(settings.CONSUMPTION_DIR / "simple.png") as consumer:
|
with self.get_consumer(settings.CONSUMPTION_DIR / "simple.png") as consumer:
|
||||||
consumer.run()
|
consumer.run()
|
||||||
@@ -1068,8 +1058,10 @@ class TestConsumer(
|
|||||||
|
|
||||||
sanity_check()
|
sanity_check()
|
||||||
|
|
||||||
|
@mock.patch("documents.consumer.get_parser_registry")
|
||||||
@mock.patch("documents.consumer.run_subprocess")
|
@mock.patch("documents.consumer.run_subprocess")
|
||||||
def test_try_to_clean_invalid_pdf(self, m) -> None:
|
def test_try_to_clean_invalid_pdf(self, m, mock_registry) -> None:
|
||||||
|
mock_registry.return_value.get_parser_for_file.return_value = None
|
||||||
shutil.copy(
|
shutil.copy(
|
||||||
Path(__file__).parent / "samples" / "invalid_pdf.pdf",
|
Path(__file__).parent / "samples" / "invalid_pdf.pdf",
|
||||||
settings.CONSUMPTION_DIR / "invalid_pdf.pdf",
|
settings.CONSUMPTION_DIR / "invalid_pdf.pdf",
|
||||||
@@ -1091,10 +1083,10 @@ class TestConsumer(
|
|||||||
|
|
||||||
@mock.patch("paperless_mail.models.MailRule.objects.get")
|
@mock.patch("paperless_mail.models.MailRule.objects.get")
|
||||||
@mock.patch("paperless.parsers.mail.MailDocumentParser.parse")
|
@mock.patch("paperless.parsers.mail.MailDocumentParser.parse")
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
@mock.patch("documents.consumer.get_parser_registry")
|
||||||
def test_mail_parser_receives_mailrule(
|
def test_mail_parser_receives_mailrule(
|
||||||
self,
|
self,
|
||||||
mock_consumer_declaration_send: mock.Mock,
|
mock_get_parser_registry: mock.Mock,
|
||||||
mock_mail_parser_parse: mock.Mock,
|
mock_mail_parser_parse: mock.Mock,
|
||||||
mock_mailrule_get: mock.Mock,
|
mock_mailrule_get: mock.Mock,
|
||||||
) -> None:
|
) -> None:
|
||||||
@@ -1106,18 +1098,11 @@ class TestConsumer(
|
|||||||
THEN:
|
THEN:
|
||||||
- The mail parser should receive the mail rule
|
- The mail parser should receive the mail rule
|
||||||
"""
|
"""
|
||||||
from paperless_mail.signals import get_parser as mail_get_parser
|
from paperless.parsers.mail import MailDocumentParser
|
||||||
|
|
||||||
mock_consumer_declaration_send.return_value = [
|
mock_get_parser_registry.return_value.get_parser_for_file.return_value = (
|
||||||
(
|
MailDocumentParser
|
||||||
None,
|
)
|
||||||
{
|
|
||||||
"parser": mail_get_parser,
|
|
||||||
"mime_types": {"message/rfc822": ".eml"},
|
|
||||||
"weight": 0,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
]
|
|
||||||
mock_mailrule_get.return_value = mock.Mock(
|
mock_mailrule_get.return_value = mock.Mock(
|
||||||
pdf_layout=MailRule.PdfLayout.HTML_ONLY,
|
pdf_layout=MailRule.PdfLayout.HTML_ONLY,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,132 +1,16 @@
|
|||||||
from tempfile import TemporaryDirectory
|
|
||||||
from unittest import mock
|
|
||||||
|
|
||||||
from django.apps import apps
|
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from django.test import override_settings
|
from django.test import override_settings
|
||||||
|
|
||||||
from documents.parsers import get_default_file_extension
|
from documents.parsers import get_default_file_extension
|
||||||
from documents.parsers import get_parser_class_for_mime_type
|
|
||||||
from documents.parsers import get_supported_file_extensions
|
from documents.parsers import get_supported_file_extensions
|
||||||
from documents.parsers import is_file_ext_supported
|
from documents.parsers import is_file_ext_supported
|
||||||
|
from paperless.parsers.registry import get_parser_registry
|
||||||
|
from paperless.parsers.registry import reset_parser_registry
|
||||||
from paperless.parsers.tesseract import RasterisedDocumentParser
|
from paperless.parsers.tesseract import RasterisedDocumentParser
|
||||||
from paperless.parsers.text import TextDocumentParser
|
from paperless.parsers.text import TextDocumentParser
|
||||||
from paperless.parsers.tika import TikaDocumentParser
|
from paperless.parsers.tika import TikaDocumentParser
|
||||||
|
|
||||||
|
|
||||||
class TestParserDiscovery(TestCase):
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
|
||||||
def test_get_parser_class_1_parser(self, m, *args) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- Parser declared for a given mimetype
|
|
||||||
WHEN:
|
|
||||||
- Attempt to get parser for the mimetype
|
|
||||||
THEN:
|
|
||||||
- Declared parser class is returned
|
|
||||||
"""
|
|
||||||
|
|
||||||
class DummyParser:
|
|
||||||
pass
|
|
||||||
|
|
||||||
m.return_value = (
|
|
||||||
(
|
|
||||||
None,
|
|
||||||
{
|
|
||||||
"weight": 0,
|
|
||||||
"parser": DummyParser,
|
|
||||||
"mime_types": {"application/pdf": ".pdf"},
|
|
||||||
},
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(get_parser_class_for_mime_type("application/pdf"), DummyParser)
|
|
||||||
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
|
||||||
def test_get_parser_class_n_parsers(self, m, *args) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- Two parsers declared for a given mimetype
|
|
||||||
- Second parser has a higher weight
|
|
||||||
WHEN:
|
|
||||||
- Attempt to get parser for the mimetype
|
|
||||||
THEN:
|
|
||||||
- Second parser class is returned
|
|
||||||
"""
|
|
||||||
|
|
||||||
class DummyParser1:
|
|
||||||
pass
|
|
||||||
|
|
||||||
class DummyParser2:
|
|
||||||
pass
|
|
||||||
|
|
||||||
m.return_value = (
|
|
||||||
(
|
|
||||||
None,
|
|
||||||
{
|
|
||||||
"weight": 0,
|
|
||||||
"parser": DummyParser1,
|
|
||||||
"mime_types": {"application/pdf": ".pdf"},
|
|
||||||
},
|
|
||||||
),
|
|
||||||
(
|
|
||||||
None,
|
|
||||||
{
|
|
||||||
"weight": 1,
|
|
||||||
"parser": DummyParser2,
|
|
||||||
"mime_types": {"application/pdf": ".pdf"},
|
|
||||||
},
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(
|
|
||||||
get_parser_class_for_mime_type("application/pdf"),
|
|
||||||
DummyParser2,
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
|
||||||
def test_get_parser_class_0_parsers(self, m, *args) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- No parsers are declared
|
|
||||||
WHEN:
|
|
||||||
- Attempt to get parser for the mimetype
|
|
||||||
THEN:
|
|
||||||
- No parser class is returned
|
|
||||||
"""
|
|
||||||
m.return_value = []
|
|
||||||
with TemporaryDirectory():
|
|
||||||
self.assertIsNone(get_parser_class_for_mime_type("application/pdf"))
|
|
||||||
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
|
||||||
def test_get_parser_class_no_valid_parser(self, m, *args) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- No parser declared for a given mimetype
|
|
||||||
- Parser declared for a different mimetype
|
|
||||||
WHEN:
|
|
||||||
- Attempt to get parser for the given mimetype
|
|
||||||
THEN:
|
|
||||||
- No parser class is returned
|
|
||||||
"""
|
|
||||||
|
|
||||||
class DummyParser:
|
|
||||||
pass
|
|
||||||
|
|
||||||
m.return_value = (
|
|
||||||
(
|
|
||||||
None,
|
|
||||||
{
|
|
||||||
"weight": 0,
|
|
||||||
"parser": DummyParser,
|
|
||||||
"mime_types": {"application/pdf": ".pdf"},
|
|
||||||
},
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertIsNone(get_parser_class_for_mime_type("image/tiff"))
|
|
||||||
|
|
||||||
|
|
||||||
class TestParserAvailability(TestCase):
|
class TestParserAvailability(TestCase):
|
||||||
def test_tesseract_parser(self) -> None:
|
def test_tesseract_parser(self) -> None:
|
||||||
"""
|
"""
|
||||||
@@ -151,7 +35,7 @@ class TestParserAvailability(TestCase):
|
|||||||
self.assertIn(ext, supported_exts)
|
self.assertIn(ext, supported_exts)
|
||||||
self.assertEqual(get_default_file_extension(mime_type), ext)
|
self.assertEqual(get_default_file_extension(mime_type), ext)
|
||||||
self.assertIsInstance(
|
self.assertIsInstance(
|
||||||
get_parser_class_for_mime_type(mime_type)(logging_group=None),
|
get_parser_registry().get_parser_for_file(mime_type, "")(),
|
||||||
RasterisedDocumentParser,
|
RasterisedDocumentParser,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -175,7 +59,7 @@ class TestParserAvailability(TestCase):
|
|||||||
self.assertIn(ext, supported_exts)
|
self.assertIn(ext, supported_exts)
|
||||||
self.assertEqual(get_default_file_extension(mime_type), ext)
|
self.assertEqual(get_default_file_extension(mime_type), ext)
|
||||||
self.assertIsInstance(
|
self.assertIsInstance(
|
||||||
get_parser_class_for_mime_type(mime_type)(logging_group=None),
|
get_parser_registry().get_parser_for_file(mime_type, "")(),
|
||||||
TextDocumentParser,
|
TextDocumentParser,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -198,22 +82,23 @@ class TestParserAvailability(TestCase):
|
|||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Force the app ready to notice the settings override
|
self.addCleanup(reset_parser_registry)
|
||||||
with override_settings(TIKA_ENABLED=True, INSTALLED_APPS=["paperless_tika"]):
|
|
||||||
app = apps.get_app_config("paperless_tika")
|
# Reset and rebuild the registry with Tika enabled.
|
||||||
app.ready()
|
with override_settings(TIKA_ENABLED=True):
|
||||||
|
reset_parser_registry()
|
||||||
supported_exts = get_supported_file_extensions()
|
supported_exts = get_supported_file_extensions()
|
||||||
|
|
||||||
for mime_type, ext in supported_mimes_and_exts:
|
for mime_type, ext in supported_mimes_and_exts:
|
||||||
self.assertIn(ext, supported_exts)
|
self.assertIn(ext, supported_exts)
|
||||||
self.assertEqual(get_default_file_extension(mime_type), ext)
|
self.assertEqual(get_default_file_extension(mime_type), ext)
|
||||||
self.assertIsInstance(
|
self.assertIsInstance(
|
||||||
get_parser_class_for_mime_type(mime_type)(logging_group=None),
|
get_parser_registry().get_parser_for_file(mime_type, "")(),
|
||||||
TikaDocumentParser,
|
TikaDocumentParser,
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_no_parser_for_mime(self) -> None:
|
def test_no_parser_for_mime(self) -> None:
|
||||||
self.assertIsNone(get_parser_class_for_mime_type("text/sdgsdf"))
|
self.assertIsNone(get_parser_registry().get_parser_for_file("text/sdgsdf", ""))
|
||||||
|
|
||||||
def test_default_extension(self) -> None:
|
def test_default_extension(self) -> None:
|
||||||
# Test no parser declared still returns a an extension
|
# Test no parser declared still returns a an extension
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ import tempfile
|
|||||||
import zipfile
|
import zipfile
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from contextlib import nullcontext
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from time import mktime
|
from time import mktime
|
||||||
@@ -159,7 +158,6 @@ from documents.models import UiSettings
|
|||||||
from documents.models import Workflow
|
from documents.models import Workflow
|
||||||
from documents.models import WorkflowAction
|
from documents.models import WorkflowAction
|
||||||
from documents.models import WorkflowTrigger
|
from documents.models import WorkflowTrigger
|
||||||
from documents.parsers import get_parser_class_for_mime_type
|
|
||||||
from documents.permissions import AcknowledgeTasksPermissions
|
from documents.permissions import AcknowledgeTasksPermissions
|
||||||
from documents.permissions import PaperlessAdminPermissions
|
from documents.permissions import PaperlessAdminPermissions
|
||||||
from documents.permissions import PaperlessNotePermissions
|
from documents.permissions import PaperlessNotePermissions
|
||||||
@@ -227,7 +225,7 @@ from paperless.celery import app as celery_app
|
|||||||
from paperless.config import AIConfig
|
from paperless.config import AIConfig
|
||||||
from paperless.config import GeneralConfig
|
from paperless.config import GeneralConfig
|
||||||
from paperless.models import ApplicationConfiguration
|
from paperless.models import ApplicationConfiguration
|
||||||
from paperless.parsers import ParserProtocol
|
from paperless.parsers.registry import get_parser_registry
|
||||||
from paperless.serialisers import GroupSerializer
|
from paperless.serialisers import GroupSerializer
|
||||||
from paperless.serialisers import UserSerializer
|
from paperless.serialisers import UserSerializer
|
||||||
from paperless.views import StandardPagination
|
from paperless.views import StandardPagination
|
||||||
@@ -838,61 +836,6 @@ class DocumentViewSet(
|
|||||||
"custom_field_",
|
"custom_field_",
|
||||||
)
|
)
|
||||||
|
|
||||||
def _get_selection_data_for_queryset(self, queryset):
|
|
||||||
correspondents = Correspondent.objects.annotate(
|
|
||||||
document_count=Count(
|
|
||||||
"documents",
|
|
||||||
filter=Q(documents__in=queryset),
|
|
||||||
distinct=True,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
tags = Tag.objects.annotate(
|
|
||||||
document_count=Count(
|
|
||||||
"documents",
|
|
||||||
filter=Q(documents__in=queryset),
|
|
||||||
distinct=True,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
document_types = DocumentType.objects.annotate(
|
|
||||||
document_count=Count(
|
|
||||||
"documents",
|
|
||||||
filter=Q(documents__in=queryset),
|
|
||||||
distinct=True,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
storage_paths = StoragePath.objects.annotate(
|
|
||||||
document_count=Count(
|
|
||||||
"documents",
|
|
||||||
filter=Q(documents__in=queryset),
|
|
||||||
distinct=True,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
custom_fields = CustomField.objects.annotate(
|
|
||||||
document_count=Count(
|
|
||||||
"fields__document",
|
|
||||||
filter=Q(fields__document__in=queryset),
|
|
||||||
distinct=True,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"selected_correspondents": [
|
|
||||||
{"id": t.id, "document_count": t.document_count} for t in correspondents
|
|
||||||
],
|
|
||||||
"selected_tags": [
|
|
||||||
{"id": t.id, "document_count": t.document_count} for t in tags
|
|
||||||
],
|
|
||||||
"selected_document_types": [
|
|
||||||
{"id": t.id, "document_count": t.document_count} for t in document_types
|
|
||||||
],
|
|
||||||
"selected_storage_paths": [
|
|
||||||
{"id": t.id, "document_count": t.document_count} for t in storage_paths
|
|
||||||
],
|
|
||||||
"selected_custom_fields": [
|
|
||||||
{"id": t.id, "document_count": t.document_count} for t in custom_fields
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
def get_queryset(self):
|
def get_queryset(self):
|
||||||
latest_version_content = Subquery(
|
latest_version_content = Subquery(
|
||||||
Document.objects.filter(root_document=OuterRef("pk"))
|
Document.objects.filter(root_document=OuterRef("pk"))
|
||||||
@@ -1040,25 +983,6 @@ class DocumentViewSet(
|
|||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def list(self, request, *args, **kwargs):
|
|
||||||
if not get_boolean(
|
|
||||||
str(request.query_params.get("include_selection_data", "false")),
|
|
||||||
):
|
|
||||||
return super().list(request, *args, **kwargs)
|
|
||||||
|
|
||||||
queryset = self.filter_queryset(self.get_queryset())
|
|
||||||
selection_data = self._get_selection_data_for_queryset(queryset)
|
|
||||||
|
|
||||||
page = self.paginate_queryset(queryset)
|
|
||||||
if page is not None:
|
|
||||||
serializer = self.get_serializer(page, many=True)
|
|
||||||
response = self.get_paginated_response(serializer.data)
|
|
||||||
response.data["selection_data"] = selection_data
|
|
||||||
return response
|
|
||||||
|
|
||||||
serializer = self.get_serializer(queryset, many=True)
|
|
||||||
return Response({"results": serializer.data, "selection_data": selection_data})
|
|
||||||
|
|
||||||
def destroy(self, request, *args, **kwargs):
|
def destroy(self, request, *args, **kwargs):
|
||||||
from documents import index
|
from documents import index
|
||||||
|
|
||||||
@@ -1158,17 +1082,17 @@ class DocumentViewSet(
|
|||||||
if not Path(file).is_file():
|
if not Path(file).is_file():
|
||||||
return None
|
return None
|
||||||
|
|
||||||
parser_class = get_parser_class_for_mime_type(mime_type)
|
parser_class = get_parser_registry().get_parser_for_file(
|
||||||
|
mime_type,
|
||||||
|
Path(file).name,
|
||||||
|
Path(file),
|
||||||
|
)
|
||||||
if parser_class:
|
if parser_class:
|
||||||
parser = parser_class(progress_callback=None, logging_group=None)
|
|
||||||
cm = parser if isinstance(parser, ParserProtocol) else nullcontext(parser)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with cm:
|
with parser_class() as parser:
|
||||||
return parser.extract_metadata(file, mime_type)
|
return parser.extract_metadata(file, mime_type)
|
||||||
except Exception: # pragma: no cover
|
except Exception: # pragma: no cover
|
||||||
logger.exception(f"Issue getting metadata for {file}")
|
logger.exception(f"Issue getting metadata for {file}")
|
||||||
# TODO: cover GPG errors, remove later.
|
|
||||||
return []
|
return []
|
||||||
else: # pragma: no cover
|
else: # pragma: no cover
|
||||||
logger.warning(f"No parser for {mime_type}")
|
logger.warning(f"No parser for {mime_type}")
|
||||||
@@ -2099,21 +2023,6 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
|||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
|
|
||||||
if get_boolean(
|
|
||||||
str(
|
|
||||||
request.query_params.get(
|
|
||||||
"include_selection_data",
|
|
||||||
"false",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
):
|
|
||||||
result_ids = response.data.get("all", [])
|
|
||||||
response.data["selection_data"] = (
|
|
||||||
self._get_selection_data_for_queryset(
|
|
||||||
Document.objects.filter(pk__in=result_ids),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return response
|
return response
|
||||||
except NotFound:
|
except NotFound:
|
||||||
raise
|
raise
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ msgid ""
|
|||||||
msgstr ""
|
msgstr ""
|
||||||
"Project-Id-Version: paperless-ngx\n"
|
"Project-Id-Version: paperless-ngx\n"
|
||||||
"Report-Msgid-Bugs-To: \n"
|
"Report-Msgid-Bugs-To: \n"
|
||||||
"POT-Creation-Date: 2026-03-21 09:25+0000\n"
|
"POT-Creation-Date: 2026-03-22 13:54+0000\n"
|
||||||
"PO-Revision-Date: 2022-02-17 04:17\n"
|
"PO-Revision-Date: 2022-02-17 04:17\n"
|
||||||
"Last-Translator: \n"
|
"Last-Translator: \n"
|
||||||
"Language-Team: English\n"
|
"Language-Team: English\n"
|
||||||
@@ -1300,7 +1300,7 @@ msgid "workflow runs"
|
|||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:463 documents/serialisers.py:815
|
#: documents/serialisers.py:463 documents/serialisers.py:815
|
||||||
#: documents/serialisers.py:2501 documents/views.py:1992
|
#: documents/serialisers.py:2501 documents/views.py:1990
|
||||||
#: paperless_mail/serialisers.py:143
|
#: paperless_mail/serialisers.py:143
|
||||||
msgid "Insufficient permissions."
|
msgid "Insufficient permissions."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
@@ -1341,7 +1341,7 @@ msgstr ""
|
|||||||
msgid "Duplicate document identifiers are not allowed."
|
msgid "Duplicate document identifiers are not allowed."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:2587 documents/views.py:3598
|
#: documents/serialisers.py:2587 documents/views.py:3596
|
||||||
#, python-format
|
#, python-format
|
||||||
msgid "Documents not found: %(ids)s"
|
msgid "Documents not found: %(ids)s"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
@@ -1605,24 +1605,24 @@ msgstr ""
|
|||||||
msgid "Unable to parse URI {value}"
|
msgid "Unable to parse URI {value}"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/views.py:1985
|
#: documents/views.py:1983
|
||||||
msgid "Invalid more_like_id"
|
msgid "Invalid more_like_id"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/views.py:3610
|
#: documents/views.py:3608
|
||||||
#, python-format
|
#, python-format
|
||||||
msgid "Insufficient permissions to share document %(id)s."
|
msgid "Insufficient permissions to share document %(id)s."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/views.py:3653
|
#: documents/views.py:3651
|
||||||
msgid "Bundle is already being processed."
|
msgid "Bundle is already being processed."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/views.py:3710
|
#: documents/views.py:3708
|
||||||
msgid "The share link bundle is still being prepared. Please try again later."
|
msgid "The share link bundle is still being prepared. Please try again later."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/views.py:3720
|
#: documents/views.py:3718
|
||||||
msgid "The share link bundle is unavailable."
|
msgid "The share link bundle is unavailable."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
@@ -1862,151 +1862,151 @@ msgstr ""
|
|||||||
msgid "paperless application settings"
|
msgid "paperless application settings"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:521
|
#: paperless/settings/__init__.py:518
|
||||||
msgid "English (US)"
|
msgid "English (US)"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:522
|
#: paperless/settings/__init__.py:519
|
||||||
msgid "Arabic"
|
msgid "Arabic"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:523
|
#: paperless/settings/__init__.py:520
|
||||||
msgid "Afrikaans"
|
msgid "Afrikaans"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:524
|
#: paperless/settings/__init__.py:521
|
||||||
msgid "Belarusian"
|
msgid "Belarusian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:525
|
#: paperless/settings/__init__.py:522
|
||||||
msgid "Bulgarian"
|
msgid "Bulgarian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:526
|
#: paperless/settings/__init__.py:523
|
||||||
msgid "Catalan"
|
msgid "Catalan"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:527
|
#: paperless/settings/__init__.py:524
|
||||||
msgid "Czech"
|
msgid "Czech"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:528
|
#: paperless/settings/__init__.py:525
|
||||||
msgid "Danish"
|
msgid "Danish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:529
|
#: paperless/settings/__init__.py:526
|
||||||
msgid "German"
|
msgid "German"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:530
|
#: paperless/settings/__init__.py:527
|
||||||
msgid "Greek"
|
msgid "Greek"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:531
|
#: paperless/settings/__init__.py:528
|
||||||
msgid "English (GB)"
|
msgid "English (GB)"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:532
|
#: paperless/settings/__init__.py:529
|
||||||
msgid "Spanish"
|
msgid "Spanish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:533
|
#: paperless/settings/__init__.py:530
|
||||||
msgid "Persian"
|
msgid "Persian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:534
|
#: paperless/settings/__init__.py:531
|
||||||
msgid "Finnish"
|
msgid "Finnish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:535
|
#: paperless/settings/__init__.py:532
|
||||||
msgid "French"
|
msgid "French"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:536
|
#: paperless/settings/__init__.py:533
|
||||||
msgid "Hungarian"
|
msgid "Hungarian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:537
|
#: paperless/settings/__init__.py:534
|
||||||
msgid "Indonesian"
|
msgid "Indonesian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:538
|
#: paperless/settings/__init__.py:535
|
||||||
msgid "Italian"
|
msgid "Italian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:539
|
#: paperless/settings/__init__.py:536
|
||||||
msgid "Japanese"
|
msgid "Japanese"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:540
|
#: paperless/settings/__init__.py:537
|
||||||
msgid "Korean"
|
msgid "Korean"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:541
|
#: paperless/settings/__init__.py:538
|
||||||
msgid "Luxembourgish"
|
msgid "Luxembourgish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:542
|
#: paperless/settings/__init__.py:539
|
||||||
msgid "Norwegian"
|
msgid "Norwegian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:543
|
#: paperless/settings/__init__.py:540
|
||||||
msgid "Dutch"
|
msgid "Dutch"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:544
|
#: paperless/settings/__init__.py:541
|
||||||
msgid "Polish"
|
msgid "Polish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:545
|
#: paperless/settings/__init__.py:542
|
||||||
msgid "Portuguese (Brazil)"
|
msgid "Portuguese (Brazil)"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:546
|
#: paperless/settings/__init__.py:543
|
||||||
msgid "Portuguese"
|
msgid "Portuguese"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:547
|
#: paperless/settings/__init__.py:544
|
||||||
msgid "Romanian"
|
msgid "Romanian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:548
|
#: paperless/settings/__init__.py:545
|
||||||
msgid "Russian"
|
msgid "Russian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:549
|
#: paperless/settings/__init__.py:546
|
||||||
msgid "Slovak"
|
msgid "Slovak"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:550
|
#: paperless/settings/__init__.py:547
|
||||||
msgid "Slovenian"
|
msgid "Slovenian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:551
|
#: paperless/settings/__init__.py:548
|
||||||
msgid "Serbian"
|
msgid "Serbian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:552
|
#: paperless/settings/__init__.py:549
|
||||||
msgid "Swedish"
|
msgid "Swedish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:553
|
#: paperless/settings/__init__.py:550
|
||||||
msgid "Turkish"
|
msgid "Turkish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:554
|
#: paperless/settings/__init__.py:551
|
||||||
msgid "Ukrainian"
|
msgid "Ukrainian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:555
|
#: paperless/settings/__init__.py:552
|
||||||
msgid "Vietnamese"
|
msgid "Vietnamese"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:556
|
#: paperless/settings/__init__.py:553
|
||||||
msgid "Chinese Simplified"
|
msgid "Chinese Simplified"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:557
|
#: paperless/settings/__init__.py:554
|
||||||
msgid "Chinese Traditional"
|
msgid "Chinese Traditional"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
@@ -2052,7 +2052,7 @@ msgid ""
|
|||||||
"process all matching rules that you have defined."
|
"process all matching rules that you have defined."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless_mail/apps.py:11
|
#: paperless_mail/apps.py:8
|
||||||
msgid "Paperless mail"
|
msgid "Paperless mail"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import os
|
|||||||
import pwd
|
import pwd
|
||||||
import shutil
|
import shutil
|
||||||
import stat
|
import stat
|
||||||
|
import subprocess
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@@ -299,3 +300,62 @@ def check_deprecated_db_settings(
|
|||||||
)
|
)
|
||||||
|
|
||||||
return warnings
|
return warnings
|
||||||
|
|
||||||
|
|
||||||
|
@register()
|
||||||
|
def check_remote_parser_configured(app_configs, **kwargs) -> list[Error]:
|
||||||
|
if settings.REMOTE_OCR_ENGINE == "azureai" and not (
|
||||||
|
settings.REMOTE_OCR_ENDPOINT and settings.REMOTE_OCR_API_KEY
|
||||||
|
):
|
||||||
|
return [
|
||||||
|
Error(
|
||||||
|
"Azure AI remote parser requires endpoint and API key to be configured.",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def get_tesseract_langs():
|
||||||
|
proc = subprocess.run(
|
||||||
|
[shutil.which("tesseract"), "--list-langs"],
|
||||||
|
capture_output=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Decode bytes to string, split on newlines, trim out the header
|
||||||
|
proc_lines = proc.stdout.decode("utf8", errors="ignore").strip().split("\n")[1:]
|
||||||
|
|
||||||
|
return [x.strip() for x in proc_lines]
|
||||||
|
|
||||||
|
|
||||||
|
@register()
|
||||||
|
def check_default_language_available(app_configs, **kwargs):
|
||||||
|
errs = []
|
||||||
|
|
||||||
|
if not settings.OCR_LANGUAGE:
|
||||||
|
errs.append(
|
||||||
|
Warning(
|
||||||
|
"No OCR language has been specified with PAPERLESS_OCR_LANGUAGE. "
|
||||||
|
"This means that tesseract will fallback to english.",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
return errs
|
||||||
|
|
||||||
|
# binaries_check in paperless will check and report if this doesn't exist
|
||||||
|
# So skip trying to do anything here and let that handle missing binaries
|
||||||
|
if shutil.which("tesseract") is not None:
|
||||||
|
installed_langs = get_tesseract_langs()
|
||||||
|
|
||||||
|
specified_langs = [x.strip() for x in settings.OCR_LANGUAGE.split("+")]
|
||||||
|
|
||||||
|
for lang in specified_langs:
|
||||||
|
if lang not in installed_langs:
|
||||||
|
errs.append(
|
||||||
|
Error(
|
||||||
|
f"The selected ocr language {lang} is "
|
||||||
|
f"not installed. Paperless cannot OCR your documents "
|
||||||
|
f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
return errs
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ name, version, author, url, supported_mime_types (callable), score (callable).
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import threading
|
||||||
from importlib.metadata import entry_points
|
from importlib.metadata import entry_points
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
@@ -49,6 +50,7 @@ logger = logging.getLogger("paperless.parsers.registry")
|
|||||||
|
|
||||||
_registry: ParserRegistry | None = None
|
_registry: ParserRegistry | None = None
|
||||||
_discovery_complete: bool = False
|
_discovery_complete: bool = False
|
||||||
|
_lock = threading.Lock()
|
||||||
|
|
||||||
# Attribute names that every registered external parser class must expose.
|
# Attribute names that every registered external parser class must expose.
|
||||||
_REQUIRED_ATTRS: tuple[str, ...] = (
|
_REQUIRED_ATTRS: tuple[str, ...] = (
|
||||||
@@ -74,7 +76,6 @@ def get_parser_registry() -> ParserRegistry:
|
|||||||
1. Creates a new ParserRegistry.
|
1. Creates a new ParserRegistry.
|
||||||
2. Calls register_defaults to install built-in parsers.
|
2. Calls register_defaults to install built-in parsers.
|
||||||
3. Calls discover to load third-party plugins via importlib.metadata entrypoints.
|
3. Calls discover to load third-party plugins via importlib.metadata entrypoints.
|
||||||
4. Calls log_summary to emit a startup summary.
|
|
||||||
|
|
||||||
Subsequent calls return the same instance immediately.
|
Subsequent calls return the same instance immediately.
|
||||||
|
|
||||||
@@ -85,13 +86,14 @@ def get_parser_registry() -> ParserRegistry:
|
|||||||
"""
|
"""
|
||||||
global _registry, _discovery_complete
|
global _registry, _discovery_complete
|
||||||
|
|
||||||
|
with _lock:
|
||||||
if _registry is None:
|
if _registry is None:
|
||||||
_registry = ParserRegistry()
|
r = ParserRegistry()
|
||||||
_registry.register_defaults()
|
r.register_defaults()
|
||||||
|
_registry = r
|
||||||
|
|
||||||
if not _discovery_complete:
|
if not _discovery_complete:
|
||||||
_registry.discover()
|
_registry.discover()
|
||||||
_registry.log_summary()
|
|
||||||
_discovery_complete = True
|
_discovery_complete = True
|
||||||
|
|
||||||
return _registry
|
return _registry
|
||||||
@@ -113,9 +115,11 @@ def init_builtin_parsers() -> None:
|
|||||||
"""
|
"""
|
||||||
global _registry
|
global _registry
|
||||||
|
|
||||||
|
with _lock:
|
||||||
if _registry is None:
|
if _registry is None:
|
||||||
_registry = ParserRegistry()
|
r = ParserRegistry()
|
||||||
_registry.register_defaults()
|
r.register_defaults()
|
||||||
|
_registry = r
|
||||||
|
|
||||||
|
|
||||||
def reset_parser_registry() -> None:
|
def reset_parser_registry() -> None:
|
||||||
@@ -304,6 +308,23 @@ class ParserRegistry:
|
|||||||
getattr(cls, "url", "unknown"),
|
getattr(cls, "url", "unknown"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Inspection helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def all_parsers(self) -> list[type[ParserProtocol]]:
|
||||||
|
"""Return all registered parser classes (external first, then builtins).
|
||||||
|
|
||||||
|
Used by compatibility wrappers that need to iterate every parser to
|
||||||
|
compute the full set of supported MIME types and file extensions.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
list[type[ParserProtocol]]
|
||||||
|
External parsers followed by built-in parsers.
|
||||||
|
"""
|
||||||
|
return [*self._external, *self._builtins]
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Parser resolution
|
# Parser resolution
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
@@ -334,7 +355,7 @@ class ParserRegistry:
|
|||||||
mime_type:
|
mime_type:
|
||||||
The detected MIME type of the file.
|
The detected MIME type of the file.
|
||||||
filename:
|
filename:
|
||||||
The original filename, including extension.
|
The original filename, including extension. May be empty in some cases
|
||||||
path:
|
path:
|
||||||
Optional filesystem path to the file. Forwarded to each
|
Optional filesystem path to the file. Forwarded to each
|
||||||
parser's score method.
|
parser's score method.
|
||||||
|
|||||||
@@ -121,10 +121,7 @@ INSTALLED_APPS = [
|
|||||||
"django_extensions",
|
"django_extensions",
|
||||||
"paperless",
|
"paperless",
|
||||||
"documents.apps.DocumentsConfig",
|
"documents.apps.DocumentsConfig",
|
||||||
"paperless_tesseract.apps.PaperlessTesseractConfig",
|
|
||||||
"paperless_text.apps.PaperlessTextConfig",
|
|
||||||
"paperless_mail.apps.PaperlessMailConfig",
|
"paperless_mail.apps.PaperlessMailConfig",
|
||||||
"paperless_remote.apps.PaperlessRemoteParserConfig",
|
|
||||||
"django.contrib.admin",
|
"django.contrib.admin",
|
||||||
"rest_framework",
|
"rest_framework",
|
||||||
"rest_framework.authtoken",
|
"rest_framework.authtoken",
|
||||||
@@ -974,8 +971,8 @@ TIKA_GOTENBERG_ENDPOINT = os.getenv(
|
|||||||
"http://localhost:3000",
|
"http://localhost:3000",
|
||||||
)
|
)
|
||||||
|
|
||||||
if TIKA_ENABLED:
|
# Tika parser is now integrated into the main parser registry
|
||||||
INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
|
# No separate Django app needed
|
||||||
|
|
||||||
AUDIT_LOG_ENABLED = get_bool_from_env("PAPERLESS_AUDIT_LOG_ENABLED", "true")
|
AUDIT_LOG_ENABLED = get_bool_from_env("PAPERLESS_AUDIT_LOG_ENABLED", "true")
|
||||||
if AUDIT_LOG_ENABLED:
|
if AUDIT_LOG_ENABLED:
|
||||||
|
|||||||
@@ -90,35 +90,6 @@ def text_parser() -> Generator[TextDocumentParser, None, None]:
|
|||||||
yield parser
|
yield parser
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
# Remote parser sample files
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
|
||||||
def remote_samples_dir(samples_dir: Path) -> Path:
|
|
||||||
"""Absolute path to the remote parser sample files directory.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
Path
|
|
||||||
``<samples_dir>/remote/``
|
|
||||||
"""
|
|
||||||
return samples_dir / "remote"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
|
||||||
def sample_pdf_file(remote_samples_dir: Path) -> Path:
|
|
||||||
"""Path to a simple digital PDF sample file.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
Path
|
|
||||||
Absolute path to ``remote/simple-digital.pdf``.
|
|
||||||
"""
|
|
||||||
return remote_samples_dir / "simple-digital.pdf"
|
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Remote parser instance
|
# Remote parser instance
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|||||||
@@ -277,20 +277,20 @@ class TestRemoteParserParse:
|
|||||||
def test_parse_returns_text_from_azure(
|
def test_parse_returns_text_from_azure(
|
||||||
self,
|
self,
|
||||||
remote_parser: RemoteDocumentParser,
|
remote_parser: RemoteDocumentParser,
|
||||||
sample_pdf_file: Path,
|
simple_digital_pdf_file: Path,
|
||||||
azure_client: Mock,
|
azure_client: Mock,
|
||||||
) -> None:
|
) -> None:
|
||||||
remote_parser.parse(sample_pdf_file, "application/pdf")
|
remote_parser.parse(simple_digital_pdf_file, "application/pdf")
|
||||||
|
|
||||||
assert remote_parser.get_text() == _DEFAULT_TEXT
|
assert remote_parser.get_text() == _DEFAULT_TEXT
|
||||||
|
|
||||||
def test_parse_sets_archive_path(
|
def test_parse_sets_archive_path(
|
||||||
self,
|
self,
|
||||||
remote_parser: RemoteDocumentParser,
|
remote_parser: RemoteDocumentParser,
|
||||||
sample_pdf_file: Path,
|
simple_digital_pdf_file: Path,
|
||||||
azure_client: Mock,
|
azure_client: Mock,
|
||||||
) -> None:
|
) -> None:
|
||||||
remote_parser.parse(sample_pdf_file, "application/pdf")
|
remote_parser.parse(simple_digital_pdf_file, "application/pdf")
|
||||||
|
|
||||||
archive = remote_parser.get_archive_path()
|
archive = remote_parser.get_archive_path()
|
||||||
assert archive is not None
|
assert archive is not None
|
||||||
@@ -300,11 +300,11 @@ class TestRemoteParserParse:
|
|||||||
def test_parse_closes_client_on_success(
|
def test_parse_closes_client_on_success(
|
||||||
self,
|
self,
|
||||||
remote_parser: RemoteDocumentParser,
|
remote_parser: RemoteDocumentParser,
|
||||||
sample_pdf_file: Path,
|
simple_digital_pdf_file: Path,
|
||||||
azure_client: Mock,
|
azure_client: Mock,
|
||||||
) -> None:
|
) -> None:
|
||||||
remote_parser.configure(ParserContext())
|
remote_parser.configure(ParserContext())
|
||||||
remote_parser.parse(sample_pdf_file, "application/pdf")
|
remote_parser.parse(simple_digital_pdf_file, "application/pdf")
|
||||||
|
|
||||||
azure_client.close.assert_called_once()
|
azure_client.close.assert_called_once()
|
||||||
|
|
||||||
@@ -312,9 +312,9 @@ class TestRemoteParserParse:
|
|||||||
def test_parse_sets_empty_text_when_not_configured(
|
def test_parse_sets_empty_text_when_not_configured(
|
||||||
self,
|
self,
|
||||||
remote_parser: RemoteDocumentParser,
|
remote_parser: RemoteDocumentParser,
|
||||||
sample_pdf_file: Path,
|
simple_digital_pdf_file: Path,
|
||||||
) -> None:
|
) -> None:
|
||||||
remote_parser.parse(sample_pdf_file, "application/pdf")
|
remote_parser.parse(simple_digital_pdf_file, "application/pdf")
|
||||||
|
|
||||||
assert remote_parser.get_text() == ""
|
assert remote_parser.get_text() == ""
|
||||||
assert remote_parser.get_archive_path() is None
|
assert remote_parser.get_archive_path() is None
|
||||||
@@ -328,10 +328,10 @@ class TestRemoteParserParse:
|
|||||||
def test_get_date_always_none(
|
def test_get_date_always_none(
|
||||||
self,
|
self,
|
||||||
remote_parser: RemoteDocumentParser,
|
remote_parser: RemoteDocumentParser,
|
||||||
sample_pdf_file: Path,
|
simple_digital_pdf_file: Path,
|
||||||
azure_client: Mock,
|
azure_client: Mock,
|
||||||
) -> None:
|
) -> None:
|
||||||
remote_parser.parse(sample_pdf_file, "application/pdf")
|
remote_parser.parse(simple_digital_pdf_file, "application/pdf")
|
||||||
|
|
||||||
assert remote_parser.get_date() is None
|
assert remote_parser.get_date() is None
|
||||||
|
|
||||||
@@ -345,33 +345,33 @@ class TestRemoteParserParseError:
|
|||||||
def test_parse_returns_none_on_azure_error(
|
def test_parse_returns_none_on_azure_error(
|
||||||
self,
|
self,
|
||||||
remote_parser: RemoteDocumentParser,
|
remote_parser: RemoteDocumentParser,
|
||||||
sample_pdf_file: Path,
|
simple_digital_pdf_file: Path,
|
||||||
failing_azure_client: Mock,
|
failing_azure_client: Mock,
|
||||||
) -> None:
|
) -> None:
|
||||||
remote_parser.parse(sample_pdf_file, "application/pdf")
|
remote_parser.parse(simple_digital_pdf_file, "application/pdf")
|
||||||
|
|
||||||
assert remote_parser.get_text() is None
|
assert remote_parser.get_text() is None
|
||||||
|
|
||||||
def test_parse_closes_client_on_error(
|
def test_parse_closes_client_on_error(
|
||||||
self,
|
self,
|
||||||
remote_parser: RemoteDocumentParser,
|
remote_parser: RemoteDocumentParser,
|
||||||
sample_pdf_file: Path,
|
simple_digital_pdf_file: Path,
|
||||||
failing_azure_client: Mock,
|
failing_azure_client: Mock,
|
||||||
) -> None:
|
) -> None:
|
||||||
remote_parser.parse(sample_pdf_file, "application/pdf")
|
remote_parser.parse(simple_digital_pdf_file, "application/pdf")
|
||||||
|
|
||||||
failing_azure_client.close.assert_called_once()
|
failing_azure_client.close.assert_called_once()
|
||||||
|
|
||||||
def test_parse_logs_error_on_azure_failure(
|
def test_parse_logs_error_on_azure_failure(
|
||||||
self,
|
self,
|
||||||
remote_parser: RemoteDocumentParser,
|
remote_parser: RemoteDocumentParser,
|
||||||
sample_pdf_file: Path,
|
simple_digital_pdf_file: Path,
|
||||||
failing_azure_client: Mock,
|
failing_azure_client: Mock,
|
||||||
mocker: MockerFixture,
|
mocker: MockerFixture,
|
||||||
) -> None:
|
) -> None:
|
||||||
mock_log = mocker.patch("paperless.parsers.remote.logger")
|
mock_log = mocker.patch("paperless.parsers.remote.logger")
|
||||||
|
|
||||||
remote_parser.parse(sample_pdf_file, "application/pdf")
|
remote_parser.parse(simple_digital_pdf_file, "application/pdf")
|
||||||
|
|
||||||
mock_log.error.assert_called_once()
|
mock_log.error.assert_called_once()
|
||||||
assert "Azure AI Vision parsing failed" in mock_log.error.call_args[0][0]
|
assert "Azure AI Vision parsing failed" in mock_log.error.call_args[0][0]
|
||||||
@@ -386,18 +386,18 @@ class TestRemoteParserPageCount:
|
|||||||
def test_page_count_for_pdf(
|
def test_page_count_for_pdf(
|
||||||
self,
|
self,
|
||||||
remote_parser: RemoteDocumentParser,
|
remote_parser: RemoteDocumentParser,
|
||||||
sample_pdf_file: Path,
|
simple_digital_pdf_file: Path,
|
||||||
) -> None:
|
) -> None:
|
||||||
count = remote_parser.get_page_count(sample_pdf_file, "application/pdf")
|
count = remote_parser.get_page_count(simple_digital_pdf_file, "application/pdf")
|
||||||
assert isinstance(count, int)
|
assert isinstance(count, int)
|
||||||
assert count >= 1
|
assert count >= 1
|
||||||
|
|
||||||
def test_page_count_returns_none_for_image_mime(
|
def test_page_count_returns_none_for_image_mime(
|
||||||
self,
|
self,
|
||||||
remote_parser: RemoteDocumentParser,
|
remote_parser: RemoteDocumentParser,
|
||||||
sample_pdf_file: Path,
|
simple_digital_pdf_file: Path,
|
||||||
) -> None:
|
) -> None:
|
||||||
count = remote_parser.get_page_count(sample_pdf_file, "image/png")
|
count = remote_parser.get_page_count(simple_digital_pdf_file, "image/png")
|
||||||
assert count is None
|
assert count is None
|
||||||
|
|
||||||
def test_page_count_returns_none_for_invalid_pdf(
|
def test_page_count_returns_none_for_invalid_pdf(
|
||||||
@@ -420,25 +420,31 @@ class TestRemoteParserMetadata:
|
|||||||
def test_extract_metadata_non_pdf_returns_empty(
|
def test_extract_metadata_non_pdf_returns_empty(
|
||||||
self,
|
self,
|
||||||
remote_parser: RemoteDocumentParser,
|
remote_parser: RemoteDocumentParser,
|
||||||
sample_pdf_file: Path,
|
simple_digital_pdf_file: Path,
|
||||||
) -> None:
|
) -> None:
|
||||||
result = remote_parser.extract_metadata(sample_pdf_file, "image/png")
|
result = remote_parser.extract_metadata(simple_digital_pdf_file, "image/png")
|
||||||
assert result == []
|
assert result == []
|
||||||
|
|
||||||
def test_extract_metadata_pdf_returns_list(
|
def test_extract_metadata_pdf_returns_list(
|
||||||
self,
|
self,
|
||||||
remote_parser: RemoteDocumentParser,
|
remote_parser: RemoteDocumentParser,
|
||||||
sample_pdf_file: Path,
|
simple_digital_pdf_file: Path,
|
||||||
) -> None:
|
) -> None:
|
||||||
result = remote_parser.extract_metadata(sample_pdf_file, "application/pdf")
|
result = remote_parser.extract_metadata(
|
||||||
|
simple_digital_pdf_file,
|
||||||
|
"application/pdf",
|
||||||
|
)
|
||||||
assert isinstance(result, list)
|
assert isinstance(result, list)
|
||||||
|
|
||||||
def test_extract_metadata_pdf_entries_have_required_keys(
|
def test_extract_metadata_pdf_entries_have_required_keys(
|
||||||
self,
|
self,
|
||||||
remote_parser: RemoteDocumentParser,
|
remote_parser: RemoteDocumentParser,
|
||||||
sample_pdf_file: Path,
|
simple_digital_pdf_file: Path,
|
||||||
) -> None:
|
) -> None:
|
||||||
result = remote_parser.extract_metadata(sample_pdf_file, "application/pdf")
|
result = remote_parser.extract_metadata(
|
||||||
|
simple_digital_pdf_file,
|
||||||
|
"application/pdf",
|
||||||
|
)
|
||||||
for entry in result:
|
for entry in result:
|
||||||
assert "namespace" in entry
|
assert "namespace" in entry
|
||||||
assert "prefix" in entry
|
assert "prefix" in entry
|
||||||
|
|||||||
@@ -77,10 +77,10 @@ class TestTikaParserRegistryInterface:
|
|||||||
def test_get_page_count_returns_int_with_pdf_archive(
|
def test_get_page_count_returns_int_with_pdf_archive(
|
||||||
self,
|
self,
|
||||||
tika_parser: TikaDocumentParser,
|
tika_parser: TikaDocumentParser,
|
||||||
sample_pdf_file: Path,
|
simple_digital_pdf_file: Path,
|
||||||
) -> None:
|
) -> None:
|
||||||
tika_parser._archive_path = sample_pdf_file
|
tika_parser._archive_path = simple_digital_pdf_file
|
||||||
count = tika_parser.get_page_count(sample_pdf_file, "application/pdf")
|
count = tika_parser.get_page_count(simple_digital_pdf_file, "application/pdf")
|
||||||
assert isinstance(count, int)
|
assert isinstance(count, int)
|
||||||
assert count > 0
|
assert count > 0
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
@@ -5,6 +5,7 @@ from pathlib import Path
|
|||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from django.core.checks import ERROR
|
||||||
from django.core.checks import Error
|
from django.core.checks import Error
|
||||||
from django.core.checks import Warning
|
from django.core.checks import Warning
|
||||||
from pytest_django.fixtures import SettingsWrapper
|
from pytest_django.fixtures import SettingsWrapper
|
||||||
@@ -12,7 +13,9 @@ from pytest_mock import MockerFixture
|
|||||||
|
|
||||||
from paperless.checks import audit_log_check
|
from paperless.checks import audit_log_check
|
||||||
from paperless.checks import binaries_check
|
from paperless.checks import binaries_check
|
||||||
|
from paperless.checks import check_default_language_available
|
||||||
from paperless.checks import check_deprecated_db_settings
|
from paperless.checks import check_deprecated_db_settings
|
||||||
|
from paperless.checks import check_remote_parser_configured
|
||||||
from paperless.checks import check_v3_minimum_upgrade_version
|
from paperless.checks import check_v3_minimum_upgrade_version
|
||||||
from paperless.checks import debug_mode_check
|
from paperless.checks import debug_mode_check
|
||||||
from paperless.checks import paths_check
|
from paperless.checks import paths_check
|
||||||
@@ -626,3 +629,116 @@ class TestV3MinimumUpgradeVersionCheck:
|
|||||||
conn.introspection.table_names.side_effect = OperationalError("DB unavailable")
|
conn.introspection.table_names.side_effect = OperationalError("DB unavailable")
|
||||||
mocker.patch.dict("paperless.checks.connections", {"default": conn})
|
mocker.patch.dict("paperless.checks.connections", {"default": conn})
|
||||||
assert check_v3_minimum_upgrade_version(None) == []
|
assert check_v3_minimum_upgrade_version(None) == []
|
||||||
|
|
||||||
|
|
||||||
|
class TestRemoteParserChecks:
|
||||||
|
def test_no_engine(self, settings: SettingsWrapper) -> None:
|
||||||
|
settings.REMOTE_OCR_ENGINE = None
|
||||||
|
msgs = check_remote_parser_configured(None)
|
||||||
|
|
||||||
|
assert len(msgs) == 0
|
||||||
|
|
||||||
|
def test_azure_no_endpoint(self, settings: SettingsWrapper) -> None:
|
||||||
|
|
||||||
|
settings.REMOTE_OCR_ENGINE = "azureai"
|
||||||
|
settings.REMOTE_OCR_API_KEY = "somekey"
|
||||||
|
settings.REMOTE_OCR_ENDPOINT = None
|
||||||
|
|
||||||
|
msgs = check_remote_parser_configured(None)
|
||||||
|
|
||||||
|
assert len(msgs) == 1
|
||||||
|
|
||||||
|
msg = msgs[0]
|
||||||
|
|
||||||
|
assert (
|
||||||
|
"Azure AI remote parser requires endpoint and API key to be configured."
|
||||||
|
in msg.msg
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestTesseractChecks:
    """System-check tests for the Tesseract OCR language configuration."""

    def test_default_language(self) -> None:
        """The check runs cleanly with the default language settings."""
        check_default_language_available(None)

    def test_no_language(self, settings: SettingsWrapper) -> None:
        """An empty OCR_LANGUAGE yields a single warning about the fallback."""
        settings.OCR_LANGUAGE = ""

        msgs = check_default_language_available(None)

        assert len(msgs) == 1
        msg = msgs[0]

        assert (
            "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE" in msg.msg
        )

    def test_invalid_language(
        self,
        settings: SettingsWrapper,
        mocker: MockerFixture,
    ) -> None:
        """A language absent from the installed set yields a single error."""
        settings.OCR_LANGUAGE = "ita"

        tesser_lang_mock = mocker.patch("paperless.checks.get_tesseract_langs")
        tesser_lang_mock.return_value = ["deu", "eng"]

        msgs = check_default_language_available(None)

        assert len(msgs) == 1
        msg = msgs[0]

        assert msg.level == ERROR
        assert "The selected ocr language ita is not installed" in msg.msg

    def test_multi_part_language(
        self,
        settings: SettingsWrapper,
        mocker: MockerFixture,
    ) -> None:
        """
        GIVEN:
            - An OCR language which is multi part (ie chi_sim)
            - The language is correctly formatted
        WHEN:
            - Installed packages are checked
        THEN:
            - No errors are reported
        """
        settings.OCR_LANGUAGE = "chi_sim"

        tesser_lang_mock = mocker.patch("paperless.checks.get_tesseract_langs")
        tesser_lang_mock.return_value = ["chi_sim", "eng"]

        msgs = check_default_language_available(None)

        assert len(msgs) == 0

    def test_multi_part_language_bad_format(
        self,
        settings: SettingsWrapper,
        mocker: MockerFixture,
    ) -> None:
        """
        GIVEN:
            - An OCR language which is multi part (ie chi-sim)
            - The language is incorrectly formatted (dash instead of underscore)
        WHEN:
            - Installed packages are checked
        THEN:
            - One error is reported for the malformed language code
        """
        settings.OCR_LANGUAGE = "chi-sim"

        tesser_lang_mock = mocker.patch("paperless.checks.get_tesseract_langs")
        tesser_lang_mock.return_value = ["chi_sim", "eng"]

        msgs = check_default_language_available(None)

        assert len(msgs) == 1
        msg = msgs[0]

        assert msg.level == ERROR
        assert "The selected ocr language chi-sim is not installed" in msg.msg
|
||||||
|
|||||||
@@ -1,18 +1,8 @@
|
|||||||
from django.apps import AppConfig
|
from django.apps import AppConfig
|
||||||
from django.conf import settings
|
|
||||||
from django.utils.translation import gettext_lazy as _
|
from django.utils.translation import gettext_lazy as _
|
||||||
|
|
||||||
from paperless_mail.signals import mail_consumer_declaration
|
|
||||||
|
|
||||||
|
|
||||||
class PaperlessMailConfig(AppConfig):
    """Django app config for the paperless_mail plugin.

    Registers the mail parser with the documents consumer, but only when
    Tika is enabled (the mail parser depends on it).
    """

    name = "paperless_mail"

    verbose_name = _("Paperless mail")

    def ready(self) -> None:
        # Imported here to avoid touching the documents app at module import time.
        from documents.signals import document_consumer_declaration

        if settings.TIKA_ENABLED:
            document_consumer_declaration.connect(mail_consumer_declaration)
        # Idiomatic cooperative call instead of AppConfig.ready(self).
        super().ready()
|
|
||||||
|
|||||||
@@ -1,19 +0,0 @@
|
|||||||
def get_parser(*args, **kwargs):
    """Build a MailDocumentParser, discarding legacy consumer kwargs."""
    from paperless.parsers.mail import MailDocumentParser

    # MailDocumentParser accepts no constructor args in the new-style protocol.
    # Pop legacy args that arrive from the signal-based consumer path.
    # Phase 4 will replace this signal path with the ParserRegistry.
    for legacy_kwarg in ("logging_group", "progress_callback"):
        kwargs.pop(legacy_kwarg, None)
    return MailDocumentParser()


def mail_consumer_declaration(sender, **kwargs):
    """Declare the mail parser to the documents consumer signal."""
    return {
        "parser": get_parser,
        "weight": 20,
        "mime_types": {"message/rfc822": ".eml"},
    }
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
# this is here so that django finds the checks.
|
|
||||||
from paperless_remote.checks import check_remote_parser_configured
|
|
||||||
|
|
||||||
__all__ = ["check_remote_parser_configured"]
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
from django.apps import AppConfig
|
|
||||||
|
|
||||||
from paperless_remote.signals import remote_consumer_declaration
|
|
||||||
|
|
||||||
|
|
||||||
class PaperlessRemoteParserConfig(AppConfig):
    """Django app config that wires the remote parser into the consumer."""

    name = "paperless_remote"

    def ready(self) -> None:
        # Imported here to avoid touching the documents app at module import time.
        from documents.signals import document_consumer_declaration

        document_consumer_declaration.connect(remote_consumer_declaration)

        # Idiomatic cooperative call instead of AppConfig.ready(self).
        super().ready()
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
from django.conf import settings
|
|
||||||
from django.core.checks import Error
|
|
||||||
from django.core.checks import register
|
|
||||||
|
|
||||||
|
|
||||||
@register()
def check_remote_parser_configured(app_configs, **kwargs):
    """Django system check: validate the remote OCR parser configuration.

    When the Azure AI engine is selected, both an endpoint and an API key
    are required; report a single error if either is missing. Returns an
    empty list when the configuration is valid or no engine is selected.
    """
    if settings.REMOTE_OCR_ENGINE == "azureai" and not (
        settings.REMOTE_OCR_ENDPOINT and settings.REMOTE_OCR_API_KEY
    ):
        return [
            Error(
                "Azure AI remote parser requires endpoint and API key to be configured.",
                # Stable identifier so the check can be referenced/silenced
                # per Django's system-check conventions.
                id="paperless_remote.E001",
            ),
        ]

    return []
|
|
||||||
@@ -1,38 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
|
|
||||||
def get_parser(*args: Any, **kwargs: Any) -> Any:
    """Build a RemoteDocumentParser, dropping the legacy progress_callback."""
    from paperless.parsers.remote import RemoteDocumentParser

    # The new RemoteDocumentParser does not accept the progress_callback
    # kwarg injected by the old signal-based consumer. logging_group is
    # forwarded as a positional arg.
    # Phase 4 will replace this signal path with the new ParserRegistry.
    kwargs.pop("progress_callback", None)
    return RemoteDocumentParser(*args, **kwargs)


def get_supported_mime_types() -> dict[str, str]:
    """Return the remote parser's mime map, or {} when it is unconfigured."""
    from django.conf import settings

    from paperless.parsers.remote import RemoteDocumentParser
    from paperless.parsers.remote import RemoteEngineConfig

    engine_config = RemoteEngineConfig(
        engine=settings.REMOTE_OCR_ENGINE,
        api_key=settings.REMOTE_OCR_API_KEY,
        endpoint=settings.REMOTE_OCR_ENDPOINT,
    )
    # Advertise no mime types for an invalid engine so the consumer never
    # routes documents to an unconfigured remote parser.
    if engine_config.engine_is_valid():
        return RemoteDocumentParser.supported_mime_types()
    return {}


def remote_consumer_declaration(sender: Any, **kwargs: Any) -> dict[str, Any]:
    """Declare the remote parser to the documents consumer signal."""
    return {
        "parser": get_parser,
        "weight": 5,
        "mime_types": get_supported_mime_types(),
    }
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
from unittest import TestCase
|
|
||||||
|
|
||||||
from django.test import override_settings
|
|
||||||
|
|
||||||
from paperless_remote import check_remote_parser_configured
|
|
||||||
|
|
||||||
|
|
||||||
class TestChecks(TestCase):
    """Legacy unittest-style checks for the remote parser configuration."""

    @override_settings(REMOTE_OCR_ENGINE=None)
    def test_no_engine(self) -> None:
        """With no engine configured, the check reports nothing."""
        messages = check_remote_parser_configured(None)
        self.assertEqual(len(messages), 0)

    @override_settings(REMOTE_OCR_ENGINE="azureai")
    @override_settings(REMOTE_OCR_API_KEY="somekey")
    @override_settings(REMOTE_OCR_ENDPOINT=None)
    def test_azure_no_endpoint(self) -> None:
        """Azure engine without an endpoint yields exactly one error."""
        messages = check_remote_parser_configured(None)
        self.assertEqual(len(messages), 1)
        expected_prefix = (
            "Azure AI remote parser requires endpoint and API key to be configured."
        )
        self.assertTrue(messages[0].msg.startswith(expected_prefix))
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
# this is here so that django finds the checks.
|
|
||||||
from paperless_tesseract.checks import check_default_language_available
|
|
||||||
from paperless_tesseract.checks import get_tesseract_langs
|
|
||||||
|
|
||||||
__all__ = ["check_default_language_available", "get_tesseract_langs"]
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
from django.apps import AppConfig
|
|
||||||
|
|
||||||
from paperless_tesseract.signals import tesseract_consumer_declaration
|
|
||||||
|
|
||||||
|
|
||||||
class PaperlessTesseractConfig(AppConfig):
    """Django app config that wires the tesseract parser into the consumer."""

    name = "paperless_tesseract"

    def ready(self) -> None:
        # Imported here to avoid touching the documents app at module import time.
        from documents.signals import document_consumer_declaration

        document_consumer_declaration.connect(tesseract_consumer_declaration)

        # Idiomatic cooperative call instead of AppConfig.ready(self).
        super().ready()
|
|
||||||
@@ -1,52 +0,0 @@
|
|||||||
import shutil
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
from django.conf import settings
|
|
||||||
from django.core.checks import Error
|
|
||||||
from django.core.checks import Warning
|
|
||||||
from django.core.checks import register
|
|
||||||
|
|
||||||
|
|
||||||
def get_tesseract_langs():
    """Return the language codes that the tesseract binary reports as installed."""
    output = subprocess.run(
        [shutil.which("tesseract"), "--list-langs"],
        capture_output=True,
    ).stdout

    # Decode the raw bytes, trim surrounding whitespace, and drop the
    # header line that precedes the language listing.
    lines = output.decode("utf8", errors="ignore").strip().split("\n")
    return [lang.strip() for lang in lines[1:]]
|
|
||||||
|
|
||||||
|
|
||||||
@register()
def check_default_language_available(app_configs, **kwargs):
    """Django system check: verify the configured OCR language(s).

    Emits a warning when no language is configured (tesseract falls back
    to English) and an error for each configured language that is not
    installed. Returns the list of messages (possibly empty).
    """
    errs = []

    if not settings.OCR_LANGUAGE:
        errs.append(
            Warning(
                "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE. "
                "This means that tesseract will fallback to english.",
            ),
        )
        return errs

    # binaries_check in paperless will check and report if this doesn't exist
    # So skip trying to do anything here and let that handle missing binaries
    if shutil.which("tesseract") is not None:
        installed_langs = get_tesseract_langs()

        # Multiple languages may be combined with "+" (e.g. "deu+eng").
        specified_langs = [x.strip() for x in settings.OCR_LANGUAGE.split("+")]

        for lang in specified_langs:
            if lang not in installed_langs:
                errs.append(
                    Error(
                        # f-prefix only where a placeholder exists; the
                        # remaining segments are plain literals.
                        f"The selected ocr language {lang} is "
                        "not installed. Paperless cannot OCR your documents "
                        "without it. Please fix PAPERLESS_OCR_LANGUAGE.",
                    ),
                )

    return errs
|
|
||||||
@@ -1,34 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
|
|
||||||
def get_parser(*args: Any, **kwargs: Any) -> Any:
    """Build a RasterisedDocumentParser, dropping legacy consumer kwargs."""
    from paperless.parsers.tesseract import RasterisedDocumentParser

    # logging_group and progress_callback come from the legacy signal-based
    # consumer path and are unused by the parser. Either may arrive
    # positionally (absorbed by *args) or as a keyword (popped here).
    # Phase 4 will replace this signal path with the new ParserRegistry so
    # the shim can be removed at that point.
    for legacy_kwarg in ("logging_group", "progress_callback"):
        kwargs.pop(legacy_kwarg, None)
    return RasterisedDocumentParser(*args, **kwargs)


def tesseract_consumer_declaration(sender: Any, **kwargs: Any) -> dict[str, Any]:
    """Declare the tesseract parser and its supported PDF/image types."""
    supported_types = {
        "application/pdf": ".pdf",
        "image/jpeg": ".jpg",
        "image/png": ".png",
        "image/tiff": ".tif",
        "image/gif": ".gif",
        "image/bmp": ".bmp",
        "image/webp": ".webp",
        "image/heic": ".heic",
    }
    return {
        "parser": get_parser,
        "weight": 0,
        "mime_types": supported_types,
    }
|
|
||||||
@@ -1,67 +0,0 @@
|
|||||||
from unittest import mock
|
|
||||||
|
|
||||||
from django.core.checks import ERROR
|
|
||||||
from django.test import TestCase
|
|
||||||
from django.test import override_settings
|
|
||||||
|
|
||||||
from paperless_tesseract import check_default_language_available
|
|
||||||
|
|
||||||
|
|
||||||
class TestChecks(TestCase):
    """Legacy unittest-style checks for the tesseract language configuration."""

    def test_default_language(self) -> None:
        """The check runs cleanly with the default language settings."""
        check_default_language_available(None)

    @override_settings(OCR_LANGUAGE="")
    def test_no_language(self) -> None:
        """An empty OCR_LANGUAGE yields a single warning about the fallback."""
        msgs = check_default_language_available(None)
        self.assertEqual(len(msgs), 1)
        self.assertTrue(
            msgs[0].msg.startswith(
                "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE",
            ),
        )

    @override_settings(OCR_LANGUAGE="ita")
    @mock.patch("paperless_tesseract.checks.get_tesseract_langs")
    def test_invalid_language(self, m) -> None:
        """A language absent from the installed set yields a single error."""
        m.return_value = ["deu", "eng"]
        msgs = check_default_language_available(None)
        self.assertEqual(len(msgs), 1)
        self.assertEqual(msgs[0].level, ERROR)

    @override_settings(OCR_LANGUAGE="chi_sim")
    @mock.patch("paperless_tesseract.checks.get_tesseract_langs")
    def test_multi_part_language(self, m) -> None:
        """
        GIVEN:
            - An OCR language which is multi part (ie chi_sim)
            - The language is correctly formatted
        WHEN:
            - Installed packages are checked
        THEN:
            - No errors are reported
        """
        m.return_value = ["chi_sim", "eng"]

        msgs = check_default_language_available(None)

        self.assertEqual(len(msgs), 0)

    @override_settings(OCR_LANGUAGE="chi-sim")
    @mock.patch("paperless_tesseract.checks.get_tesseract_langs")
    def test_multi_part_language_bad_format(self, m) -> None:
        """
        GIVEN:
            - An OCR language which is multi part (ie chi-sim)
            - The language is incorrectly formatted (dash instead of underscore)
        WHEN:
            - Installed packages are checked
        THEN:
            - One error is reported for the malformed language code
        """
        m.return_value = ["chi_sim", "eng"]

        msgs = check_default_language_available(None)

        self.assertEqual(len(msgs), 1)
        self.assertEqual(msgs[0].level, ERROR)
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
from django.apps import AppConfig
|
|
||||||
|
|
||||||
from paperless_text.signals import text_consumer_declaration
|
|
||||||
|
|
||||||
|
|
||||||
class PaperlessTextConfig(AppConfig):
    """Django app config that wires the plain-text parser into the consumer."""

    name = "paperless_text"

    def ready(self) -> None:
        # Imported here to avoid touching the documents app at module import time.
        from documents.signals import document_consumer_declaration

        document_consumer_declaration.connect(text_consumer_declaration)

        # Idiomatic cooperative call instead of AppConfig.ready(self).
        super().ready()
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
|
|
||||||
def get_parser(*args: Any, **kwargs: Any) -> Any:
    """Build a TextDocumentParser, dropping legacy consumer kwargs."""
    from paperless.parsers.text import TextDocumentParser

    # logging_group and progress_callback come from the legacy signal-based
    # consumer path and are unused by the parser. Either may arrive
    # positionally (absorbed by *args) or as a keyword (popped here).
    # Phase 4 will replace this signal path with the new ParserRegistry so
    # the shim can be removed at that point.
    for legacy_kwarg in ("logging_group", "progress_callback"):
        kwargs.pop(legacy_kwarg, None)
    return TextDocumentParser(*args, **kwargs)


def text_consumer_declaration(sender: Any, **kwargs: Any) -> dict[str, Any]:
    """Declare the plain-text parser and its supported text types."""
    supported_types = {
        "text/plain": ".txt",
        "text/csv": ".csv",
        "application/csv": ".csv",
    }
    return {
        "parser": get_parser,
        "weight": 10,
        "mime_types": supported_types,
    }
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
from django.apps import AppConfig
|
|
||||||
from django.conf import settings
|
|
||||||
|
|
||||||
from paperless_tika.signals import tika_consumer_declaration
|
|
||||||
|
|
||||||
|
|
||||||
class PaperlessTikaConfig(AppConfig):
    """Django app config that registers the Tika parser when enabled."""

    name = "paperless_tika"

    def ready(self) -> None:
        # Imported here to avoid touching the documents app at module import time.
        from documents.signals import document_consumer_declaration

        # Only register the Tika parser when a Tika server is configured.
        if settings.TIKA_ENABLED:
            document_consumer_declaration.connect(tika_consumer_declaration)
        # Idiomatic cooperative call instead of AppConfig.ready(self).
        super().ready()
|
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
def get_parser(*args, **kwargs):
    """Build a TikaDocumentParser, discarding legacy consumer kwargs."""
    from paperless.parsers.tika import TikaDocumentParser

    # logging_group and progress_callback come from the legacy signal-based
    # consumer path and are unused by the parser. Either may arrive
    # positionally (absorbed by *args) or as a keyword (popped here).
    # Phase 4 will replace this signal path with the new ParserRegistry so
    # the shim can be removed at that point.
    for legacy_kwarg in ("logging_group", "progress_callback"):
        kwargs.pop(legacy_kwarg, None)
    return TikaDocumentParser()


def tika_consumer_declaration(sender, **kwargs):
    """Declare the Tika parser and its supported office-document types."""
    supported_types = {
        "application/msword": ".doc",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
        "application/vnd.ms-excel": ".xls",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
        "application/vnd.ms-powerpoint": ".ppt",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
        "application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",
        "application/vnd.oasis.opendocument.presentation": ".odp",
        "application/vnd.oasis.opendocument.spreadsheet": ".ods",
        "application/vnd.oasis.opendocument.text": ".odt",
        "application/vnd.oasis.opendocument.graphics": ".odg",
        "text/rtf": ".rtf",
    }
    return {
        "parser": get_parser,
        "weight": 10,
        "mime_types": supported_types,
    }
|
|
||||||
Reference in New Issue
Block a user