mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-31 13:22:43 +00:00
Compare commits
56 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
13e91d8c95 | ||
|
|
6ac90181cb | ||
|
|
d6c3471909 | ||
|
|
5b56fad9c7 | ||
|
|
ed0b1fe115 | ||
|
|
4211153527 | ||
|
|
2f85461109 | ||
|
|
3fa7dcb0cb | ||
|
|
857fe3a55c | ||
|
|
dd19ea46fe | ||
|
|
21740a9d87 | ||
|
|
658fb2f208 | ||
|
|
252d4cb513 | ||
|
|
5aed41223b | ||
|
|
45dfbf3747 | ||
|
|
e4fe5bebab | ||
|
|
04519ee623 | ||
|
|
6c8f010f7a | ||
|
|
ed84cf26e7 | ||
|
|
1bc961f0c0 | ||
|
|
77d745381f | ||
|
|
7082cb9c36 | ||
|
|
34e84cc757 | ||
|
|
9246411610 | ||
|
|
8330b3598c | ||
|
|
1d002149dc | ||
|
|
8d6071e977 | ||
|
|
7d67766508 | ||
|
|
887dd122fe | ||
|
|
a1293c77b9 | ||
|
|
ee9a73aa95 | ||
|
|
9df332b614 | ||
|
|
d13e86a892 | ||
|
|
69d7f8c180 | ||
|
|
1ba89ddd09 | ||
|
|
0c40a28ad3 | ||
|
|
2b7424c42a | ||
|
|
a9f1766d1c | ||
|
|
fca8576d80 | ||
|
|
05f59e7d5e | ||
|
|
c9511680b3 | ||
|
|
0ed001c56e | ||
|
|
1e5a418191 | ||
|
|
e05735bc0f | ||
|
|
7621e10840 | ||
|
|
d90080f325 | ||
|
|
0c676b90f2 | ||
|
|
c2d8bda83c | ||
|
|
302ebf737e | ||
|
|
816c95a4ae | ||
|
|
40106f6fcc | ||
|
|
61143b3ad1 | ||
|
|
9b64eebd10 | ||
|
|
731418349f | ||
|
|
7728920670 | ||
|
|
f555bb95ae |
59
Dockerfile
59
Dockerfile
@@ -10,10 +10,6 @@ RUN ./configure && make
|
||||
|
||||
FROM python:3.7-slim
|
||||
|
||||
WORKDIR /usr/src/paperless/
|
||||
|
||||
COPY requirements.txt ./
|
||||
|
||||
# Binary dependencies
|
||||
RUN apt-get update \
|
||||
&& apt-get -y --no-install-recommends install \
|
||||
@@ -49,16 +45,24 @@ RUN apt-get update \
|
||||
tesseract-ocr-spa \
|
||||
unpaper \
|
||||
zlib1g \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# This pulls in updated dependencies from bullseye to fix some issues with file type detection.
|
||||
# TODO: Remove this once bullseye releases.
|
||||
RUN echo "deb http://deb.debian.org/debian bullseye main" > /etc/apt/sources.list.d/bullseye.list \
|
||||
&& echo "deb http://deb.debian.org/debian bullseye main" > /etc/apt/sources.list.d/bullseye.list \
|
||||
&& apt-get update \
|
||||
&& apt-get install --no-install-recommends -y file libmagic-dev \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& rm /etc/apt/sources.list.d/bullseye.list
|
||||
|
||||
# copy jbig2enc
|
||||
COPY --from=jbig2enc /usr/src/jbig2enc/src/.libs/libjbig2enc* /usr/local/lib/
|
||||
COPY --from=jbig2enc /usr/src/jbig2enc/src/jbig2 /usr/local/bin/
|
||||
COPY --from=jbig2enc /usr/src/jbig2enc/src/*.h /usr/local/include/
|
||||
|
||||
WORKDIR /usr/src/paperless/src/
|
||||
|
||||
COPY requirements.txt ../
|
||||
|
||||
# Python dependencies
|
||||
RUN apt-get update \
|
||||
&& apt-get -y --no-install-recommends install \
|
||||
@@ -67,41 +71,36 @@ RUN apt-get update \
|
||||
libpq-dev \
|
||||
libqpdf-dev \
|
||||
&& python3 -m pip install --upgrade --no-cache-dir supervisor \
|
||||
&& python3 -m pip install --no-cache-dir -r requirements.txt \
|
||||
&& python3 -m pip install --no-cache-dir -r ../requirements.txt \
|
||||
&& apt-get -y purge build-essential libqpdf-dev \
|
||||
&& apt-get -y autoremove --purge \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& mkdir /var/log/supervisord /var/run/supervisord
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# setup docker-specific things
|
||||
COPY docker/ ./docker/
|
||||
|
||||
# copy scripts
|
||||
# this fixes issues with imagemagick and PDF
|
||||
COPY docker/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml
|
||||
|
||||
COPY gunicorn.conf.py ./
|
||||
COPY docker/supervisord.conf /etc/supervisord.conf
|
||||
COPY docker/docker-entrypoint.sh /sbin/docker-entrypoint.sh
|
||||
|
||||
# copy jbig2enc
|
||||
COPY --from=jbig2enc /usr/src/jbig2enc/src/.libs/libjbig2enc* /usr/local/lib/
|
||||
COPY --from=jbig2enc /usr/src/jbig2enc/src/jbig2 /usr/local/bin/
|
||||
COPY --from=jbig2enc /usr/src/jbig2enc/src/*.h /usr/local/include/
|
||||
RUN cd docker \
|
||||
&& cp imagemagick-policy.xml /etc/ImageMagick-6/policy.xml \
|
||||
&& mkdir /var/log/supervisord /var/run/supervisord \
|
||||
&& cp supervisord.conf /etc/supervisord.conf \
|
||||
&& cp docker-entrypoint.sh /sbin/docker-entrypoint.sh \
|
||||
&& chmod 755 /sbin/docker-entrypoint.sh \
|
||||
&& chmod +x install_management_commands.sh \
|
||||
&& ./install_management_commands.sh \
|
||||
&& cd .. \
|
||||
&& rm docker -rf
|
||||
|
||||
COPY gunicorn.conf.py ../
|
||||
|
||||
# copy app
|
||||
COPY src/ ./src/
|
||||
COPY src/ ./
|
||||
|
||||
# add users, setup scripts
|
||||
RUN addgroup --gid 1000 paperless \
|
||||
&& useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \
|
||||
&& chown -R paperless:paperless . \
|
||||
&& chmod 755 /sbin/docker-entrypoint.sh
|
||||
|
||||
WORKDIR /usr/src/paperless/src/
|
||||
|
||||
RUN sudo -HEu paperless python3 manage.py collectstatic --clear --no-input
|
||||
|
||||
RUN sudo -HEu paperless python3 manage.py compilemessages
|
||||
&& chown -R paperless:paperless ../ \
|
||||
&& sudo -HEu paperless python3 manage.py collectstatic --clear --no-input \
|
||||
&& sudo -HEu paperless python3 manage.py compilemessages
|
||||
|
||||
VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/media", "/usr/src/paperless/consume", "/usr/src/paperless/export"]
|
||||
ENTRYPOINT ["/sbin/docker-entrypoint.sh"]
|
||||
|
||||
4
Pipfile
4
Pipfile
@@ -52,6 +52,10 @@ channels-redis = "*"
|
||||
uvicorn = {extras = ["standard"], version = "*"}
|
||||
concurrent-log-handler = "*"
|
||||
django-redis = "*"
|
||||
# uvloop 0.15+ incompatible with python 3.6
|
||||
uvloop = "~=0.14.0"
|
||||
# TODO: keep an eye on piwheel builds and update this once available (https://www.piwheels.org/project/cryptography/)
|
||||
cryptography = "~=3.3.2"
|
||||
|
||||
[dev-packages]
|
||||
coveralls = "*"
|
||||
|
||||
67
Pipfile.lock
generated
67
Pipfile.lock
generated
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "0c2003b9d3d95d1af594f749a2740b55079551ea0ae512177ee9524bb327281e"
|
||||
"sha256": "b3bed0a6b8981e8fffc1b6aa3bc35a0b1472f28e6f745c62469eb8045740e57b"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {},
|
||||
@@ -190,24 +190,24 @@
|
||||
},
|
||||
"cryptography": {
|
||||
"hashes": [
|
||||
"sha256:0003a52a123602e1acee177dc90dd201f9bb1e73f24a070db7d36c588e8f5c7d",
|
||||
"sha256:0e85aaae861d0485eb5a79d33226dd6248d2a9f133b81532c8f5aae37de10ff7",
|
||||
"sha256:594a1db4511bc4d960571536abe21b4e5c3003e8750ab8365fafce71c5d86901",
|
||||
"sha256:69e836c9e5ff4373ce6d3ab311c1a2eed274793083858d3cd4c7d12ce20d5f9c",
|
||||
"sha256:788a3c9942df5e4371c199d10383f44a105d67d401fb4304178020142f020244",
|
||||
"sha256:7e177e4bea2de937a584b13645cab32f25e3d96fc0bc4a4cf99c27dc77682be6",
|
||||
"sha256:83d9d2dfec70364a74f4e7c70ad04d3ca2e6a08b703606993407bf46b97868c5",
|
||||
"sha256:84ef7a0c10c24a7773163f917f1cb6b4444597efd505a8aed0a22e8c4780f27e",
|
||||
"sha256:982f661bffc7a24b6d4f8ebe3291f17cf3833a0941c6f4d9d55c790b9aa2cdb3",
|
||||
"sha256:9e21301f7a1e7c03dbea73e8602905a4ebba641547a462b26dd03451e5769e7c",
|
||||
"sha256:9f6b0492d111b43de5f70052e24c1f0951cb9e6022188ebcb1cc3a3d301469b0",
|
||||
"sha256:a69bd3c68b98298f490e84519b954335154917eaab52cf582fa2c5c7efc6e812",
|
||||
"sha256:b4890d5fb9b7a23e3bf8abf5a8a7da8e228f1e97dc96b30b95685df840b6914a",
|
||||
"sha256:c366df0401d1ec4e548bebe8f91d55ebcc0ec3137900d214dd7aac8427ef3030",
|
||||
"sha256:dc42f645f8f3a489c3dd416730a514e7a91a59510ddaadc09d04224c098d3302"
|
||||
"sha256:0d7b69674b738068fa6ffade5c962ecd14969690585aaca0a1b1fc9058938a72",
|
||||
"sha256:1bd0ccb0a1ed775cd7e2144fe46df9dc03eefd722bbcf587b3e0616ea4a81eff",
|
||||
"sha256:3c284fc1e504e88e51c428db9c9274f2da9f73fdf5d7e13a36b8ecb039af6e6c",
|
||||
"sha256:49570438e60f19243e7e0d504527dd5fe9b4b967b5a1ff21cc12b57602dd85d3",
|
||||
"sha256:541dd758ad49b45920dda3b5b48c968f8b2533d8981bcdb43002798d8f7a89ed",
|
||||
"sha256:5a60d3780149e13b7a6ff7ad6526b38846354d11a15e21068e57073e29e19bed",
|
||||
"sha256:7951a966613c4211b6612b0352f5bf29989955ee592c4a885d8c7d0f830d0433",
|
||||
"sha256:922f9602d67c15ade470c11d616f2b2364950602e370c76f0c94c94ae672742e",
|
||||
"sha256:a0f0b96c572fc9f25c3f4ddbf4688b9b38c69836713fb255f4a2715d93cbaf44",
|
||||
"sha256:a777c096a49d80f9d2979695b835b0f9c9edab73b59e4ceb51f19724dda887ed",
|
||||
"sha256:a9a4ac9648d39ce71c2f63fe7dc6db144b9fa567ddfc48b9fde1b54483d26042",
|
||||
"sha256:aa4969f24d536ae2268c902b2c3d62ab464b5a66bcb247630d208a79a8098e9b",
|
||||
"sha256:c7390f9b2119b2b43160abb34f63277a638504ef8df99f11cb52c1fda66a2e6f",
|
||||
"sha256:ddd06e71c449a4fe10d0c60846280ee35d69ce49e3e413ce46d5f129e1468083",
|
||||
"sha256:e18e6ab84dfb0ab997faf8cca25a86ff15dfea4027b986322026cc99e0a892da"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
|
||||
"version": "==3.3.1"
|
||||
"index": "pypi",
|
||||
"version": "==3.3.2"
|
||||
},
|
||||
"daphne": {
|
||||
"hashes": [
|
||||
@@ -243,11 +243,11 @@
|
||||
},
|
||||
"django-extensions": {
|
||||
"hashes": [
|
||||
"sha256:7cd002495ff0a0e5eb6cdd6be759600905b4e4079232ea27618fc46bdd853651",
|
||||
"sha256:c7f88625a53f631745d4f2bef9ec4dcb999ed59476393bdbbe99db8596778846"
|
||||
"sha256:674ad4c3b1587a884881824f40212d51829e662e52f85b012cd83d83fe1271d9",
|
||||
"sha256:9507f8761ee760748938fd8af766d0608fb2738cf368adfa1b2451f61c15ae35"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==3.1.0"
|
||||
"version": "==3.1.1"
|
||||
},
|
||||
"django-filter": {
|
||||
"hashes": [
|
||||
@@ -462,11 +462,11 @@
|
||||
},
|
||||
"joblib": {
|
||||
"hashes": [
|
||||
"sha256:75ead23f13484a2a414874779d69ade40d4fa1abe62b222a23cd50d4bc822f6f",
|
||||
"sha256:7ad866067ac1fdec27d51c8678ea760601b70e32ff1881d4dc8e1171f2b64b24"
|
||||
"sha256:9c17567692206d2f3fb9ecf5e991084254fe631665c450b443761c4186a613f7",
|
||||
"sha256:feeb1ec69c4d45129954f1b7034954241eedfd6ba39b5e9e4b6883be3332d5e5"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==1.0.0"
|
||||
"version": "==1.0.1"
|
||||
},
|
||||
"langdetect": {
|
||||
"hashes": [
|
||||
@@ -1113,11 +1113,11 @@
|
||||
},
|
||||
"tqdm": {
|
||||
"hashes": [
|
||||
"sha256:4621f6823bab46a9cc33d48105753ccbea671b68bab2c50a9f0be23d4065cb5a",
|
||||
"sha256:fe3d08dd00a526850568d542ff9de9bbc2a09a791da3c334f3213d8d0bbbca65"
|
||||
"sha256:2874fa525c051177583ec59c0fb4583e91f28ccd3f217ffad2acdb32d2c789ac",
|
||||
"sha256:ab9b659241d82b8b51b2269ee243ec95286046bf06015c4e15a947cc15914211"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==4.56.0"
|
||||
"version": "==4.56.1"
|
||||
},
|
||||
"twisted": {
|
||||
"extras": [
|
||||
@@ -1201,6 +1201,7 @@
|
||||
"sha256:e7514d7a48c063226b7d06617cbb12a14278d4323a065a8d46a7962686ce2e95",
|
||||
"sha256:f07909cd9fc08c52d294b1570bba92186181ca01fe3dc9ffba68955273dd7362"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.14.0"
|
||||
},
|
||||
"watchdog": {
|
||||
@@ -1506,11 +1507,11 @@
|
||||
},
|
||||
"faker": {
|
||||
"hashes": [
|
||||
"sha256:190f0d3ce037866b5d230f0b9fd0f513f07c25dc326dcad6ee019849c68d441c",
|
||||
"sha256:db7adc3b4755005fc960cf96fb4ed46b54b6eb21413741ab3f31a9595f379905"
|
||||
"sha256:bf2a9b3f8d00a5dada61fc4a3f80fe0d6795c7f02a138a7d2ef2db5817c7d017",
|
||||
"sha256:d4aecdb877519d06c2fdc01ffc5ecf70658981acf5e13cd07ded9892994ef5c6"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==6.0.0"
|
||||
"version": "==6.1.1"
|
||||
},
|
||||
"filelock": {
|
||||
"hashes": [
|
||||
@@ -1713,11 +1714,11 @@
|
||||
},
|
||||
"pytest-xdist": {
|
||||
"hashes": [
|
||||
"sha256:1d8edbb1a45e8e1f8e44b1260583107fc23f8bc8da6d18cb331ff61d41258ecf",
|
||||
"sha256:f127e11e84ad37cc1de1088cb2990f3c354630d428af3f71282de589c5bb779b"
|
||||
"sha256:2447a1592ab41745955fb870ac7023026f20a5f0bfccf1b52a879bd193d46450",
|
||||
"sha256:718887296892f92683f6a51f25a3ae584993b06f7076ce1e1fd482e59a8220a2"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.2.0"
|
||||
"version": "==2.2.1"
|
||||
},
|
||||
"python-dateutil": {
|
||||
"hashes": [
|
||||
|
||||
@@ -38,7 +38,7 @@
|
||||
|
||||
- name: verify uploaded document has been accepted
|
||||
uri:
|
||||
url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/"
|
||||
url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/paperless/"
|
||||
headers:
|
||||
Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}'
|
||||
return_content: yes
|
||||
@@ -51,7 +51,7 @@
|
||||
|
||||
- name: verify uploaded document has been consumed
|
||||
uri:
|
||||
url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/"
|
||||
url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/paperless/"
|
||||
headers:
|
||||
Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}'
|
||||
return_content: yes
|
||||
|
||||
6
docker/install_management_commands.sh
Executable file
6
docker/install_management_commands.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails;
|
||||
do
|
||||
echo "installing $command..."
|
||||
sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command
|
||||
chmod +x /usr/local/bin/$command
|
||||
done
|
||||
15
docker/management_script.sh
Normal file
15
docker/management_script.sh
Normal file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
cd /usr/src/paperless/src/
|
||||
|
||||
if [[ $(id -u) == 0 ]] ;
|
||||
then
|
||||
sudo -HEu paperless python3 manage.py management_command "$@"
|
||||
elif [[ $(id -un) == "paperless" ]] ;
|
||||
then
|
||||
python3 manage.py management_command "$@"
|
||||
else
|
||||
echo "Unknown user."
|
||||
fi
|
||||
@@ -23,6 +23,12 @@ Options available to any installation of paperless:
|
||||
* The document exporter is also able to update an already existing export.
|
||||
Therefore, incremental backups with ``rsync`` are entirely possible.
|
||||
|
||||
.. caution::
|
||||
|
||||
You cannot import the export generated with one version of paperless in a
|
||||
different version of paperless. The export contains an exact image of the
|
||||
database, and migrations may change the database layout.
|
||||
|
||||
Options available to docker installations:
|
||||
|
||||
* Backup the docker volumes. These usually reside within
|
||||
@@ -101,17 +107,17 @@ Then you can start paperless-ng with ``-d`` to have it run in the background.
|
||||
update to newer versions. In order to enable updates as described above, either
|
||||
get the new ``docker-compose.yml`` file from `here <https://github.com/jonaswinkler/paperless-ng/tree/master/docker/compose>`_
|
||||
or edit the ``docker-compose.yml`` file, find the line that says
|
||||
|
||||
|
||||
.. code::
|
||||
|
||||
image: jonaswinkler/paperless-ng:0.9.x
|
||||
|
||||
|
||||
and replace the version with ``latest``:
|
||||
|
||||
.. code::
|
||||
|
||||
image: jonaswinkler/paperless-ng:latest
|
||||
|
||||
|
||||
Bare Metal Route
|
||||
================
|
||||
|
||||
@@ -171,26 +177,63 @@ Most of the update process is automated when using the ansible role.
|
||||
$ ansible-playbook playbook.yml
|
||||
|
||||
|
||||
Downgrading Paperless
|
||||
#####################
|
||||
|
||||
Downgrades are possible. However, some updates also contain database migrations (these change the layout of the database and may move data).
|
||||
In order to move back from a version that applied database migrations, you'll have to revert the database migration *before* downgrading,
|
||||
and then downgrade paperless.
|
||||
|
||||
This table lists the most recent database migrations for each versions:
|
||||
|
||||
+---------+-------------------------+
|
||||
| Version | Latest migration number |
|
||||
+---------+-------------------------+
|
||||
| 1.0.0 | 1011 |
|
||||
+---------+-------------------------+
|
||||
| 1.1.0 | 1011 |
|
||||
+---------+-------------------------+
|
||||
| 1.1.1 | 1012 |
|
||||
+---------+-------------------------+
|
||||
|
||||
Execute the following management command to migrate your database:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ python3 manage.py migrate documents <migration number>
|
||||
|
||||
.. note::
|
||||
|
||||
Some migrations cannot be undone. The command will issue errors if that happens.
|
||||
|
||||
.. _utilities-management-commands:
|
||||
|
||||
Management utilities
|
||||
####################
|
||||
|
||||
Paperless comes with some management commands that perform various maintenance
|
||||
tasks on your paperless instance. You can invoke these commands either by
|
||||
tasks on your paperless instance. You can invoke these commands in the following way:
|
||||
|
||||
With docker-compose, while paperless is running:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ cd /path/to/paperless
|
||||
$ docker-compose run --rm webserver <command> <arguments>
|
||||
$ docker-compose exec webserver <command> <arguments>
|
||||
|
||||
or
|
||||
With docker, while paperless is running:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ docker exec -it <container-name> <command> <arguments>
|
||||
|
||||
Bare metal:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ cd /path/to/paperless/src
|
||||
$ python3 manage.py <command> <arguments>
|
||||
|
||||
depending on whether you use docker or not.
|
||||
|
||||
All commands have built-in help, which can be accessed by executing them with
|
||||
the argument ``--help``.
|
||||
|
||||
@@ -210,7 +253,7 @@ backup or migration to another DMS.
|
||||
-c, --compare-checksums
|
||||
-f, --use-filename-format
|
||||
-d, --delete
|
||||
|
||||
|
||||
``target`` is a folder to which the data gets written. This includes documents,
|
||||
thumbnails and a ``manifest.json`` file. The manifest contains all metadata from
|
||||
the database (correspondents, tags, etc).
|
||||
|
||||
@@ -217,6 +217,7 @@ will create a directory structure as follows:
|
||||
|
||||
Paperless provides the following placeholders withing filenames:
|
||||
|
||||
* ``{asn}``: The archive serial number of the document, or "none".
|
||||
* ``{correspondent}``: The name of the correspondent, or "none".
|
||||
* ``{document_type}``: The name of the document type, or "none".
|
||||
* ``{tag_list}``: A comma separated list of all tags assigned to the document.
|
||||
|
||||
@@ -5,6 +5,27 @@
|
||||
Changelog
|
||||
*********
|
||||
|
||||
paperless-ng 1.1.1
|
||||
##################
|
||||
|
||||
This release contains new database migrations.
|
||||
|
||||
* Fixed a bug in the sanity checker that would cause it to display "x not in list" errors instead of actual issues.
|
||||
|
||||
* Fixed a bug with filename generation for archive filenames that would cause the archive files of two documents to overlap.
|
||||
|
||||
* This happened when ``PAPERLESS_FILENAME_FORMAT`` is used and the filenames of two or more documents are the same, except for the file extension.
|
||||
* Paperless will now store the archive filename in the database as well instead of deriving it from the original filename, and use the
|
||||
same logic for detecting and avoiding filename clashes that's also used for original filenames.
|
||||
* The migrations will repair any missing archive files. If you're using tika, ensure that tika is running while performing the migration. Docker-compose will take care of that.
|
||||
|
||||
* Fixed a bug with thumbnail regeneration when TIKA integration was used.
|
||||
|
||||
* Added ASN as a placeholder field to the filename format.
|
||||
|
||||
* The docker image now comes with built-in shortcuts for most management commands. These are now the recommended way to execute management commands, since these
|
||||
also ensure that they're always executed as the paperless user and you're less likely to run into permission issues. See :ref:`utilities-management-commands`.
|
||||
|
||||
paperless-ng 1.1.0
|
||||
##################
|
||||
|
||||
@@ -17,7 +38,7 @@ paperless-ng 1.1.0
|
||||
or added with one of the mobile apps.
|
||||
* Documents are successfully added to paperless.
|
||||
* Document consumption failed (with error messages)
|
||||
|
||||
|
||||
* Configuration options to enable/disable individual notifications.
|
||||
|
||||
* Live updates to document lists and saved views when new documents are added.
|
||||
|
||||
@@ -24,11 +24,11 @@ click==7.1.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2,
|
||||
coloredlogs==15.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
|
||||
concurrent-log-handler==0.9.19
|
||||
constantly==15.1.0
|
||||
cryptography==3.3.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
|
||||
cryptography==3.3.2
|
||||
daphne==3.0.1; python_version >= '3.6'
|
||||
dateparser==0.7.6
|
||||
django-cors-headers==3.7.0
|
||||
django-extensions==3.1.0
|
||||
django-extensions==3.1.1
|
||||
django-filter==2.4.0
|
||||
django-picklefield==3.0.1; python_version >= '3'
|
||||
django-q==1.3.4
|
||||
@@ -49,7 +49,7 @@ img2pdf==0.4.0
|
||||
incremental==17.5.0
|
||||
inotify-simple==1.3.5; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
inotifyrecursive==0.3.5
|
||||
joblib==1.0.0; python_version >= '3.6'
|
||||
joblib==1.0.1; python_version >= '3.6'
|
||||
langdetect==1.0.8
|
||||
lxml==4.6.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
|
||||
msgpack==1.0.2
|
||||
@@ -87,7 +87,7 @@ sortedcontainers==2.3.0
|
||||
sqlparse==0.4.1; python_version >= '3.5'
|
||||
threadpoolctl==2.1.0; python_version >= '3.5'
|
||||
tika==1.24
|
||||
tqdm==4.56.0
|
||||
tqdm==4.56.1
|
||||
twisted[tls]==20.3.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
|
||||
txaio==20.12.1; python_version >= '3.6'
|
||||
tzlocal==2.1
|
||||
|
||||
@@ -2,7 +2,7 @@ export const environment = {
|
||||
production: true,
|
||||
apiBaseUrl: "/api/",
|
||||
appTitle: "Paperless-ng",
|
||||
version: "1.1.0",
|
||||
version: "1.1.1",
|
||||
webSocketHost: window.location.host,
|
||||
webSocketProtocol: (window.location.protocol == "https:" ? "wss:" : "ws:")
|
||||
};
|
||||
|
||||
BIN
src/clash.pdf
Normal file
BIN
src/clash.pdf
Normal file
Binary file not shown.
@@ -50,26 +50,31 @@ class DocumentAdmin(admin.ModelAdmin):
|
||||
"modified",
|
||||
"mime_type",
|
||||
"storage_type",
|
||||
"filename")
|
||||
"filename",
|
||||
"checksum",
|
||||
"archive_filename",
|
||||
"archive_checksum"
|
||||
)
|
||||
|
||||
list_display_links = ("title",)
|
||||
|
||||
list_display = (
|
||||
"correspondent",
|
||||
"id",
|
||||
"title",
|
||||
"tags_",
|
||||
"created",
|
||||
"mime_type",
|
||||
"filename",
|
||||
"archive_filename"
|
||||
)
|
||||
|
||||
list_filter = (
|
||||
"document_type",
|
||||
"tags",
|
||||
"correspondent"
|
||||
("mime_type"),
|
||||
("archive_serial_number", admin.EmptyFieldListFilter),
|
||||
("archive_filename", admin.EmptyFieldListFilter),
|
||||
)
|
||||
|
||||
filter_horizontal = ("tags",)
|
||||
|
||||
ordering = ["-created"]
|
||||
ordering = ["-id"]
|
||||
|
||||
date_hierarchy = "created"
|
||||
|
||||
@@ -95,26 +100,6 @@ class DocumentAdmin(admin.ModelAdmin):
|
||||
index.add_or_update_document(obj)
|
||||
super(DocumentAdmin, self).save_model(request, obj, form, change)
|
||||
|
||||
@mark_safe
|
||||
def tags_(self, obj):
|
||||
r = ""
|
||||
for tag in obj.tags.all():
|
||||
r += self._html_tag(
|
||||
"span",
|
||||
tag.name + ", "
|
||||
)
|
||||
return r
|
||||
|
||||
@staticmethod
|
||||
def _html_tag(kind, inside=None, **kwargs):
|
||||
attributes = format_html_join(' ', '{}="{}"', kwargs.items())
|
||||
|
||||
if inside is not None:
|
||||
return format_html("<{kind} {attributes}>{inside}</{kind}>",
|
||||
kind=kind, attributes=attributes, inside=inside)
|
||||
|
||||
return format_html("<{} {}/>", kind, attributes)
|
||||
|
||||
|
||||
class RuleInline(admin.TabularInline):
|
||||
model = SavedViewFilterRule
|
||||
|
||||
@@ -43,9 +43,9 @@ def load_classifier():
|
||||
version=version, timeout=86400)
|
||||
except (EOFError, IncompatibleClassifierVersionError) as e:
|
||||
# there's something wrong with the model file.
|
||||
logger.error(
|
||||
logger.exception(
|
||||
f"Unrecoverable error while loading document "
|
||||
f"classification model: {str(e)}, deleting model file."
|
||||
f"classification model, deleting model file."
|
||||
)
|
||||
os.unlink(settings.MODEL_FILE)
|
||||
classifier = None
|
||||
|
||||
@@ -241,7 +241,7 @@ class Consumer(LoggingMixin):
|
||||
self._send_progress(70, 100, 'WORKING',
|
||||
MESSAGE_GENERATING_THUMBNAIL)
|
||||
thumbnail = document_parser.get_optimised_thumbnail(
|
||||
self.path, mime_type)
|
||||
self.path, mime_type, self.filename)
|
||||
|
||||
text = document_parser.get_text()
|
||||
date = document_parser.get_date()
|
||||
@@ -292,8 +292,7 @@ class Consumer(LoggingMixin):
|
||||
# After everything is in the database, copy the files into
|
||||
# place. If this fails, we'll also rollback the transaction.
|
||||
with FileLock(settings.MEDIA_LOCK):
|
||||
document.filename = generate_unique_filename(
|
||||
document, settings.ORIGINALS_DIR)
|
||||
document.filename = generate_unique_filename(document)
|
||||
create_source_path_directory(document.source_path)
|
||||
|
||||
self._write(document.storage_type,
|
||||
@@ -303,6 +302,10 @@ class Consumer(LoggingMixin):
|
||||
thumbnail, document.thumbnail_path)
|
||||
|
||||
if archive_path and os.path.isfile(archive_path):
|
||||
document.archive_filename = generate_unique_filename(
|
||||
document,
|
||||
archive_filename=True
|
||||
)
|
||||
create_source_path_directory(document.archive_path)
|
||||
self._write(document.storage_type,
|
||||
archive_path, document.archive_path)
|
||||
|
||||
@@ -79,12 +79,40 @@ def many_to_dictionary(field):
|
||||
return mydictionary
|
||||
|
||||
|
||||
def generate_unique_filename(doc, root):
|
||||
def generate_unique_filename(doc,
|
||||
archive_filename=False):
|
||||
"""
|
||||
Generates a unique filename for doc in settings.ORIGINALS_DIR.
|
||||
|
||||
The returned filename is guaranteed to be either the current filename
|
||||
of the document if unchanged, or a new filename that does not correspondent
|
||||
to any existing files. The function will append _01, _02, etc to the
|
||||
filename before the extension to avoid conflicts.
|
||||
|
||||
If archive_filename is True, return a unique archive filename instead.
|
||||
|
||||
"""
|
||||
if archive_filename:
|
||||
old_filename = doc.archive_filename
|
||||
root = settings.ARCHIVE_DIR
|
||||
else:
|
||||
old_filename = doc.filename
|
||||
root = settings.ORIGINALS_DIR
|
||||
|
||||
# If generating archive filenames, try to make a name that is similar to
|
||||
# the original filename first.
|
||||
|
||||
if archive_filename and doc.filename:
|
||||
new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
|
||||
if new_filename == old_filename or not os.path.exists(os.path.join(root, new_filename)): # NOQA: E501
|
||||
return new_filename
|
||||
|
||||
counter = 0
|
||||
|
||||
while True:
|
||||
new_filename = generate_filename(doc, counter)
|
||||
if new_filename == doc.filename:
|
||||
new_filename = generate_filename(
|
||||
doc, counter, archive_filename=archive_filename)
|
||||
if new_filename == old_filename:
|
||||
# still the same as before.
|
||||
return new_filename
|
||||
|
||||
@@ -94,7 +122,7 @@ def generate_unique_filename(doc, root):
|
||||
return new_filename
|
||||
|
||||
|
||||
def generate_filename(doc, counter=0, append_gpg=True):
|
||||
def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
||||
path = ""
|
||||
|
||||
try:
|
||||
@@ -123,6 +151,11 @@ def generate_filename(doc, counter=0, append_gpg=True):
|
||||
else:
|
||||
document_type = "none"
|
||||
|
||||
if doc.archive_serial_number:
|
||||
asn = str(doc.archive_serial_number)
|
||||
else:
|
||||
asn = "none"
|
||||
|
||||
path = settings.PAPERLESS_FILENAME_FORMAT.format(
|
||||
title=pathvalidate.sanitize_filename(
|
||||
doc.title, replacement_text="-"),
|
||||
@@ -136,6 +169,7 @@ def generate_filename(doc, counter=0, append_gpg=True):
|
||||
added_year=doc.added.year if doc.added else "none",
|
||||
added_month=f"{doc.added.month:02}" if doc.added else "none",
|
||||
added_day=f"{doc.added.day:02}" if doc.added else "none",
|
||||
asn=asn,
|
||||
tags=tags,
|
||||
tag_list=tag_list
|
||||
).strip()
|
||||
@@ -148,18 +182,16 @@ def generate_filename(doc, counter=0, append_gpg=True):
|
||||
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
|
||||
|
||||
counter_str = f"_{counter:02}" if counter else ""
|
||||
|
||||
filetype_str = ".pdf" if archive_filename else doc.file_type
|
||||
|
||||
if len(path) > 0:
|
||||
filename = f"{path}{counter_str}{doc.file_type}"
|
||||
filename = f"{path}{counter_str}{filetype_str}"
|
||||
else:
|
||||
filename = f"{doc.pk:07}{counter_str}{doc.file_type}"
|
||||
filename = f"{doc.pk:07}{counter_str}{filetype_str}"
|
||||
|
||||
# Append .gpg for encrypted files
|
||||
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
|
||||
filename += ".gpg"
|
||||
|
||||
return filename
|
||||
|
||||
|
||||
def archive_name_from_filename(filename):
|
||||
|
||||
return os.path.splitext(filename)[0] + ".pdf"
|
||||
|
||||
@@ -78,8 +78,8 @@ def open_index(recreate=False):
|
||||
try:
|
||||
if exists_in(settings.INDEX_DIR) and not recreate:
|
||||
return open_dir(settings.INDEX_DIR, schema=get_schema())
|
||||
except Exception as e:
|
||||
logger.error(f"Error while opening the index: {e}, recreating.")
|
||||
except Exception:
|
||||
logger.exception(f"Error while opening the index, recreating.")
|
||||
|
||||
if not os.path.isdir(settings.INDEX_DIR):
|
||||
os.makedirs(settings.INDEX_DIR, exist_ok=True)
|
||||
|
||||
@@ -16,7 +16,8 @@ from whoosh.writing import AsyncWriter
|
||||
|
||||
from documents.models import Document
|
||||
from ... import index
|
||||
from ...file_handling import create_source_path_directory
|
||||
from ...file_handling import create_source_path_directory, \
|
||||
generate_unique_filename
|
||||
from ...parsers import get_parser_class_for_mime_type
|
||||
|
||||
|
||||
@@ -39,13 +40,16 @@ def handle_document(document_id):
|
||||
with transaction.atomic():
|
||||
with open(parser.get_archive_path(), 'rb') as f:
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
# i'm going to save first so that in case the file move
|
||||
# I'm going to save first so that in case the file move
|
||||
# fails, the database is rolled back.
|
||||
# we also don't use save() since that triggers the filehandling
|
||||
# We also don't use save() since that triggers the filehandling
|
||||
# logic, and we don't want that yet (file not yet in place)
|
||||
document.archive_filename = generate_unique_filename(
|
||||
document, archive_filename=True)
|
||||
Document.objects.filter(pk=document.pk).update(
|
||||
archive_checksum=checksum,
|
||||
content=parser.get_text()
|
||||
content=parser.get_text(),
|
||||
archive_filename=document.archive_filename
|
||||
)
|
||||
with FileLock(settings.MEDIA_LOCK):
|
||||
create_source_path_directory(document.archive_path)
|
||||
@@ -56,7 +60,7 @@ def handle_document(document_id):
|
||||
index.update_document(writer, document)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error while parsing document {document}: {str(e)}")
|
||||
logger.exception(f"Error while parsing document {document}")
|
||||
finally:
|
||||
parser.cleanup()
|
||||
|
||||
@@ -101,7 +105,7 @@ class Command(BaseCommand):
|
||||
document_ids = list(map(
|
||||
lambda doc: doc.id,
|
||||
filter(
|
||||
lambda d: overwrite or not d.archive_checksum,
|
||||
lambda d: overwrite or not d.has_archive_version,
|
||||
documents
|
||||
)
|
||||
))
|
||||
|
||||
@@ -54,8 +54,7 @@ def _consume(filepath):
|
||||
if settings.CONSUMER_SUBDIRS_AS_TAGS:
|
||||
tag_ids = _tags_from_path(filepath)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Error creating tags from path: {}".format(e))
|
||||
logger.exception("Error creating tags from path")
|
||||
|
||||
try:
|
||||
async_task("documents.tasks.consume_file",
|
||||
@@ -66,8 +65,7 @@ def _consume(filepath):
|
||||
# Catch all so that the consumer won't crash.
|
||||
# This is also what the test case is listening for to check for
|
||||
# errors.
|
||||
logger.error(
|
||||
"Error while consuming document: {}".format(e))
|
||||
logger.exception("Error while consuming document")
|
||||
|
||||
|
||||
def _consume_wait_unmodified(file, num_tries=20, wait_time=1):
|
||||
|
||||
@@ -139,7 +139,7 @@ class Command(BaseCommand):
|
||||
thumbnail_target = os.path.join(self.target, thumbnail_name)
|
||||
document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
|
||||
|
||||
if os.path.exists(document.archive_path):
|
||||
if document.has_archive_version:
|
||||
archive_name = base_name + "-archive.pdf"
|
||||
archive_target = os.path.join(self.target, archive_name)
|
||||
document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
|
||||
|
||||
@@ -151,6 +151,9 @@ class Command(BaseCommand):
|
||||
shutil.copy2(thumbnail_path, document.thumbnail_path)
|
||||
if archive_path:
|
||||
create_source_path_directory(document.archive_path)
|
||||
# TODO: this assumes that the export is valid and
|
||||
# archive_filename is present on all documents with
|
||||
# archived files
|
||||
shutil.copy2(archive_path, document.archive_path)
|
||||
|
||||
document.save()
|
||||
|
||||
@@ -22,7 +22,10 @@ def _process_document(doc_in):
|
||||
|
||||
try:
|
||||
thumb = parser.get_optimised_thumbnail(
|
||||
document.source_path, document.mime_type)
|
||||
document.source_path,
|
||||
document.mime_type,
|
||||
document.get_public_filename()
|
||||
)
|
||||
|
||||
shutil.move(thumb, document.thumbnail_path)
|
||||
finally:
|
||||
|
||||
330
src/documents/migrations/1012_fix_archive_files.py
Normal file
330
src/documents/migrations/1012_fix_archive_files.py
Normal file
@@ -0,0 +1,330 @@
|
||||
# Generated by Django 3.1.6 on 2021-02-07 22:26
|
||||
import datetime
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from time import sleep
|
||||
|
||||
import pathvalidate
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
from django.template.defaultfilters import slugify
|
||||
|
||||
from documents.file_handling import defaultdictNoStr, many_to_dictionary
|
||||
|
||||
|
||||
logger = logging.getLogger("paperless.migrations")
|
||||
|
||||
###############################################################################
|
||||
# This is code copied straight paperless before the change.
|
||||
###############################################################################
|
||||
|
||||
def archive_name_from_filename(filename):
|
||||
return os.path.splitext(filename)[0] + ".pdf"
|
||||
|
||||
|
||||
def archive_path_old(doc):
|
||||
if doc.filename:
|
||||
fname = archive_name_from_filename(doc.filename)
|
||||
else:
|
||||
fname = "{:07}.pdf".format(doc.pk)
|
||||
|
||||
return os.path.join(
|
||||
settings.ARCHIVE_DIR,
|
||||
fname
|
||||
)
|
||||
|
||||
|
||||
STORAGE_TYPE_GPG = "gpg"
|
||||
|
||||
|
||||
def archive_path_new(doc):
|
||||
if doc.archive_filename is not None:
|
||||
return os.path.join(
|
||||
settings.ARCHIVE_DIR,
|
||||
str(doc.archive_filename)
|
||||
)
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def source_path(doc):
|
||||
if doc.filename:
|
||||
fname = str(doc.filename)
|
||||
else:
|
||||
fname = "{:07}{}".format(doc.pk, doc.file_type)
|
||||
if doc.storage_type == STORAGE_TYPE_GPG:
|
||||
fname += ".gpg" # pragma: no cover
|
||||
|
||||
return os.path.join(
|
||||
settings.ORIGINALS_DIR,
|
||||
fname
|
||||
)
|
||||
|
||||
|
||||
def generate_unique_filename(doc, archive_filename=False):
|
||||
if archive_filename:
|
||||
old_filename = doc.archive_filename
|
||||
root = settings.ARCHIVE_DIR
|
||||
else:
|
||||
old_filename = doc.filename
|
||||
root = settings.ORIGINALS_DIR
|
||||
|
||||
counter = 0
|
||||
|
||||
while True:
|
||||
new_filename = generate_filename(
|
||||
doc, counter, archive_filename=archive_filename)
|
||||
if new_filename == old_filename:
|
||||
# still the same as before.
|
||||
return new_filename
|
||||
|
||||
if os.path.exists(os.path.join(root, new_filename)):
|
||||
counter += 1
|
||||
else:
|
||||
return new_filename
|
||||
|
||||
|
||||
def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
||||
path = ""
|
||||
|
||||
try:
|
||||
if settings.PAPERLESS_FILENAME_FORMAT is not None:
|
||||
tags = defaultdictNoStr(lambda: slugify(None),
|
||||
many_to_dictionary(doc.tags))
|
||||
|
||||
tag_list = pathvalidate.sanitize_filename(
|
||||
",".join(sorted(
|
||||
[tag.name for tag in doc.tags.all()]
|
||||
)),
|
||||
replacement_text="-"
|
||||
)
|
||||
|
||||
if doc.correspondent:
|
||||
correspondent = pathvalidate.sanitize_filename(
|
||||
doc.correspondent.name, replacement_text="-"
|
||||
)
|
||||
else:
|
||||
correspondent = "none"
|
||||
|
||||
if doc.document_type:
|
||||
document_type = pathvalidate.sanitize_filename(
|
||||
doc.document_type.name, replacement_text="-"
|
||||
)
|
||||
else:
|
||||
document_type = "none"
|
||||
|
||||
path = settings.PAPERLESS_FILENAME_FORMAT.format(
|
||||
title=pathvalidate.sanitize_filename(
|
||||
doc.title, replacement_text="-"),
|
||||
correspondent=correspondent,
|
||||
document_type=document_type,
|
||||
created=datetime.date.isoformat(doc.created),
|
||||
created_year=doc.created.year if doc.created else "none",
|
||||
created_month=f"{doc.created.month:02}" if doc.created else "none", # NOQA: E501
|
||||
created_day=f"{doc.created.day:02}" if doc.created else "none",
|
||||
added=datetime.date.isoformat(doc.added),
|
||||
added_year=doc.added.year if doc.added else "none",
|
||||
added_month=f"{doc.added.month:02}" if doc.added else "none",
|
||||
added_day=f"{doc.added.day:02}" if doc.added else "none",
|
||||
tags=tags,
|
||||
tag_list=tag_list
|
||||
).strip()
|
||||
|
||||
path = path.strip(os.sep)
|
||||
|
||||
except (ValueError, KeyError, IndexError):
|
||||
logger.warning(
|
||||
f"Invalid PAPERLESS_FILENAME_FORMAT: "
|
||||
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
|
||||
|
||||
counter_str = f"_{counter:02}" if counter else ""
|
||||
|
||||
filetype_str = ".pdf" if archive_filename else doc.file_type
|
||||
|
||||
if len(path) > 0:
|
||||
filename = f"{path}{counter_str}{filetype_str}"
|
||||
else:
|
||||
filename = f"{doc.pk:07}{counter_str}{filetype_str}"
|
||||
|
||||
# Append .gpg for encrypted files
|
||||
if append_gpg and doc.storage_type == STORAGE_TYPE_GPG:
|
||||
filename += ".gpg"
|
||||
|
||||
return filename
|
||||
|
||||
|
||||
###############################################################################
|
||||
# This code performs bidirection archive file transformation.
|
||||
###############################################################################
|
||||
|
||||
|
||||
def parse_wrapper(parser, path, mime_type, file_name):
|
||||
# this is here so that I can mock this out for testing.
|
||||
parser.parse(path, mime_type, file_name)
|
||||
|
||||
|
||||
def create_archive_version(doc, retry_count=3):
|
||||
from documents.parsers import get_parser_class_for_mime_type, \
|
||||
DocumentParser, \
|
||||
ParseError
|
||||
|
||||
logger.info(
|
||||
f"Regenerating archive document for document ID:{doc.id}"
|
||||
)
|
||||
parser_class = get_parser_class_for_mime_type(doc.mime_type)
|
||||
for try_num in range(retry_count):
|
||||
parser: DocumentParser = parser_class(None, None)
|
||||
try:
|
||||
parse_wrapper(parser, source_path(doc), doc.mime_type,
|
||||
os.path.basename(doc.filename))
|
||||
doc.content = parser.get_text()
|
||||
|
||||
if parser.get_archive_path() and os.path.isfile(
|
||||
parser.get_archive_path()):
|
||||
doc.archive_filename = generate_unique_filename(
|
||||
doc, archive_filename=True)
|
||||
with open(parser.get_archive_path(), "rb") as f:
|
||||
doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
|
||||
os.makedirs(os.path.dirname(archive_path_new(doc)),
|
||||
exist_ok=True)
|
||||
shutil.copy2(parser.get_archive_path(), archive_path_new(doc))
|
||||
else:
|
||||
doc.archive_checksum = None
|
||||
logger.error(
|
||||
f"Parser did not return an archive document for document "
|
||||
f"ID:{doc.id}. Removing archive document."
|
||||
)
|
||||
doc.save()
|
||||
return
|
||||
except ParseError:
|
||||
if try_num + 1 == retry_count:
|
||||
logger.exception(
|
||||
f"Unable to regenerate archive document for ID:{doc.id}. You "
|
||||
f"need to invoke the document_archiver management command "
|
||||
f"manually for that document."
|
||||
)
|
||||
doc.archive_checksum = None
|
||||
doc.save()
|
||||
return
|
||||
else:
|
||||
# This is mostly here for the tika parser in docker
|
||||
# environemnts. The servers for parsing need to come up first,
|
||||
# and the docker setup doesn't ensure that tika is running
|
||||
# before attempting migrations.
|
||||
logger.error("Parse error, will try again in 5 seconds...")
|
||||
sleep(5)
|
||||
finally:
|
||||
parser.cleanup()
|
||||
|
||||
|
||||
def move_old_to_new_locations(apps, schema_editor):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
|
||||
affected_document_ids = set()
|
||||
|
||||
old_archive_path_to_id = {}
|
||||
|
||||
# check for documents that have incorrect archive versions
|
||||
for doc in Document.objects.filter(archive_checksum__isnull=False):
|
||||
old_path = archive_path_old(doc)
|
||||
|
||||
if old_path in old_archive_path_to_id:
|
||||
affected_document_ids.add(doc.id)
|
||||
affected_document_ids.add(old_archive_path_to_id[old_path])
|
||||
else:
|
||||
old_archive_path_to_id[old_path] = doc.id
|
||||
|
||||
# check that archive files of all unaffected documents are in place
|
||||
for doc in Document.objects.filter(archive_checksum__isnull=False):
|
||||
old_path = archive_path_old(doc)
|
||||
if doc.id not in affected_document_ids and not os.path.isfile(old_path):
|
||||
raise ValueError(
|
||||
f"Archived document ID:{doc.id} does not exist at: "
|
||||
f"{old_path}")
|
||||
|
||||
# check that we can regenerate affected archive versions
|
||||
for doc_id in affected_document_ids:
|
||||
from documents.parsers import get_parser_class_for_mime_type
|
||||
|
||||
doc = Document.objects.get(id=doc_id)
|
||||
parser_class = get_parser_class_for_mime_type(doc.mime_type)
|
||||
if not parser_class:
|
||||
raise ValueError(
|
||||
f"Document ID:{doc.id} has an invalid archived document, "
|
||||
f"but no parsers are available. Cannot migrate.")
|
||||
|
||||
for doc in Document.objects.filter(archive_checksum__isnull=False):
|
||||
|
||||
if doc.id in affected_document_ids:
|
||||
old_path = archive_path_old(doc)
|
||||
# remove affected archive versions
|
||||
if os.path.isfile(old_path):
|
||||
logger.debug(
|
||||
f"Removing {old_path}"
|
||||
)
|
||||
os.unlink(old_path)
|
||||
else:
|
||||
# Set archive path for unaffected files
|
||||
doc.archive_filename = archive_name_from_filename(doc.filename)
|
||||
Document.objects.filter(id=doc.id).update(
|
||||
archive_filename=doc.archive_filename
|
||||
)
|
||||
|
||||
# regenerate archive documents
|
||||
for doc_id in affected_document_ids:
|
||||
doc = Document.objects.get(id=doc_id)
|
||||
create_archive_version(doc)
|
||||
|
||||
|
||||
def move_new_to_old_locations(apps, schema_editor):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
|
||||
old_archive_paths = set()
|
||||
|
||||
for doc in Document.objects.filter(archive_checksum__isnull=False):
|
||||
new_archive_path = archive_path_new(doc)
|
||||
old_archive_path = archive_path_old(doc)
|
||||
if old_archive_path in old_archive_paths:
|
||||
raise ValueError(
|
||||
f"Cannot migrate: Archive file name {old_archive_path} of "
|
||||
f"document {doc.filename} would clash with another archive "
|
||||
f"filename.")
|
||||
old_archive_paths.add(old_archive_path)
|
||||
if new_archive_path != old_archive_path and os.path.isfile(old_archive_path):
|
||||
raise ValueError(
|
||||
f"Cannot migrate: Cannot move {new_archive_path} to "
|
||||
f"{old_archive_path}: file already exists."
|
||||
)
|
||||
|
||||
for doc in Document.objects.filter(archive_checksum__isnull=False):
|
||||
new_archive_path = archive_path_new(doc)
|
||||
old_archive_path = archive_path_old(doc)
|
||||
if new_archive_path != old_archive_path:
|
||||
logger.debug(f"Moving {new_archive_path} to {old_archive_path}")
|
||||
shutil.move(new_archive_path, old_archive_path)
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1011_auto_20210101_2340'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='archive_filename',
|
||||
field=models.FilePathField(default=None, editable=False, help_text='Current archive filename in storage', max_length=1024, null=True, unique=True, verbose_name='archive filename'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='filename',
|
||||
field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True, unique=True, verbose_name='filename'),
|
||||
),
|
||||
migrations.RunPython(
|
||||
move_old_to_new_locations,
|
||||
move_new_to_old_locations
|
||||
),
|
||||
]
|
||||
@@ -16,7 +16,6 @@ from django.utils.timezone import is_aware
|
||||
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
|
||||
from documents.file_handling import archive_name_from_filename
|
||||
from documents.parsers import get_default_file_extension
|
||||
|
||||
|
||||
@@ -208,10 +207,21 @@ class Document(models.Model):
|
||||
max_length=1024,
|
||||
editable=False,
|
||||
default=None,
|
||||
unique=True,
|
||||
null=True,
|
||||
help_text=_("Current filename in storage")
|
||||
)
|
||||
|
||||
archive_filename = models.FilePathField(
|
||||
_("archive filename"),
|
||||
max_length=1024,
|
||||
editable=False,
|
||||
default=None,
|
||||
unique=True,
|
||||
null=True,
|
||||
help_text=_("Current archive filename in storage")
|
||||
)
|
||||
|
||||
archive_serial_number = models.IntegerField(
|
||||
_("archive serial number"),
|
||||
blank=True,
|
||||
@@ -256,16 +266,18 @@ class Document(models.Model):
|
||||
return open(self.source_path, "rb")
|
||||
|
||||
@property
|
||||
def archive_path(self):
|
||||
if self.filename:
|
||||
fname = archive_name_from_filename(self.filename)
|
||||
else:
|
||||
fname = "{:07}.pdf".format(self.pk)
|
||||
def has_archive_version(self):
|
||||
return self.archive_filename is not None
|
||||
|
||||
return os.path.join(
|
||||
settings.ARCHIVE_DIR,
|
||||
fname
|
||||
)
|
||||
@property
|
||||
def archive_path(self):
|
||||
if self.has_archive_version:
|
||||
return os.path.join(
|
||||
settings.ARCHIVE_DIR,
|
||||
str(self.archive_filename)
|
||||
)
|
||||
else:
|
||||
return None
|
||||
|
||||
@property
|
||||
def archive_file(self):
|
||||
|
||||
@@ -288,14 +288,17 @@ class DocumentParser(LoggingMixin):
|
||||
def get_archive_path(self):
|
||||
return self.archive_path
|
||||
|
||||
def get_thumbnail(self, document_path, mime_type):
|
||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
"""
|
||||
Returns the path to a file we can use as a thumbnail for this document.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_optimised_thumbnail(self, document_path, mime_type):
|
||||
thumbnail = self.get_thumbnail(document_path, mime_type)
|
||||
def get_optimised_thumbnail(self,
|
||||
document_path,
|
||||
mime_type,
|
||||
file_name=None):
|
||||
thumbnail = self.get_thumbnail(document_path, mime_type, file_name)
|
||||
if settings.OPTIMIZE_THUMBNAILS:
|
||||
out_path = os.path.join(self.tempdir, "thumb_optipng.png")
|
||||
|
||||
|
||||
@@ -56,7 +56,8 @@ def check_sanity():
|
||||
messages.append(SanityError(
|
||||
f"Thumbnail of document {doc.pk} does not exist."))
|
||||
else:
|
||||
present_files.remove(os.path.normpath(doc.thumbnail_path))
|
||||
if os.path.normpath(doc.thumbnail_path) in present_files:
|
||||
present_files.remove(os.path.normpath(doc.thumbnail_path))
|
||||
try:
|
||||
with doc.thumbnail_file as f:
|
||||
f.read()
|
||||
@@ -71,7 +72,8 @@ def check_sanity():
|
||||
messages.append(SanityError(
|
||||
f"Original of document {doc.pk} does not exist."))
|
||||
else:
|
||||
present_files.remove(os.path.normpath(doc.source_path))
|
||||
if os.path.normpath(doc.source_path) in present_files:
|
||||
present_files.remove(os.path.normpath(doc.source_path))
|
||||
try:
|
||||
with doc.source_file as f:
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
@@ -86,13 +88,24 @@ def check_sanity():
|
||||
))
|
||||
|
||||
# Check sanity of the archive file.
|
||||
if doc.archive_checksum:
|
||||
if doc.archive_checksum and not doc.archive_filename:
|
||||
messages.append(SanityError(
|
||||
f"Document {doc.pk} has an archive file checksum, but no "
|
||||
f"archive filename."
|
||||
))
|
||||
elif not doc.archive_checksum and doc.archive_filename:
|
||||
messages.append(SanityError(
|
||||
f"Document {doc.pk} has an archive file, but its checksum is "
|
||||
f"missing."
|
||||
))
|
||||
elif doc.has_archive_version:
|
||||
if not os.path.isfile(doc.archive_path):
|
||||
messages.append(SanityError(
|
||||
f"Archived version of document {doc.pk} does not exist."
|
||||
))
|
||||
else:
|
||||
present_files.remove(os.path.normpath(doc.archive_path))
|
||||
if os.path.normpath(doc.archive_path) in present_files:
|
||||
present_files.remove(os.path.normpath(doc.archive_path))
|
||||
try:
|
||||
with doc.archive_file as f:
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
@@ -103,7 +116,8 @@ def check_sanity():
|
||||
else:
|
||||
if not checksum == doc.archive_checksum:
|
||||
messages.append(SanityError(
|
||||
f"Checksum mismatch of archive {doc.pk}. "
|
||||
f"Checksum mismatch of archived document "
|
||||
f"{doc.pk}. "
|
||||
f"Stored: {doc.checksum}, actual: {checksum}."
|
||||
))
|
||||
|
||||
|
||||
@@ -129,7 +129,7 @@ class DocumentSerializer(DynamicFieldsModelSerializer):
|
||||
return obj.get_public_filename()
|
||||
|
||||
def get_archived_file_name(self, obj):
|
||||
if obj.archive_checksum:
|
||||
if obj.has_archive_version:
|
||||
return obj.get_public_filename(archive=True)
|
||||
else:
|
||||
return None
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import logging
|
||||
import os
|
||||
from subprocess import Popen
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.admin.models import ADDITION, LogEntry
|
||||
@@ -14,7 +13,7 @@ from filelock import FileLock
|
||||
|
||||
from .. import index, matching
|
||||
from ..file_handling import delete_empty_directories, \
|
||||
create_source_path_directory, archive_name_from_filename, \
|
||||
create_source_path_directory, \
|
||||
generate_unique_filename
|
||||
from ..models import Document, Tag
|
||||
|
||||
@@ -148,18 +147,18 @@ def set_tags(sender,
|
||||
@receiver(models.signals.post_delete, sender=Document)
|
||||
def cleanup_document_deletion(sender, instance, using, **kwargs):
|
||||
with FileLock(settings.MEDIA_LOCK):
|
||||
for f in (instance.source_path,
|
||||
instance.archive_path,
|
||||
instance.thumbnail_path):
|
||||
if os.path.isfile(f):
|
||||
for filename in (instance.source_path,
|
||||
instance.archive_path,
|
||||
instance.thumbnail_path):
|
||||
if filename and os.path.isfile(filename):
|
||||
try:
|
||||
os.unlink(f)
|
||||
os.unlink(filename)
|
||||
logger.debug(
|
||||
f"Deleted file {f}.")
|
||||
f"Deleted file {filename}.")
|
||||
except OSError as e:
|
||||
logger.warning(
|
||||
f"While deleting document {str(instance)}, the file "
|
||||
f"{f} could not be deleted: {e}"
|
||||
f"{filename} could not be deleted: {e}"
|
||||
)
|
||||
|
||||
delete_empty_directories(
|
||||
@@ -167,10 +166,15 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
|
||||
root=settings.ORIGINALS_DIR
|
||||
)
|
||||
|
||||
delete_empty_directories(
|
||||
os.path.dirname(instance.archive_path),
|
||||
root=settings.ARCHIVE_DIR
|
||||
)
|
||||
if instance.has_archive_version:
|
||||
delete_empty_directories(
|
||||
os.path.dirname(instance.archive_path),
|
||||
root=settings.ARCHIVE_DIR
|
||||
)
|
||||
|
||||
|
||||
class CannotMoveFilesException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def validate_move(instance, old_path, new_path):
|
||||
@@ -178,16 +182,14 @@ def validate_move(instance, old_path, new_path):
|
||||
# Can't do anything if the old file does not exist anymore.
|
||||
logger.fatal(
|
||||
f"Document {str(instance)}: File {old_path} has gone.")
|
||||
return False
|
||||
raise CannotMoveFilesException()
|
||||
|
||||
if os.path.isfile(new_path):
|
||||
# Can't do anything if the new file already exists. Skip updating file.
|
||||
logger.warning(
|
||||
f"Document {str(instance)}: Cannot rename file "
|
||||
f"since target path {new_path} already exists.")
|
||||
return False
|
||||
|
||||
return True
|
||||
raise CannotMoveFilesException()
|
||||
|
||||
|
||||
@receiver(models.signals.m2m_changed, sender=Document.tags.through)
|
||||
@@ -206,56 +208,61 @@ def update_filename_and_move_files(sender, instance, **kwargs):
|
||||
return
|
||||
|
||||
with FileLock(settings.MEDIA_LOCK):
|
||||
old_filename = instance.filename
|
||||
new_filename = generate_unique_filename(
|
||||
instance, settings.ORIGINALS_DIR)
|
||||
try:
|
||||
old_filename = instance.filename
|
||||
old_source_path = instance.source_path
|
||||
|
||||
if new_filename == instance.filename:
|
||||
# Don't do anything if its the same.
|
||||
return
|
||||
instance.filename = generate_unique_filename(instance)
|
||||
move_original = old_filename != instance.filename
|
||||
|
||||
old_source_path = instance.source_path
|
||||
new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename)
|
||||
|
||||
if not validate_move(instance, old_source_path, new_source_path):
|
||||
return
|
||||
|
||||
# archive files are optional, archive checksum tells us if we have one,
|
||||
# since this is None for documents without archived files.
|
||||
if instance.archive_checksum:
|
||||
new_archive_filename = archive_name_from_filename(new_filename)
|
||||
old_archive_filename = instance.archive_filename
|
||||
old_archive_path = instance.archive_path
|
||||
new_archive_path = os.path.join(settings.ARCHIVE_DIR,
|
||||
new_archive_filename)
|
||||
|
||||
if not validate_move(instance, old_archive_path, new_archive_path):
|
||||
if instance.has_archive_version:
|
||||
|
||||
instance.archive_filename = generate_unique_filename(
|
||||
instance, archive_filename=True
|
||||
)
|
||||
|
||||
move_archive = old_archive_filename != instance.archive_filename # NOQA: E501
|
||||
else:
|
||||
move_archive = False
|
||||
|
||||
if not move_original and not move_archive:
|
||||
# Don't do anything if filenames did not change.
|
||||
return
|
||||
|
||||
create_source_path_directory(new_archive_path)
|
||||
else:
|
||||
old_archive_path = None
|
||||
new_archive_path = None
|
||||
if move_original:
|
||||
validate_move(instance, old_source_path, instance.source_path)
|
||||
create_source_path_directory(instance.source_path)
|
||||
os.rename(old_source_path, instance.source_path)
|
||||
|
||||
create_source_path_directory(new_source_path)
|
||||
|
||||
try:
|
||||
os.rename(old_source_path, new_source_path)
|
||||
if instance.archive_checksum:
|
||||
os.rename(old_archive_path, new_archive_path)
|
||||
instance.filename = new_filename
|
||||
if move_archive:
|
||||
validate_move(
|
||||
instance, old_archive_path, instance.archive_path)
|
||||
create_source_path_directory(instance.archive_path)
|
||||
os.rename(old_archive_path, instance.archive_path)
|
||||
|
||||
# Don't save() here to prevent infinite recursion.
|
||||
Document.objects.filter(pk=instance.pk).update(
|
||||
filename=new_filename)
|
||||
filename=instance.filename,
|
||||
archive_filename=instance.archive_filename,
|
||||
)
|
||||
|
||||
except OSError as e:
|
||||
instance.filename = old_filename
|
||||
# this happens when we can't move a file. If that's the case for
|
||||
# the archive file, we try our best to revert the changes.
|
||||
# no need to save the instance, the update() has not happened yet.
|
||||
except (OSError, DatabaseError, CannotMoveFilesException):
|
||||
# This happens when either:
|
||||
# - moving the files failed due to file system errors
|
||||
# - saving to the database failed due to database errors
|
||||
# In both cases, we need to revert to the original state.
|
||||
|
||||
# Try to move files to their original location.
|
||||
try:
|
||||
os.rename(new_source_path, old_source_path)
|
||||
os.rename(new_archive_path, old_archive_path)
|
||||
if move_original and os.path.isfile(instance.source_path):
|
||||
os.rename(instance.source_path, old_source_path)
|
||||
|
||||
if move_archive and os.path.isfile(instance.archive_path):
|
||||
os.rename(instance.archive_path, old_archive_path)
|
||||
|
||||
except Exception as e:
|
||||
# This is fine, since:
|
||||
# A: if we managed to move source from A to B, we will also
|
||||
@@ -266,16 +273,10 @@ def update_filename_and_move_files(sender, instance, **kwargs):
|
||||
# B: if moving the orignal file failed, nothing has changed
|
||||
# anyway.
|
||||
pass
|
||||
except DatabaseError as e:
|
||||
# this happens after moving files, so move them back into place.
|
||||
# since moving them once succeeded, it's very likely going to
|
||||
# succeed again.
|
||||
os.rename(new_source_path, old_source_path)
|
||||
if instance.archive_checksum:
|
||||
os.rename(new_archive_path, old_archive_path)
|
||||
|
||||
# restore old values on the instance
|
||||
instance.filename = old_filename
|
||||
# again, no need to save the instance, since the actual update()
|
||||
# operation failed.
|
||||
instance.archive_filename = old_archive_filename
|
||||
|
||||
# finally, remove any empty sub folders. This will do nothing if
|
||||
# something has failed above.
|
||||
@@ -283,7 +284,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):
|
||||
delete_empty_directories(os.path.dirname(old_source_path),
|
||||
root=settings.ORIGINALS_DIR)
|
||||
|
||||
if old_archive_path and not os.path.isfile(old_archive_path):
|
||||
if instance.has_archive_version and not os.path.isfile(old_archive_path): # NOQA: E501
|
||||
delete_empty_directories(os.path.dirname(old_archive_path),
|
||||
root=settings.ARCHIVE_DIR)
|
||||
|
||||
|
||||
BIN
src/documents/tests/samples/simple-noalpha.png
Normal file
BIN
src/documents/tests/samples/simple-noalpha.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 6.3 KiB |
BIN
src/documents/tests/samples/simple.jpg
Normal file
BIN
src/documents/tests/samples/simple.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 17 KiB |
BIN
src/documents/tests/samples/simple.png
Normal file
BIN
src/documents/tests/samples/simple.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 7.7 KiB |
1
src/documents/tests/samples/simple.txt
Normal file
1
src/documents/tests/samples/simple.txt
Normal file
@@ -0,0 +1 @@
|
||||
This is a test file.
|
||||
@@ -23,18 +23,6 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(Document.objects.get(id=doc.id).title, "new title")
|
||||
m.assert_called_once()
|
||||
|
||||
def test_tags(self):
|
||||
doc = Document.objects.create(title="test")
|
||||
doc.tags.create(name="t1")
|
||||
doc.tags.create(name="t2")
|
||||
|
||||
self.assertEqual(self.doc_admin.tags_(doc), "<span >t1, </span><span >t2, </span>")
|
||||
|
||||
def test_tags_empty(self):
|
||||
doc = Document.objects.create(title="test")
|
||||
|
||||
self.assertEqual(self.doc_admin.tags_(doc), "")
|
||||
|
||||
@mock.patch("documents.admin.index.remove_document")
|
||||
def test_delete_model(self, m):
|
||||
doc = Document.objects.create(title="test")
|
||||
|
||||
@@ -146,21 +146,19 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(response.content, content_thumbnail)
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||
def test_download_with_archive(self):
|
||||
|
||||
_, filename = tempfile.mkstemp(dir=self.dirs.originals_dir)
|
||||
|
||||
content = b"This is a test"
|
||||
content_archive = b"This is the same test but archived"
|
||||
|
||||
with open(filename, "wb") as f:
|
||||
f.write(content)
|
||||
|
||||
filename = os.path.basename(filename)
|
||||
|
||||
doc = Document.objects.create(title="none", filename=filename,
|
||||
doc = Document.objects.create(title="none", filename="my_document.pdf",
|
||||
archive_filename="archived.pdf",
|
||||
mime_type="application/pdf")
|
||||
|
||||
with open(doc.source_path, "wb") as f:
|
||||
f.write(content)
|
||||
|
||||
with open(doc.archive_path, "wb") as f:
|
||||
f.write(content_archive)
|
||||
|
||||
@@ -577,7 +575,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
async_task.assert_not_called()
|
||||
|
||||
def test_get_metadata(self):
|
||||
doc = Document.objects.create(title="test", filename="file.pdf", mime_type="image/png", archive_checksum="A")
|
||||
doc = Document.objects.create(title="test", filename="file.pdf", mime_type="image/png", archive_checksum="A", archive_filename="archive.pdf")
|
||||
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png"), doc.source_path)
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), doc.archive_path)
|
||||
@@ -591,6 +589,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
self.assertTrue(meta['has_archive_version'])
|
||||
self.assertEqual(len(meta['original_metadata']), 0)
|
||||
self.assertGreater(len(meta['archive_metadata']), 0)
|
||||
self.assertEqual(meta['media_filename'], "file.pdf")
|
||||
self.assertEqual(meta['archive_media_filename'], "archive.pdf")
|
||||
|
||||
def test_get_metadata_invalid_doc(self):
|
||||
response = self.client.get(f"/api/documents/34576/metadata/")
|
||||
@@ -610,6 +610,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
self.assertFalse(meta['has_archive_version'])
|
||||
self.assertGreater(len(meta['original_metadata']), 0)
|
||||
self.assertIsNone(meta['archive_metadata'])
|
||||
self.assertIsNone(meta['archive_media_filename'])
|
||||
|
||||
def test_get_empty_suggestions(self):
|
||||
doc = Document.objects.create(title="test", mime_type="application/pdf")
|
||||
|
||||
@@ -5,12 +5,14 @@ import tempfile
|
||||
from unittest import mock
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from django.conf import settings
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from .utils import DirectoriesMixin
|
||||
from ..consumer import Consumer, ConsumerError
|
||||
from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
|
||||
from ..parsers import DocumentParser, ParseError
|
||||
from ..tasks import sanity_check
|
||||
|
||||
|
||||
class TestAttributes(TestCase):
|
||||
@@ -165,7 +167,7 @@ class TestFieldPermutations(TestCase):
|
||||
|
||||
class DummyParser(DocumentParser):
|
||||
|
||||
def get_thumbnail(self, document_path, mime_type):
|
||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
# not important during tests
|
||||
raise NotImplementedError()
|
||||
|
||||
@@ -174,16 +176,34 @@ class DummyParser(DocumentParser):
|
||||
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
|
||||
self.archive_path = archive_path
|
||||
|
||||
def get_optimised_thumbnail(self, document_path, mime_type):
|
||||
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
return self.fake_thumb
|
||||
|
||||
def parse(self, document_path, mime_type, file_name=None):
|
||||
self.text = "The Text"
|
||||
|
||||
|
||||
class CopyParser(DocumentParser):
|
||||
|
||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
return self.fake_thumb
|
||||
|
||||
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
return self.fake_thumb
|
||||
|
||||
def __init__(self, logging_group, progress_callback=None):
|
||||
super(CopyParser, self).__init__(logging_group, progress_callback)
|
||||
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=self.tempdir)
|
||||
|
||||
def parse(self, document_path, mime_type, file_name=None):
|
||||
self.text = "The text"
|
||||
self.archive_path = os.path.join(self.tempdir, "archive.pdf")
|
||||
shutil.copy(document_path, self.archive_path)
|
||||
|
||||
|
||||
class FaultyParser(DocumentParser):
|
||||
|
||||
def get_thumbnail(self, document_path, mime_type):
|
||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
# not important during tests
|
||||
raise NotImplementedError()
|
||||
|
||||
@@ -191,7 +211,7 @@ class FaultyParser(DocumentParser):
|
||||
super(FaultyParser, self).__init__(logging_group)
|
||||
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
|
||||
|
||||
def get_optimised_thumbnail(self, document_path, mime_type):
|
||||
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
return self.fake_thumb
|
||||
|
||||
def parse(self, document_path, mime_type, file_name=None):
|
||||
@@ -203,6 +223,8 @@ def fake_magic_from_file(file, mime=False):
|
||||
if mime:
|
||||
if os.path.splitext(file)[1] == ".pdf":
|
||||
return "application/pdf"
|
||||
elif os.path.splitext(file)[1] == ".png":
|
||||
return "image/png"
|
||||
else:
|
||||
return "unknown"
|
||||
else:
|
||||
@@ -274,6 +296,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
||||
self.assertIsNone(document.correspondent)
|
||||
self.assertIsNone(document.document_type)
|
||||
self.assertEqual(document.filename, "0000001.pdf")
|
||||
self.assertEqual(document.archive_filename, "0000001.pdf")
|
||||
|
||||
self.assertTrue(os.path.isfile(
|
||||
document.source_path
|
||||
@@ -432,6 +455,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
||||
|
||||
self.assertEqual(document.title, "new docs")
|
||||
self.assertEqual(document.filename, "none/new docs.pdf")
|
||||
self.assertEqual(document.archive_filename, "none/new docs.pdf")
|
||||
|
||||
self._assert_first_last_send_progress()
|
||||
|
||||
@@ -446,7 +470,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
||||
filenames.insert(0, f)
|
||||
return f
|
||||
|
||||
m.side_effect = lambda f, root: get_filename()
|
||||
m.side_effect = lambda f, archive_filename = False: get_filename()
|
||||
|
||||
filename = self.get_test_file()
|
||||
|
||||
@@ -457,6 +481,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(document.title, "new docs")
|
||||
self.assertIsNotNone(os.path.isfile(document.title))
|
||||
self.assertTrue(os.path.isfile(document.source_path))
|
||||
self.assertTrue(os.path.isfile(document.archive_path))
|
||||
|
||||
self._assert_first_last_send_progress()
|
||||
|
||||
@@ -516,6 +541,30 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
||||
|
||||
self._assert_first_last_send_progress(last_status="FAILED")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
|
||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
||||
def test_similar_filenames(self, m):
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"))
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.png"), os.path.join(settings.CONSUMPTION_DIR, "simple.png"))
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png"), os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"))
|
||||
m.return_value = [(None, {
|
||||
"parser": CopyParser,
|
||||
"mime_types": {"application/pdf": ".pdf", "image/png": ".png"},
|
||||
"weight": 0
|
||||
})]
|
||||
doc1 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.png"))
|
||||
doc2 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"))
|
||||
doc3 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"))
|
||||
|
||||
self.assertEqual(doc1.filename, "simple.png")
|
||||
self.assertEqual(doc1.archive_filename, "simple.pdf")
|
||||
self.assertEqual(doc2.filename, "simple.pdf")
|
||||
self.assertEqual(doc2.archive_filename, "simple_01.pdf")
|
||||
self.assertEqual(doc3.filename, "simple.png.pdf")
|
||||
self.assertEqual(doc3.archive_filename, "simple.png.pdf")
|
||||
|
||||
sanity_check()
|
||||
|
||||
|
||||
class PreConsumeTestCase(TestCase):
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import datetime
|
||||
import os
|
||||
import shutil
|
||||
from unittest import mock
|
||||
from uuid import uuid4
|
||||
|
||||
from dateutil import tz
|
||||
@@ -9,7 +8,6 @@ from django.conf import settings
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from documents.parsers import parse_date
|
||||
from paperless_tesseract.parsers import RasterisedDocumentParser
|
||||
|
||||
|
||||
class TestDate(TestCase):
|
||||
@@ -152,4 +150,4 @@ class TestDate(TestCase):
|
||||
2018, 2, 13, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@@ -201,6 +201,13 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
|
||||
self.assertEqual(generate_filename(d), "my_doc_type - the_doc.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{asn} - {title}")
|
||||
def test_asn(self):
|
||||
d1 = Document.objects.create(title="the_doc", mime_type="application/pdf", archive_serial_number=652, checksum="A")
|
||||
d2 = Document.objects.create(title="the_doc", mime_type="application/pdf", archive_serial_number=None, checksum="B")
|
||||
self.assertEqual(generate_filename(d1), "652 - the_doc.pdf")
|
||||
self.assertEqual(generate_filename(d2), "none - the_doc.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
||||
def test_tags_with_underscore(self):
|
||||
document = Document()
|
||||
@@ -439,6 +446,18 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(document2.filename, "qwe.pdf")
|
||||
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
|
||||
@mock.patch("documents.signals.handlers.Document.objects.filter")
|
||||
def test_no_update_without_change(self, m):
|
||||
doc = Document.objects.create(title="document", filename="document.pdf", archive_filename="document.pdf", checksum="A", archive_checksum="B", mime_type="application/pdf")
|
||||
Path(doc.source_path).touch()
|
||||
Path(doc.archive_path).touch()
|
||||
|
||||
doc.save()
|
||||
|
||||
m.assert_not_called()
|
||||
|
||||
|
||||
|
||||
class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
|
||||
@@ -448,7 +467,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", filename="0000001.pdf", checksum="A", archive_checksum="B")
|
||||
doc = Document.objects.create(mime_type="application/pdf", filename="0000001.pdf", checksum="A", archive_filename="0000001.pdf", archive_checksum="B")
|
||||
|
||||
self.assertTrue(os.path.isfile(original))
|
||||
self.assertTrue(os.path.isfile(archive))
|
||||
@@ -461,7 +480,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
|
||||
|
||||
self.assertFalse(os.path.isfile(original))
|
||||
self.assertFalse(os.path.isfile(archive))
|
||||
@@ -475,7 +494,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
Path(original).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
|
||||
|
||||
self.assertTrue(os.path.isfile(original))
|
||||
self.assertFalse(os.path.isfile(archive))
|
||||
@@ -486,14 +505,49 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
def test_move_archive_exists(self):
|
||||
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
existing_archive_file = os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none"))
|
||||
Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
|
||||
Path(existing_archive_file).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
|
||||
|
||||
self.assertFalse(os.path.isfile(original))
|
||||
self.assertFalse(os.path.isfile(archive))
|
||||
self.assertTrue(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(os.path.isfile(doc.archive_path))
|
||||
self.assertTrue(os.path.isfile(existing_archive_file))
|
||||
self.assertEqual(doc.archive_filename, "none/my_doc_01.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
|
||||
def test_move_original_only(self):
|
||||
original = os.path.join(settings.ORIGINALS_DIR, "document_01.pdf")
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "document.pdf")
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="document", filename="document_01.pdf", checksum="A",
|
||||
archive_checksum="B", archive_filename="document.pdf")
|
||||
|
||||
self.assertEqual(doc.filename, "document.pdf")
|
||||
self.assertEqual(doc.archive_filename, "document.pdf")
|
||||
|
||||
self.assertTrue(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(os.path.isfile(doc.archive_path))
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
|
||||
def test_move_archive_only(self):
|
||||
original = os.path.join(settings.ORIGINALS_DIR, "document.pdf")
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "document_01.pdf")
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="document", filename="document.pdf", checksum="A",
|
||||
archive_checksum="B", archive_filename="document_01.pdf")
|
||||
|
||||
self.assertEqual(doc.filename, "document.pdf")
|
||||
self.assertEqual(doc.archive_filename, "document.pdf")
|
||||
|
||||
self.assertTrue(os.path.isfile(original))
|
||||
self.assertTrue(os.path.isfile(archive))
|
||||
self.assertTrue(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(os.path.isfile(doc.archive_path))
|
||||
|
||||
@@ -514,8 +568,9 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
|
||||
|
||||
m.assert_called()
|
||||
self.assertTrue(os.path.isfile(original))
|
||||
self.assertTrue(os.path.isfile(archive))
|
||||
self.assertTrue(os.path.isfile(doc.source_path))
|
||||
@@ -527,7 +582,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
#Path(original).touch()
|
||||
Path(archive).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", archive_filename="0000001.pdf", checksum="A", archive_checksum="B")
|
||||
|
||||
self.assertFalse(os.path.isfile(original))
|
||||
self.assertTrue(os.path.isfile(archive))
|
||||
@@ -551,19 +606,21 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", archive_filename="0000001.pdf", checksum="A", archive_checksum="B")
|
||||
|
||||
m.assert_called()
|
||||
self.assertTrue(os.path.isfile(original))
|
||||
self.assertTrue(os.path.isfile(archive))
|
||||
self.assertTrue(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(os.path.isfile(doc.archive_path))
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||
def test_archive_deleted(self):
|
||||
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
|
||||
|
||||
self.assertTrue(os.path.isfile(original))
|
||||
self.assertTrue(os.path.isfile(archive))
|
||||
@@ -577,6 +634,28 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
self.assertFalse(os.path.isfile(doc.source_path))
|
||||
self.assertFalse(os.path.isfile(doc.archive_path))
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
|
||||
def test_archive_deleted2(self):
|
||||
original = os.path.join(settings.ORIGINALS_DIR, "document.png")
|
||||
original2 = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
Path(original).touch()
|
||||
Path(original2).touch()
|
||||
Path(archive).touch()
|
||||
|
||||
doc1 = Document.objects.create(mime_type="image/png", title="document", filename="document.png", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
|
||||
doc2 = Document.objects.create(mime_type="application/pdf", title="0000001", filename="0000001.pdf", checksum="C")
|
||||
|
||||
self.assertTrue(os.path.isfile(doc1.source_path))
|
||||
self.assertTrue(os.path.isfile(doc1.archive_path))
|
||||
self.assertTrue(os.path.isfile(doc2.source_path))
|
||||
|
||||
doc2.delete()
|
||||
|
||||
self.assertTrue(os.path.isfile(doc1.source_path))
|
||||
self.assertTrue(os.path.isfile(doc1.archive_path))
|
||||
self.assertFalse(os.path.isfile(doc2.source_path))
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||
def test_database_error(self):
|
||||
|
||||
@@ -584,7 +663,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
doc = Document(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
|
||||
doc = Document(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_filename="0000001.pdf", archive_checksum="B")
|
||||
with mock.patch("documents.signals.handlers.Document.objects.filter") as m:
|
||||
m.side_effect = DatabaseError()
|
||||
doc.save()
|
||||
@@ -594,6 +673,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
self.assertTrue(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(os.path.isfile(doc.archive_path))
|
||||
|
||||
|
||||
class TestFilenameGeneration(TestCase):
|
||||
|
||||
@override_settings(
|
||||
@@ -617,7 +697,7 @@ class TestFilenameGeneration(TestCase):
|
||||
|
||||
def run():
|
||||
doc = Document.objects.create(checksum=str(uuid.uuid4()), title=str(uuid.uuid4()), content="wow")
|
||||
doc.filename = generate_unique_filename(doc, settings.ORIGINALS_DIR)
|
||||
doc.filename = generate_unique_filename(doc)
|
||||
Path(doc.thumbnail_path).touch()
|
||||
with open(doc.source_path, "w") as f:
|
||||
f.write(str(uuid.uuid4()))
|
||||
|
||||
@@ -20,6 +20,7 @@ from documents.tests.utils import DirectoriesMixin
|
||||
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
|
||||
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||
class TestArchiver(DirectoriesMixin, TestCase):
|
||||
|
||||
def make_models(self):
|
||||
@@ -42,10 +43,27 @@ class TestArchiver(DirectoriesMixin, TestCase):
|
||||
doc = Document.objects.get(id=doc.id)
|
||||
|
||||
self.assertIsNotNone(doc.checksum)
|
||||
self.assertIsNotNone(doc.archive_checksum)
|
||||
self.assertTrue(os.path.isfile(doc.archive_path))
|
||||
self.assertTrue(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(filecmp.cmp(sample_file, doc.source_path))
|
||||
self.assertEqual(doc.archive_filename, "none/A.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
|
||||
def test_naming_priorities(self):
|
||||
doc1 = Document.objects.create(checksum="A", title="document", content="first document", mime_type="application/pdf", filename="document.pdf")
|
||||
doc2 = Document.objects.create(checksum="B", title="document", content="second document", mime_type="application/pdf", filename="document_01.pdf")
|
||||
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document.pdf"))
|
||||
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document_01.pdf"))
|
||||
|
||||
handle_document(doc2.pk)
|
||||
handle_document(doc1.pk)
|
||||
|
||||
doc1 = Document.objects.get(id=doc1.id)
|
||||
doc2 = Document.objects.get(id=doc2.id)
|
||||
|
||||
self.assertEqual(doc1.archive_filename, "document.pdf")
|
||||
self.assertEqual(doc2.archive_filename, "document_01.pdf")
|
||||
|
||||
class TestDecryptDocuments(TestCase):
|
||||
|
||||
@@ -106,24 +124,27 @@ class TestMakeIndex(TestCase):
|
||||
|
||||
class TestRenamer(DirectoriesMixin, TestCase):
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||
def test_rename(self):
|
||||
doc = Document.objects.create(title="test", mime_type="application/pdf")
|
||||
doc = Document.objects.create(title="test", mime_type="image/jpeg")
|
||||
doc.filename = generate_filename(doc)
|
||||
doc.archive_filename = generate_filename(doc, archive_filename=True)
|
||||
doc.save()
|
||||
|
||||
Path(doc.source_path).touch()
|
||||
Path(doc.archive_path).touch()
|
||||
|
||||
old_source_path = doc.source_path
|
||||
|
||||
with override_settings(PAPERLESS_FILENAME_FORMAT="{title}"):
|
||||
with override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}"):
|
||||
call_command("document_renamer")
|
||||
|
||||
doc2 = Document.objects.get(id=doc.id)
|
||||
|
||||
self.assertEqual(doc2.filename, "test.pdf")
|
||||
self.assertFalse(os.path.isfile(old_source_path))
|
||||
self.assertEqual(doc2.filename, "none/test.jpg")
|
||||
self.assertEqual(doc2.archive_filename, "none/test.pdf")
|
||||
self.assertFalse(os.path.isfile(doc.source_path))
|
||||
self.assertFalse(os.path.isfile(doc.archive_path))
|
||||
self.assertTrue(os.path.isfile(doc2.source_path))
|
||||
self.assertTrue(os.path.isfile(doc2.archive_path))
|
||||
|
||||
|
||||
class TestCreateClassifier(TestCase):
|
||||
|
||||
@@ -22,7 +22,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
||||
self.target = tempfile.mkdtemp()
|
||||
self.addCleanup(shutil.rmtree, self.target)
|
||||
|
||||
self.d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow1", filename="0000001.pdf", mime_type="application/pdf")
|
||||
self.d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow1", filename="0000001.pdf", mime_type="application/pdf", archive_filename="0000001.pdf")
|
||||
self.d2 = Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow2", filename="0000002.pdf", mime_type="application/pdf")
|
||||
self.d3 = Document.objects.create(content="Content", checksum="d38d7ed02e988e072caf924e0f3fcb76", title="wow2", filename="0000003.pdf", mime_type="application/pdf")
|
||||
self.d4 = Document.objects.create(content="Content", checksum="82186aaa94f0b98697d704b90fd1c072", title="wow_dec", filename="0000004.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
|
||||
|
||||
325
src/documents/tests/test_migration_archive_files.py
Normal file
325
src/documents/tests/test_migration_archive_files.py
Normal file
@@ -0,0 +1,325 @@
|
||||
import hashlib
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from django.conf import settings
|
||||
from django.test import override_settings
|
||||
|
||||
from documents.parsers import ParseError
|
||||
from documents.tests.utils import DirectoriesMixin, TestMigrations
|
||||
|
||||
|
||||
STORAGE_TYPE_GPG = "gpg"
|
||||
|
||||
|
||||
def archive_name_from_filename(filename):
|
||||
return os.path.splitext(filename)[0] + ".pdf"
|
||||
|
||||
|
||||
def archive_path_old(self):
|
||||
if self.filename:
|
||||
fname = archive_name_from_filename(self.filename)
|
||||
else:
|
||||
fname = "{:07}.pdf".format(self.pk)
|
||||
|
||||
return os.path.join(
|
||||
settings.ARCHIVE_DIR,
|
||||
fname
|
||||
)
|
||||
|
||||
|
||||
def archive_path_new(doc):
|
||||
if doc.archive_filename is not None:
|
||||
return os.path.join(
|
||||
settings.ARCHIVE_DIR,
|
||||
str(doc.archive_filename)
|
||||
)
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def source_path(doc):
|
||||
if doc.filename:
|
||||
fname = str(doc.filename)
|
||||
else:
|
||||
fname = "{:07}{}".format(doc.pk, doc.file_type)
|
||||
if doc.storage_type == STORAGE_TYPE_GPG:
|
||||
fname += ".gpg" # pragma: no cover
|
||||
|
||||
return os.path.join(
|
||||
settings.ORIGINALS_DIR,
|
||||
fname
|
||||
)
|
||||
|
||||
|
||||
def thumbnail_path(doc):
|
||||
file_name = "{:07}.png".format(doc.pk)
|
||||
if doc.storage_type == STORAGE_TYPE_GPG:
|
||||
file_name += ".gpg"
|
||||
|
||||
return os.path.join(
|
||||
settings.THUMBNAIL_DIR,
|
||||
file_name
|
||||
)
|
||||
|
||||
|
||||
def make_test_document(document_class, title: str, mime_type: str, original: str, original_filename: str, archive: str = None, archive_filename: str = None):
|
||||
doc = document_class()
|
||||
doc.filename = original_filename
|
||||
doc.title = title
|
||||
doc.mime_type = mime_type
|
||||
doc.content = "the content, does not matter for this test"
|
||||
doc.save()
|
||||
|
||||
shutil.copy2(original, source_path(doc))
|
||||
with open(original, "rb") as f:
|
||||
doc.checksum = hashlib.md5(f.read()).hexdigest()
|
||||
|
||||
if archive:
|
||||
if archive_filename:
|
||||
doc.archive_filename = archive_filename
|
||||
shutil.copy2(archive, archive_path_new(doc))
|
||||
else:
|
||||
shutil.copy2(archive, archive_path_old(doc))
|
||||
|
||||
with open(archive, "rb") as f:
|
||||
doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
|
||||
|
||||
doc.save()
|
||||
|
||||
Path(thumbnail_path(doc)).touch()
|
||||
|
||||
return doc
|
||||
|
||||
|
||||
simple_jpg = os.path.join(os.path.dirname(__file__), "samples", "simple.jpg")
|
||||
simple_pdf = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
|
||||
simple_pdf2 = os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf")
|
||||
simple_pdf3 = os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000003.pdf")
|
||||
simple_txt = os.path.join(os.path.dirname(__file__), "samples", "simple.txt")
|
||||
simple_png = os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png")
|
||||
simple_png2 = os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
|
||||
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||
class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
|
||||
|
||||
migrate_from = '1011_auto_20210101_2340'
|
||||
migrate_to = '1012_fix_archive_files'
|
||||
|
||||
def setUpBeforeMigration(self, apps):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
|
||||
self.unrelated = make_test_document(Document, "unrelated", "application/pdf", simple_pdf3, "unrelated.pdf", simple_pdf)
|
||||
self.no_text = make_test_document(Document, "no-text", "image/png", simple_png2, "no-text.png", simple_pdf)
|
||||
self.doc_no_archive = make_test_document(Document, "no_archive", "text/plain", simple_txt, "no_archive.txt")
|
||||
self.clash1 = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf)
|
||||
self.clash2 = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf)
|
||||
self.clash3 = make_test_document(Document, "clash", "image/png", simple_png, "clash.png", simple_pdf)
|
||||
self.clash4 = make_test_document(Document, "clash.png", "application/pdf", simple_pdf2, "clash.png.pdf", simple_pdf2)
|
||||
|
||||
self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash2))
|
||||
self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash3))
|
||||
self.assertNotEqual(archive_path_old(self.clash1), archive_path_old(self.clash4))
|
||||
|
||||
def testArchiveFilesMigrated(self):
|
||||
Document = self.apps.get_model('documents', 'Document')
|
||||
|
||||
for doc in Document.objects.all():
|
||||
if doc.archive_checksum:
|
||||
self.assertIsNotNone(doc.archive_filename)
|
||||
self.assertTrue(os.path.isfile(archive_path_new(doc)))
|
||||
else:
|
||||
self.assertIsNone(doc.archive_filename)
|
||||
|
||||
with open(source_path(doc), "rb") as f:
|
||||
original_checksum = hashlib.md5(f.read()).hexdigest()
|
||||
self.assertEqual(original_checksum, doc.checksum)
|
||||
|
||||
if doc.archive_checksum:
|
||||
self.assertTrue(os.path.isfile(archive_path_new(doc)))
|
||||
with open(archive_path_new(doc), "rb") as f:
|
||||
archive_checksum = hashlib.md5(f.read()).hexdigest()
|
||||
self.assertEqual(archive_checksum, doc.archive_checksum)
|
||||
|
||||
self.assertEqual(Document.objects.filter(archive_checksum__isnull=False).count(), 6)
|
||||
|
||||
def test_filenames(self):
|
||||
Document = self.apps.get_model('documents', 'Document')
|
||||
self.assertEqual(Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.doc_no_archive.id).archive_filename, None)
|
||||
self.assertEqual(Document.objects.get(id=self.clash1.id).archive_filename, f"{self.clash1.id:07}.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.clash2.id).archive_filename, f"{self.clash2.id:07}.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.clash3.id).archive_filename, f"{self.clash3.id:07}.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf")
|
||||
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||
class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
|
||||
|
||||
def test_filenames(self):
|
||||
Document = self.apps.get_model('documents', 'Document')
|
||||
self.assertEqual(Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.doc_no_archive.id).archive_filename, None)
|
||||
self.assertEqual(Document.objects.get(id=self.clash1.id).archive_filename, "none/clash.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.clash2.id).archive_filename, "none/clash_01.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.clash3.id).archive_filename, "none/clash_02.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf")
|
||||
|
||||
|
||||
def fake_parse_wrapper(parser, path, mime_type, file_name):
|
||||
parser.archive_path = None
|
||||
parser.text = "the text"
|
||||
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||
class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
|
||||
|
||||
migrate_from = '1011_auto_20210101_2340'
|
||||
migrate_to = '1012_fix_archive_files'
|
||||
auto_migrate = False
|
||||
|
||||
def test_archive_missing(self):
|
||||
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
doc = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf)
|
||||
os.unlink(archive_path_old(doc))
|
||||
|
||||
self.assertRaisesMessage(ValueError, "does not exist at: ", self.performMigration)
|
||||
|
||||
def test_parser_missing(self):
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
doc1 = make_test_document(Document, "document", "invalid/typesss768", simple_png, "document.png", simple_pdf)
|
||||
doc2 = make_test_document(Document, "document", "invalid/typesss768", simple_jpg, "document.jpg", simple_pdf)
|
||||
|
||||
self.assertRaisesMessage(ValueError, "no parsers are available", self.performMigration)
|
||||
|
||||
@mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
|
||||
def test_parser_error(self, m):
|
||||
m.side_effect = ParseError()
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
|
||||
doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)
|
||||
|
||||
self.assertIsNotNone(doc1.archive_checksum)
|
||||
self.assertIsNotNone(doc2.archive_checksum)
|
||||
|
||||
with self.assertLogs() as capture:
|
||||
self.performMigration()
|
||||
|
||||
self.assertEqual(m.call_count, 6)
|
||||
|
||||
self.assertEqual(
|
||||
len(list(filter(lambda log: "Parse error, will try again in 5 seconds" in log, capture.output))),
|
||||
4)
|
||||
|
||||
self.assertEqual(
|
||||
len(list(filter(lambda log: "Unable to regenerate archive document for ID:" in log, capture.output))),
|
||||
2)
|
||||
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
doc1 = Document.objects.get(id=doc1.id)
|
||||
doc2 = Document.objects.get(id=doc2.id)
|
||||
|
||||
self.assertIsNone(doc1.archive_checksum)
|
||||
self.assertIsNone(doc2.archive_checksum)
|
||||
self.assertIsNone(doc1.archive_filename)
|
||||
self.assertIsNone(doc2.archive_filename)
|
||||
|
||||
@mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
|
||||
def test_parser_no_archive(self, m):
|
||||
m.side_effect = fake_parse_wrapper
|
||||
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
|
||||
doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)
|
||||
|
||||
with self.assertLogs() as capture:
|
||||
self.performMigration()
|
||||
|
||||
self.assertEqual(
|
||||
len(list(filter(lambda log: "Parser did not return an archive document for document" in log, capture.output))),
|
||||
2)
|
||||
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
doc1 = Document.objects.get(id=doc1.id)
|
||||
doc2 = Document.objects.get(id=doc2.id)
|
||||
|
||||
self.assertIsNone(doc1.archive_checksum)
|
||||
self.assertIsNone(doc2.archive_checksum)
|
||||
self.assertIsNone(doc1.archive_filename)
|
||||
self.assertIsNone(doc2.archive_filename)
|
||||
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||
class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
|
||||
|
||||
migrate_from = '1012_fix_archive_files'
|
||||
migrate_to = '1011_auto_20210101_2340'
|
||||
|
||||
def setUpBeforeMigration(self, apps):
|
||||
|
||||
Document = apps.get_model("documents", "Document")
|
||||
|
||||
doc_unrelated = make_test_document(Document, "unrelated", "application/pdf", simple_pdf2, "unrelated.txt", simple_pdf2, "unrelated.pdf")
|
||||
doc_no_archive = make_test_document(Document, "no_archive", "text/plain", simple_txt, "no_archive.txt")
|
||||
clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_02.pdf")
|
||||
|
||||
def testArchiveFilesReverted(self):
|
||||
Document = self.apps.get_model('documents', 'Document')
|
||||
|
||||
for doc in Document.objects.all():
|
||||
if doc.archive_checksum:
|
||||
self.assertTrue(os.path.isfile(archive_path_old(doc)))
|
||||
with open(source_path(doc), "rb") as f:
|
||||
original_checksum = hashlib.md5(f.read()).hexdigest()
|
||||
self.assertEqual(original_checksum, doc.checksum)
|
||||
|
||||
if doc.archive_checksum:
|
||||
self.assertTrue(os.path.isfile(archive_path_old(doc)))
|
||||
with open(archive_path_old(doc), "rb") as f:
|
||||
archive_checksum = hashlib.md5(f.read()).hexdigest()
|
||||
self.assertEqual(archive_checksum, doc.archive_checksum)
|
||||
|
||||
self.assertEqual(Document.objects.filter(archive_checksum__isnull=False).count(), 2)
|
||||
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||
class TestMigrateArchiveFilesBackwardsWithFilenameFormat(TestMigrateArchiveFilesBackwards):
|
||||
pass
|
||||
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||
class TestMigrateArchiveFilesBackwardsErrors(DirectoriesMixin, TestMigrations):
|
||||
|
||||
migrate_from = '1012_fix_archive_files'
|
||||
migrate_to = '1011_auto_20210101_2340'
|
||||
auto_migrate = False
|
||||
|
||||
def test_filename_clash(self):
|
||||
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
self.clashA = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf, "clash_02.pdf")
|
||||
self.clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_01.pdf")
|
||||
|
||||
self.assertRaisesMessage(ValueError, "would clash with another archive filename", self.performMigration)
|
||||
|
||||
def test_filename_exists(self):
|
||||
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
self.clashA = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf, "clash.pdf")
|
||||
self.clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_01.pdf")
|
||||
|
||||
self.assertRaisesMessage(ValueError, "file already exists.", self.performMigration)
|
||||
@@ -1,52 +1,11 @@
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
from django.apps import apps
|
||||
from django.conf import settings
|
||||
from django.db import connection
|
||||
from django.db.migrations.executor import MigrationExecutor
|
||||
from django.test import TestCase, TransactionTestCase, override_settings
|
||||
from django.test import override_settings
|
||||
|
||||
from documents.models import Document
|
||||
from documents.parsers import get_default_file_extension
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
class TestMigrations(TransactionTestCase):
|
||||
|
||||
@property
|
||||
def app(self):
|
||||
return apps.get_containing_app_config(type(self).__module__).name
|
||||
|
||||
migrate_from = None
|
||||
migrate_to = None
|
||||
|
||||
def setUp(self):
|
||||
super(TestMigrations, self).setUp()
|
||||
|
||||
assert self.migrate_from and self.migrate_to, \
|
||||
"TestCase '{}' must define migrate_from and migrate_to properties".format(type(self).__name__)
|
||||
self.migrate_from = [(self.app, self.migrate_from)]
|
||||
self.migrate_to = [(self.app, self.migrate_to)]
|
||||
executor = MigrationExecutor(connection)
|
||||
old_apps = executor.loader.project_state(self.migrate_from).apps
|
||||
|
||||
# Reverse to the original migration
|
||||
executor.migrate(self.migrate_from)
|
||||
|
||||
self.setUpBeforeMigration(old_apps)
|
||||
|
||||
# Run the migration to test
|
||||
executor = MigrationExecutor(connection)
|
||||
executor.loader.build_graph() # reload.
|
||||
executor.migrate(self.migrate_to)
|
||||
|
||||
self.apps = executor.loader.project_state(self.migrate_to).apps
|
||||
|
||||
def setUpBeforeMigration(self, apps):
|
||||
pass
|
||||
|
||||
from documents.tests.utils import DirectoriesMixin, TestMigrations
|
||||
|
||||
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
|
||||
STORAGE_TYPE_GPG = "gpg"
|
||||
@@ -68,7 +68,7 @@ class TestParserDiscovery(TestCase):
|
||||
)
|
||||
|
||||
|
||||
def fake_get_thumbnail(self, path, mimetype):
|
||||
def fake_get_thumbnail(self, path, mimetype, file_name):
|
||||
return os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
|
||||
|
||||
|
||||
@@ -89,15 +89,15 @@ class TestBaseParser(TestCase):
|
||||
def test_get_optimised_thumbnail(self):
|
||||
parser = DocumentParser(None)
|
||||
|
||||
parser.get_optimised_thumbnail("any", "not important")
|
||||
parser.get_optimised_thumbnail("any", "not important", "document.pdf")
|
||||
|
||||
@mock.patch("documents.parsers.DocumentParser.get_thumbnail", fake_get_thumbnail)
|
||||
@override_settings(OPTIMIZE_THUMBNAILS=False)
|
||||
def test_get_optimised_thumb_disabled(self):
|
||||
parser = DocumentParser(None)
|
||||
|
||||
path = parser.get_optimised_thumbnail("any", "not important")
|
||||
self.assertEqual(path, fake_get_thumbnail(None, None, None))
|
||||
path = parser.get_optimised_thumbnail("any", "not important", "document.pdf")
|
||||
self.assertEqual(path, fake_get_thumbnail(None, None, None, None))
|
||||
|
||||
|
||||
class TestParserAvailability(TestCase):
|
||||
|
||||
@@ -21,7 +21,7 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf"), os.path.join(self.dirs.archive_dir, "0000001.pdf"))
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png"), os.path.join(self.dirs.thumbnail_dir, "0000001.png"))
|
||||
|
||||
return Document.objects.create(title="test", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", content="test", pk=1, filename="0000001.pdf", mime_type="application/pdf")
|
||||
return Document.objects.create(title="test", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", content="test", pk=1, filename="0000001.pdf", mime_type="application/pdf", archive_filename="0000001.pdf")
|
||||
|
||||
def test_no_docs(self):
|
||||
self.assertEqual(len(check_sanity()), 0)
|
||||
@@ -86,6 +86,19 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
|
||||
Path(self.dirs.originals_dir, "orphaned").touch()
|
||||
self.assertEqual(len(check_sanity()), 1)
|
||||
|
||||
def test_all(self):
|
||||
Document.objects.create(title="test", checksum="dgfhj", archive_checksum="dfhg", content="", pk=1, filename="0000001.pdf")
|
||||
def test_error_tostring(self):
|
||||
Document.objects.create(title="test", checksum="dgfhj", archive_checksum="dfhg", content="", pk=1, filename="0000001.pdf", archive_filename="0000001.pdf")
|
||||
string = str(SanityFailedError(check_sanity()))
|
||||
self.assertIsNotNone(string)
|
||||
|
||||
def test_archive_filename_no_checksum(self):
|
||||
doc = self.make_test_data()
|
||||
doc.archive_checksum = None
|
||||
doc.save()
|
||||
self.assertEqual(len(check_sanity()), 2)
|
||||
|
||||
def test_archive_checksum_no_filename(self):
|
||||
doc = self.make_test_data()
|
||||
doc.archive_filename = None
|
||||
doc.save()
|
||||
self.assertEqual(len(check_sanity()), 2)
|
||||
|
||||
@@ -4,7 +4,10 @@ import tempfile
|
||||
from collections import namedtuple
|
||||
from contextlib import contextmanager
|
||||
|
||||
from django.test import override_settings
|
||||
from django.apps import apps
|
||||
from django.db import connection
|
||||
from django.db.migrations.executor import MigrationExecutor
|
||||
from django.test import override_settings, TransactionTestCase
|
||||
|
||||
|
||||
def setup_directories():
|
||||
@@ -79,3 +82,45 @@ class DirectoriesMixin:
|
||||
def tearDown(self) -> None:
|
||||
super(DirectoriesMixin, self).tearDown()
|
||||
remove_dirs(self.dirs)
|
||||
|
||||
|
||||
class TestMigrations(TransactionTestCase):
|
||||
|
||||
@property
|
||||
def app(self):
|
||||
return apps.get_containing_app_config(type(self).__module__).name
|
||||
|
||||
migrate_from = None
|
||||
migrate_to = None
|
||||
auto_migrate = True
|
||||
|
||||
def setUp(self):
|
||||
super(TestMigrations, self).setUp()
|
||||
|
||||
assert self.migrate_from and self.migrate_to, \
|
||||
"TestCase '{}' must define migrate_from and migrate_to properties".format(type(self).__name__)
|
||||
self.migrate_from = [(self.app, self.migrate_from)]
|
||||
self.migrate_to = [(self.app, self.migrate_to)]
|
||||
executor = MigrationExecutor(connection)
|
||||
old_apps = executor.loader.project_state(self.migrate_from).apps
|
||||
|
||||
# Reverse to the original migration
|
||||
executor.migrate(self.migrate_from)
|
||||
|
||||
self.setUpBeforeMigration(old_apps)
|
||||
|
||||
self.apps = old_apps
|
||||
|
||||
if self.auto_migrate:
|
||||
self.performMigration()
|
||||
|
||||
def performMigration(self):
|
||||
# Run the migration to test
|
||||
executor = MigrationExecutor(connection)
|
||||
executor.loader.build_graph() # reload.
|
||||
executor.migrate(self.migrate_to)
|
||||
|
||||
self.apps = executor.loader.project_state(self.migrate_to).apps
|
||||
|
||||
def setUpBeforeMigration(self, apps):
|
||||
pass
|
||||
|
||||
@@ -192,7 +192,7 @@ class DocumentViewSet(RetrieveModelMixin,
|
||||
|
||||
def file_response(self, pk, request, disposition):
|
||||
doc = Document.objects.get(id=pk)
|
||||
if not self.original_requested(request) and os.path.isfile(doc.archive_path): # NOQA: E501
|
||||
if not self.original_requested(request) and doc.has_archive_version: # NOQA: E501
|
||||
file_handle = doc.archive_file
|
||||
filename = doc.get_public_filename(archive=True)
|
||||
mime_type = 'application/pdf'
|
||||
@@ -237,18 +237,18 @@ class DocumentViewSet(RetrieveModelMixin,
|
||||
"original_size": os.stat(doc.source_path).st_size,
|
||||
"original_mime_type": doc.mime_type,
|
||||
"media_filename": doc.filename,
|
||||
"has_archive_version": os.path.isfile(doc.archive_path),
|
||||
"has_archive_version": doc.has_archive_version,
|
||||
"original_metadata": self.get_metadata(
|
||||
doc.source_path, doc.mime_type)
|
||||
doc.source_path, doc.mime_type),
|
||||
"archive_checksum": doc.archive_checksum,
|
||||
"archive_media_filename": doc.archive_filename
|
||||
}
|
||||
|
||||
if doc.archive_checksum and os.path.isfile(doc.archive_path):
|
||||
meta['archive_checksum'] = doc.archive_checksum
|
||||
if doc.has_archive_version:
|
||||
meta['archive_size'] = os.stat(doc.archive_path).st_size,
|
||||
meta['archive_metadata'] = self.get_metadata(
|
||||
doc.archive_path, "application/pdf")
|
||||
else:
|
||||
meta['archive_checksum'] = None
|
||||
meta['archive_size'] = None
|
||||
meta['archive_metadata'] = None
|
||||
|
||||
@@ -291,6 +291,8 @@ class DocumentViewSet(RetrieveModelMixin,
|
||||
handle = GnuPG.decrypted(doc.thumbnail_file)
|
||||
else:
|
||||
handle = doc.thumbnail_file
|
||||
# TODO: Send ETag information and use that to send new thumbnails
|
||||
# if available
|
||||
return HttpResponse(handle,
|
||||
content_type='image/png')
|
||||
except (FileNotFoundError, Document.DoesNotExist):
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = (1, 1, 0)
|
||||
__version__ = (1, 1, 1)
|
||||
|
||||
@@ -13,11 +13,8 @@ def process_mail_accounts():
|
||||
try:
|
||||
total_new_documents += MailAccountHandler().handle_mail_account(
|
||||
account)
|
||||
except MailError as e:
|
||||
logger.error(
|
||||
f"Error while processing mail account {account}: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
except MailError:
|
||||
logger.exception(f"Error while processing mail account {account}")
|
||||
|
||||
if total_new_documents > 0:
|
||||
return f"Added {total_new_documents} document(s)."
|
||||
|
||||
@@ -48,7 +48,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
)
|
||||
return result
|
||||
|
||||
def get_thumbnail(self, document_path, mime_type):
|
||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
return make_thumbnail_from_pdf(
|
||||
document_path, self.tempdir, self.logging_group)
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ class TextDocumentParser(DocumentParser):
|
||||
|
||||
logging_name = "paperless.parsing.text"
|
||||
|
||||
def get_thumbnail(self, document_path, mime_type):
|
||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
|
||||
def read_text():
|
||||
with open(document_path, 'r') as src:
|
||||
|
||||
@@ -16,9 +16,9 @@ class TikaDocumentParser(DocumentParser):
|
||||
|
||||
logging_name = "paperless.parsing.tika"
|
||||
|
||||
def get_thumbnail(self, document_path, mime_type):
|
||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
if not self.archive_path:
|
||||
self.archive_path = self.convert_to_pdf(document_path)
|
||||
self.archive_path = self.convert_to_pdf(document_path, file_name)
|
||||
|
||||
return make_thumbnail_from_pdf(
|
||||
self.archive_path, self.tempdir, self.logging_group)
|
||||
|
||||
Reference in New Issue
Block a user