Compare commits

...

56 Commits

Author SHA1 Message Date
jonaswinkler
13e91d8c95 changelog 2021-02-12 18:04:15 +01:00
jonaswinkler
6ac90181cb documentation and changelog 2021-02-12 16:54:00 +01:00
jonaswinkler
d6c3471909 reprganized docker file, less layers, new shortcuts for management commands 2021-02-12 16:53:51 +01:00
jonaswinkler
5b56fad9c7 fix test case 2021-02-12 01:31:50 +01:00
jonaswinkler
ed0b1fe115 better exception logging 2021-02-11 22:16:41 +01:00
jonaswinkler
4211153527 update file renaming logic 2021-02-11 13:47:17 +01:00
jonaswinkler
2f85461109 added some test cases that I still need to address 2021-02-10 23:53:48 +01:00
jonaswinkler
3fa7dcb0cb changes to the admin document list 2021-02-10 21:34:58 +01:00
jonaswinkler
857fe3a55c fix one incorrect use of archive_version 2021-02-10 21:34:39 +01:00
jonaswinkler
dd19ea46fe backup documentation 2021-02-10 20:14:55 +01:00
jonaswinkler
21740a9d87 changelog 2021-02-10 20:05:02 +01:00
jonaswinkler
658fb2f208 remove invalid test cases 2021-02-10 20:01:35 +01:00
jonaswinkler
252d4cb513 update document admin 2021-02-10 18:55:39 +01:00
jonaswinkler
5aed41223b Merge branch 'master' into dev 2021-02-10 18:44:02 +01:00
jonaswinkler
45dfbf3747 downgrades 2021-02-10 18:27:41 +01:00
jonaswinkler
e4fe5bebab requirements 2021-02-10 17:09:22 +01:00
jonaswinkler
04519ee623 more testing of the migration 2021-02-10 16:58:55 +01:00
jonaswinkler
6c8f010f7a retries for archive generation 2021-02-10 14:50:20 +01:00
jonaswinkler
ed84cf26e7 update dependencies 2021-02-10 14:31:17 +01:00
jonaswinkler
1bc961f0c0 update dependencies 2021-02-10 11:50:57 +01:00
jonaswinkler
77d745381f more testing 2021-02-10 01:31:15 +01:00
jonaswinkler
7082cb9c36 document renamer testing 2021-02-10 01:12:45 +01:00
jonaswinkler
34e84cc757 sanity checker testing 2021-02-10 00:52:18 +01:00
jonaswinkler
9246411610 better logging for the migration 2021-02-10 00:52:01 +01:00
jonaswinkler
8330b3598c changelog 2021-02-09 23:23:11 +01:00
jonaswinkler
1d002149dc added ASN to filename format #519 2021-02-09 23:03:07 +01:00
jonaswinkler
8d6071e977 fix a bug with thumbnail generation when TIKA was enabled 2021-02-09 22:12:43 +01:00
jonaswinkler
7d67766508 todo note #520 2021-02-09 21:53:10 +01:00
jonaswinkler
887dd122fe more info in the admin 2021-02-09 21:00:04 +01:00
jonaswinkler
a1293c77b9 fix migration and more tests 2021-02-09 20:54:02 +01:00
jonaswinkler
ee9a73aa95 codestyle 2021-02-09 20:46:41 +01:00
jonaswinkler
9df332b614 test resources 2021-02-09 19:51:25 +01:00
jonaswinkler
d13e86a892 update all test cases to address the archive filename changes 2021-02-09 19:51:16 +01:00
jonaswinkler
69d7f8c180 testing the updated migration 2021-02-09 19:49:29 +01:00
jonaswinkler
1ba89ddd09 refactor migration tests to allow testing for exceptions while migrating 2021-02-09 19:47:50 +01:00
jonaswinkler
0c40a28ad3 more sanity checks regarding archive versions 2021-02-09 19:46:59 +01:00
jonaswinkler
2b7424c42a imports 2021-02-09 19:46:42 +01:00
jonaswinkler
a9f1766d1c todo note 2021-02-09 19:46:32 +01:00
jonaswinkler
fca8576d80 archive filenames are now stored in the database and checked for collisions just as original filenames as well, unified method for archive version checking 2021-02-09 19:46:19 +01:00
jonaswinkler
05f59e7d5e another way to make the test case fail 2021-02-09 02:13:25 +01:00
jonaswinkler
c9511680b3 version push 2021-02-09 01:36:39 +01:00
jonaswinkler
0ed001c56e validate move before migration 2021-02-09 00:13:13 +01:00
jonaswinkler
1e5a418191 more testing #511 2021-02-09 00:01:11 +01:00
jonaswinkler
e05735bc0f fix some test cases 2021-02-09 00:00:46 +01:00
jonaswinkler
7621e10840 only move unaffected files, regenerate affected files 2021-02-08 23:54:07 +01:00
jonaswinkler
d90080f325 only move files if necessary 2021-02-08 22:49:01 +01:00
jonaswinkler
0c676b90f2 migration for #511 2021-02-08 20:59:14 +01:00
jonaswinkler
c2d8bda83c fix for #511 2021-02-08 19:59:14 +01:00
jonaswinkler
302ebf737e refactor migration test case 2021-02-08 13:18:39 +01:00
jonaswinkler
816c95a4ae code style 2021-02-08 13:18:08 +01:00
jonaswinkler
40106f6fcc updated documentation regarding execution of management commands with docker fixes #509 2021-02-08 00:10:52 +01:00
jonaswinkler
61143b3ad1 make the test case fail 2021-02-07 19:53:08 +01:00
jonaswinkler
9b64eebd10 revert commit 2021-02-07 18:26:03 +01:00
jonaswinkler
731418349f added a test case that replicates #511 2021-02-07 18:23:54 +01:00
jonaswinkler
7728920670 Merge branch 'dev' 2021-02-07 01:22:04 +01:00
jonaswinkler
f555bb95ae possible fix for the ansible roles 2021-02-07 00:49:53 +01:00
50 changed files with 1295 additions and 338 deletions

View File

@@ -10,10 +10,6 @@ RUN ./configure && make
FROM python:3.7-slim
WORKDIR /usr/src/paperless/
COPY requirements.txt ./
# Binary dependencies
RUN apt-get update \
&& apt-get -y --no-install-recommends install \
@@ -49,16 +45,24 @@ RUN apt-get update \
tesseract-ocr-spa \
unpaper \
zlib1g \
&& rm -rf /var/lib/apt/lists/*
# This pulls in updated dependencies from bullseye to fix some issues with file type detection.
# TODO: Remove this once bullseye releases.
RUN echo "deb http://deb.debian.org/debian bullseye main" > /etc/apt/sources.list.d/bullseye.list \
&& echo "deb http://deb.debian.org/debian bullseye main" > /etc/apt/sources.list.d/bullseye.list \
&& apt-get update \
&& apt-get install --no-install-recommends -y file libmagic-dev \
&& rm -rf /var/lib/apt/lists/* \
&& rm /etc/apt/sources.list.d/bullseye.list
# copy jbig2enc
COPY --from=jbig2enc /usr/src/jbig2enc/src/.libs/libjbig2enc* /usr/local/lib/
COPY --from=jbig2enc /usr/src/jbig2enc/src/jbig2 /usr/local/bin/
COPY --from=jbig2enc /usr/src/jbig2enc/src/*.h /usr/local/include/
WORKDIR /usr/src/paperless/src/
COPY requirements.txt ../
# Python dependencies
RUN apt-get update \
&& apt-get -y --no-install-recommends install \
@@ -67,41 +71,36 @@ RUN apt-get update \
libpq-dev \
libqpdf-dev \
&& python3 -m pip install --upgrade --no-cache-dir supervisor \
&& python3 -m pip install --no-cache-dir -r requirements.txt \
&& python3 -m pip install --no-cache-dir -r ../requirements.txt \
&& apt-get -y purge build-essential libqpdf-dev \
&& apt-get -y autoremove --purge \
&& rm -rf /var/lib/apt/lists/* \
&& mkdir /var/log/supervisord /var/run/supervisord
&& rm -rf /var/lib/apt/lists/*
# setup docker-specific things
COPY docker/ ./docker/
# copy scripts
# this fixes issues with imagemagick and PDF
COPY docker/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml
COPY gunicorn.conf.py ./
COPY docker/supervisord.conf /etc/supervisord.conf
COPY docker/docker-entrypoint.sh /sbin/docker-entrypoint.sh
# copy jbig2enc
COPY --from=jbig2enc /usr/src/jbig2enc/src/.libs/libjbig2enc* /usr/local/lib/
COPY --from=jbig2enc /usr/src/jbig2enc/src/jbig2 /usr/local/bin/
COPY --from=jbig2enc /usr/src/jbig2enc/src/*.h /usr/local/include/
RUN cd docker \
&& cp imagemagick-policy.xml /etc/ImageMagick-6/policy.xml \
&& mkdir /var/log/supervisord /var/run/supervisord \
&& cp supervisord.conf /etc/supervisord.conf \
&& cp docker-entrypoint.sh /sbin/docker-entrypoint.sh \
&& chmod 755 /sbin/docker-entrypoint.sh \
&& chmod +x install_management_commands.sh \
&& ./install_management_commands.sh \
&& cd .. \
&& rm docker -rf
COPY gunicorn.conf.py ../
# copy app
COPY src/ ./src/
COPY src/ ./
# add users, setup scripts
RUN addgroup --gid 1000 paperless \
&& useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \
&& chown -R paperless:paperless . \
&& chmod 755 /sbin/docker-entrypoint.sh
WORKDIR /usr/src/paperless/src/
RUN sudo -HEu paperless python3 manage.py collectstatic --clear --no-input
RUN sudo -HEu paperless python3 manage.py compilemessages
&& chown -R paperless:paperless ../ \
&& sudo -HEu paperless python3 manage.py collectstatic --clear --no-input \
&& sudo -HEu paperless python3 manage.py compilemessages
VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/media", "/usr/src/paperless/consume", "/usr/src/paperless/export"]
ENTRYPOINT ["/sbin/docker-entrypoint.sh"]

View File

@@ -52,6 +52,10 @@ channels-redis = "*"
uvicorn = {extras = ["standard"], version = "*"}
concurrent-log-handler = "*"
django-redis = "*"
# uvloop 0.15+ incompatible with python 3.6
uvloop = "~=0.14.0"
# TODO: keep an eye on piwheel builds and update this once available (https://www.piwheels.org/project/cryptography/)
cryptography = "~=3.3.2"
[dev-packages]
coveralls = "*"

67
Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "0c2003b9d3d95d1af594f749a2740b55079551ea0ae512177ee9524bb327281e"
"sha256": "b3bed0a6b8981e8fffc1b6aa3bc35a0b1472f28e6f745c62469eb8045740e57b"
},
"pipfile-spec": 6,
"requires": {},
@@ -190,24 +190,24 @@
},
"cryptography": {
"hashes": [
"sha256:0003a52a123602e1acee177dc90dd201f9bb1e73f24a070db7d36c588e8f5c7d",
"sha256:0e85aaae861d0485eb5a79d33226dd6248d2a9f133b81532c8f5aae37de10ff7",
"sha256:594a1db4511bc4d960571536abe21b4e5c3003e8750ab8365fafce71c5d86901",
"sha256:69e836c9e5ff4373ce6d3ab311c1a2eed274793083858d3cd4c7d12ce20d5f9c",
"sha256:788a3c9942df5e4371c199d10383f44a105d67d401fb4304178020142f020244",
"sha256:7e177e4bea2de937a584b13645cab32f25e3d96fc0bc4a4cf99c27dc77682be6",
"sha256:83d9d2dfec70364a74f4e7c70ad04d3ca2e6a08b703606993407bf46b97868c5",
"sha256:84ef7a0c10c24a7773163f917f1cb6b4444597efd505a8aed0a22e8c4780f27e",
"sha256:982f661bffc7a24b6d4f8ebe3291f17cf3833a0941c6f4d9d55c790b9aa2cdb3",
"sha256:9e21301f7a1e7c03dbea73e8602905a4ebba641547a462b26dd03451e5769e7c",
"sha256:9f6b0492d111b43de5f70052e24c1f0951cb9e6022188ebcb1cc3a3d301469b0",
"sha256:a69bd3c68b98298f490e84519b954335154917eaab52cf582fa2c5c7efc6e812",
"sha256:b4890d5fb9b7a23e3bf8abf5a8a7da8e228f1e97dc96b30b95685df840b6914a",
"sha256:c366df0401d1ec4e548bebe8f91d55ebcc0ec3137900d214dd7aac8427ef3030",
"sha256:dc42f645f8f3a489c3dd416730a514e7a91a59510ddaadc09d04224c098d3302"
"sha256:0d7b69674b738068fa6ffade5c962ecd14969690585aaca0a1b1fc9058938a72",
"sha256:1bd0ccb0a1ed775cd7e2144fe46df9dc03eefd722bbcf587b3e0616ea4a81eff",
"sha256:3c284fc1e504e88e51c428db9c9274f2da9f73fdf5d7e13a36b8ecb039af6e6c",
"sha256:49570438e60f19243e7e0d504527dd5fe9b4b967b5a1ff21cc12b57602dd85d3",
"sha256:541dd758ad49b45920dda3b5b48c968f8b2533d8981bcdb43002798d8f7a89ed",
"sha256:5a60d3780149e13b7a6ff7ad6526b38846354d11a15e21068e57073e29e19bed",
"sha256:7951a966613c4211b6612b0352f5bf29989955ee592c4a885d8c7d0f830d0433",
"sha256:922f9602d67c15ade470c11d616f2b2364950602e370c76f0c94c94ae672742e",
"sha256:a0f0b96c572fc9f25c3f4ddbf4688b9b38c69836713fb255f4a2715d93cbaf44",
"sha256:a777c096a49d80f9d2979695b835b0f9c9edab73b59e4ceb51f19724dda887ed",
"sha256:a9a4ac9648d39ce71c2f63fe7dc6db144b9fa567ddfc48b9fde1b54483d26042",
"sha256:aa4969f24d536ae2268c902b2c3d62ab464b5a66bcb247630d208a79a8098e9b",
"sha256:c7390f9b2119b2b43160abb34f63277a638504ef8df99f11cb52c1fda66a2e6f",
"sha256:ddd06e71c449a4fe10d0c60846280ee35d69ce49e3e413ce46d5f129e1468083",
"sha256:e18e6ab84dfb0ab997faf8cca25a86ff15dfea4027b986322026cc99e0a892da"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
"version": "==3.3.1"
"index": "pypi",
"version": "==3.3.2"
},
"daphne": {
"hashes": [
@@ -243,11 +243,11 @@
},
"django-extensions": {
"hashes": [
"sha256:7cd002495ff0a0e5eb6cdd6be759600905b4e4079232ea27618fc46bdd853651",
"sha256:c7f88625a53f631745d4f2bef9ec4dcb999ed59476393bdbbe99db8596778846"
"sha256:674ad4c3b1587a884881824f40212d51829e662e52f85b012cd83d83fe1271d9",
"sha256:9507f8761ee760748938fd8af766d0608fb2738cf368adfa1b2451f61c15ae35"
],
"index": "pypi",
"version": "==3.1.0"
"version": "==3.1.1"
},
"django-filter": {
"hashes": [
@@ -462,11 +462,11 @@
},
"joblib": {
"hashes": [
"sha256:75ead23f13484a2a414874779d69ade40d4fa1abe62b222a23cd50d4bc822f6f",
"sha256:7ad866067ac1fdec27d51c8678ea760601b70e32ff1881d4dc8e1171f2b64b24"
"sha256:9c17567692206d2f3fb9ecf5e991084254fe631665c450b443761c4186a613f7",
"sha256:feeb1ec69c4d45129954f1b7034954241eedfd6ba39b5e9e4b6883be3332d5e5"
],
"markers": "python_version >= '3.6'",
"version": "==1.0.0"
"version": "==1.0.1"
},
"langdetect": {
"hashes": [
@@ -1113,11 +1113,11 @@
},
"tqdm": {
"hashes": [
"sha256:4621f6823bab46a9cc33d48105753ccbea671b68bab2c50a9f0be23d4065cb5a",
"sha256:fe3d08dd00a526850568d542ff9de9bbc2a09a791da3c334f3213d8d0bbbca65"
"sha256:2874fa525c051177583ec59c0fb4583e91f28ccd3f217ffad2acdb32d2c789ac",
"sha256:ab9b659241d82b8b51b2269ee243ec95286046bf06015c4e15a947cc15914211"
],
"index": "pypi",
"version": "==4.56.0"
"version": "==4.56.1"
},
"twisted": {
"extras": [
@@ -1201,6 +1201,7 @@
"sha256:e7514d7a48c063226b7d06617cbb12a14278d4323a065a8d46a7962686ce2e95",
"sha256:f07909cd9fc08c52d294b1570bba92186181ca01fe3dc9ffba68955273dd7362"
],
"index": "pypi",
"version": "==0.14.0"
},
"watchdog": {
@@ -1506,11 +1507,11 @@
},
"faker": {
"hashes": [
"sha256:190f0d3ce037866b5d230f0b9fd0f513f07c25dc326dcad6ee019849c68d441c",
"sha256:db7adc3b4755005fc960cf96fb4ed46b54b6eb21413741ab3f31a9595f379905"
"sha256:bf2a9b3f8d00a5dada61fc4a3f80fe0d6795c7f02a138a7d2ef2db5817c7d017",
"sha256:d4aecdb877519d06c2fdc01ffc5ecf70658981acf5e13cd07ded9892994ef5c6"
],
"markers": "python_version >= '3.6'",
"version": "==6.0.0"
"version": "==6.1.1"
},
"filelock": {
"hashes": [
@@ -1713,11 +1714,11 @@
},
"pytest-xdist": {
"hashes": [
"sha256:1d8edbb1a45e8e1f8e44b1260583107fc23f8bc8da6d18cb331ff61d41258ecf",
"sha256:f127e11e84ad37cc1de1088cb2990f3c354630d428af3f71282de589c5bb779b"
"sha256:2447a1592ab41745955fb870ac7023026f20a5f0bfccf1b52a879bd193d46450",
"sha256:718887296892f92683f6a51f25a3ae584993b06f7076ce1e1fd482e59a8220a2"
],
"index": "pypi",
"version": "==2.2.0"
"version": "==2.2.1"
},
"python-dateutil": {
"hashes": [

View File

@@ -38,7 +38,7 @@
- name: verify uploaded document has been accepted
uri:
url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/"
url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/paperless/"
headers:
Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}'
return_content: yes
@@ -51,7 +51,7 @@
- name: verify uploaded document has been consumed
uri:
url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/"
url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/paperless/"
headers:
Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}'
return_content: yes

View File

@@ -0,0 +1,6 @@
for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails;
do
echo "installing $command..."
sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command
chmod +x /usr/local/bin/$command
done

View File

@@ -0,0 +1,15 @@
#!/bin/bash
set -e
cd /usr/src/paperless/src/
if [[ $(id -u) == 0 ]] ;
then
sudo -HEu paperless python3 manage.py management_command "$@"
elif [[ $(id -un) == "paperless" ]] ;
then
python3 manage.py management_command "$@"
else
echo "Unknown user."
fi

View File

@@ -23,6 +23,12 @@ Options available to any installation of paperless:
* The document exporter is also able to update an already existing export.
Therefore, incremental backups with ``rsync`` are entirely possible.
.. caution::
You cannot import the export generated with one version of paperless in a
different version of paperless. The export contains an exact image of the
database, and migrations may change the database layout.
Options available to docker installations:
* Backup the docker volumes. These usually reside within
@@ -101,17 +107,17 @@ Then you can start paperless-ng with ``-d`` to have it run in the background.
update to newer versions. In order to enable updates as described above, either
get the new ``docker-compose.yml`` file from `here <https://github.com/jonaswinkler/paperless-ng/tree/master/docker/compose>`_
or edit the ``docker-compose.yml`` file, find the line that says
.. code::
image: jonaswinkler/paperless-ng:0.9.x
and replace the version with ``latest``:
.. code::
image: jonaswinkler/paperless-ng:latest
Bare Metal Route
================
@@ -171,26 +177,63 @@ Most of the update process is automated when using the ansible role.
$ ansible-playbook playbook.yml
Downgrading Paperless
#####################
Downgrades are possible. However, some updates also contain database migrations (these change the layout of the database and may move data).
In order to move back from a version that applied database migrations, you'll have to revert the database migration *before* downgrading,
and then downgrade paperless.
This table lists the most recent database migrations for each versions:
+---------+-------------------------+
| Version | Latest migration number |
+---------+-------------------------+
| 1.0.0 | 1011 |
+---------+-------------------------+
| 1.1.0 | 1011 |
+---------+-------------------------+
| 1.1.1 | 1012 |
+---------+-------------------------+
Execute the following management command to migrate your database:
.. code:: shell-session
$ python3 manage.py migrate documents <migration number>
.. note::
Some migrations cannot be undone. The command will issue errors if that happens.
.. _utilities-management-commands:
Management utilities
####################
Paperless comes with some management commands that perform various maintenance
tasks on your paperless instance. You can invoke these commands either by
tasks on your paperless instance. You can invoke these commands in the following way:
With docker-compose, while paperless is running:
.. code:: shell-session
$ cd /path/to/paperless
$ docker-compose run --rm webserver <command> <arguments>
$ docker-compose exec webserver <command> <arguments>
or
With docker, while paperless is running:
.. code:: shell-session
$ docker exec -it <container-name> <command> <arguments>
Bare metal:
.. code:: shell-session
$ cd /path/to/paperless/src
$ python3 manage.py <command> <arguments>
depending on whether you use docker or not.
All commands have built-in help, which can be accessed by executing them with
the argument ``--help``.
@@ -210,7 +253,7 @@ backup or migration to another DMS.
-c, --compare-checksums
-f, --use-filename-format
-d, --delete
``target`` is a folder to which the data gets written. This includes documents,
thumbnails and a ``manifest.json`` file. The manifest contains all metadata from
the database (correspondents, tags, etc).

View File

@@ -217,6 +217,7 @@ will create a directory structure as follows:
Paperless provides the following placeholders withing filenames:
* ``{asn}``: The archive serial number of the document, or "none".
* ``{correspondent}``: The name of the correspondent, or "none".
* ``{document_type}``: The name of the document type, or "none".
* ``{tag_list}``: A comma separated list of all tags assigned to the document.

View File

@@ -5,6 +5,27 @@
Changelog
*********
paperless-ng 1.1.1
##################
This release contains new database migrations.
* Fixed a bug in the sanity checker that would cause it to display "x not in list" errors instead of actual issues.
* Fixed a bug with filename generation for archive filenames that would cause the archive files of two documents to overlap.
* This happened when ``PAPERLESS_FILENAME_FORMAT`` is used and the filenames of two or more documents are the same, except for the file extension.
* Paperless will now store the archive filename in the database as well instead of deriving it from the original filename, and use the
same logic for detecting and avoiding filename clashes that's also used for original filenames.
* The migrations will repair any missing archive files. If you're using tika, ensure that tika is running while performing the migration. Docker-compose will take care of that.
* Fixed a bug with thumbnail regeneration when TIKA integration was used.
* Added ASN as a placeholder field to the filename format.
* The docker image now comes with built-in shortcuts for most management commands. These are now the recommended way to execute management commands, since these
also ensure that they're always executed as the paperless user and you're less likely to run into permission issues. See :ref:`utilities-management-commands`.
paperless-ng 1.1.0
##################
@@ -17,7 +38,7 @@ paperless-ng 1.1.0
or added with one of the mobile apps.
* Documents are successfully added to paperless.
* Document consumption failed (with error messages)
* Configuration options to enable/disable individual notifications.
* Live updates to document lists and saved views when new documents are added.

View File

@@ -24,11 +24,11 @@ click==7.1.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2,
coloredlogs==15.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
concurrent-log-handler==0.9.19
constantly==15.1.0
cryptography==3.3.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
cryptography==3.3.2
daphne==3.0.1; python_version >= '3.6'
dateparser==0.7.6
django-cors-headers==3.7.0
django-extensions==3.1.0
django-extensions==3.1.1
django-filter==2.4.0
django-picklefield==3.0.1; python_version >= '3'
django-q==1.3.4
@@ -49,7 +49,7 @@ img2pdf==0.4.0
incremental==17.5.0
inotify-simple==1.3.5; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
inotifyrecursive==0.3.5
joblib==1.0.0; python_version >= '3.6'
joblib==1.0.1; python_version >= '3.6'
langdetect==1.0.8
lxml==4.6.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
msgpack==1.0.2
@@ -87,7 +87,7 @@ sortedcontainers==2.3.0
sqlparse==0.4.1; python_version >= '3.5'
threadpoolctl==2.1.0; python_version >= '3.5'
tika==1.24
tqdm==4.56.0
tqdm==4.56.1
twisted[tls]==20.3.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
txaio==20.12.1; python_version >= '3.6'
tzlocal==2.1

View File

@@ -2,7 +2,7 @@ export const environment = {
production: true,
apiBaseUrl: "/api/",
appTitle: "Paperless-ng",
version: "1.1.0",
version: "1.1.1",
webSocketHost: window.location.host,
webSocketProtocol: (window.location.protocol == "https:" ? "wss:" : "ws:")
};

BIN
src/clash.pdf Normal file

Binary file not shown.

View File

@@ -50,26 +50,31 @@ class DocumentAdmin(admin.ModelAdmin):
"modified",
"mime_type",
"storage_type",
"filename")
"filename",
"checksum",
"archive_filename",
"archive_checksum"
)
list_display_links = ("title",)
list_display = (
"correspondent",
"id",
"title",
"tags_",
"created",
"mime_type",
"filename",
"archive_filename"
)
list_filter = (
"document_type",
"tags",
"correspondent"
("mime_type"),
("archive_serial_number", admin.EmptyFieldListFilter),
("archive_filename", admin.EmptyFieldListFilter),
)
filter_horizontal = ("tags",)
ordering = ["-created"]
ordering = ["-id"]
date_hierarchy = "created"
@@ -95,26 +100,6 @@ class DocumentAdmin(admin.ModelAdmin):
index.add_or_update_document(obj)
super(DocumentAdmin, self).save_model(request, obj, form, change)
@mark_safe
def tags_(self, obj):
r = ""
for tag in obj.tags.all():
r += self._html_tag(
"span",
tag.name + ", "
)
return r
@staticmethod
def _html_tag(kind, inside=None, **kwargs):
attributes = format_html_join(' ', '{}="{}"', kwargs.items())
if inside is not None:
return format_html("<{kind} {attributes}>{inside}</{kind}>",
kind=kind, attributes=attributes, inside=inside)
return format_html("<{} {}/>", kind, attributes)
class RuleInline(admin.TabularInline):
model = SavedViewFilterRule

View File

@@ -43,9 +43,9 @@ def load_classifier():
version=version, timeout=86400)
except (EOFError, IncompatibleClassifierVersionError) as e:
# there's something wrong with the model file.
logger.error(
logger.exception(
f"Unrecoverable error while loading document "
f"classification model: {str(e)}, deleting model file."
f"classification model, deleting model file."
)
os.unlink(settings.MODEL_FILE)
classifier = None

View File

@@ -241,7 +241,7 @@ class Consumer(LoggingMixin):
self._send_progress(70, 100, 'WORKING',
MESSAGE_GENERATING_THUMBNAIL)
thumbnail = document_parser.get_optimised_thumbnail(
self.path, mime_type)
self.path, mime_type, self.filename)
text = document_parser.get_text()
date = document_parser.get_date()
@@ -292,8 +292,7 @@ class Consumer(LoggingMixin):
# After everything is in the database, copy the files into
# place. If this fails, we'll also rollback the transaction.
with FileLock(settings.MEDIA_LOCK):
document.filename = generate_unique_filename(
document, settings.ORIGINALS_DIR)
document.filename = generate_unique_filename(document)
create_source_path_directory(document.source_path)
self._write(document.storage_type,
@@ -303,6 +302,10 @@ class Consumer(LoggingMixin):
thumbnail, document.thumbnail_path)
if archive_path and os.path.isfile(archive_path):
document.archive_filename = generate_unique_filename(
document,
archive_filename=True
)
create_source_path_directory(document.archive_path)
self._write(document.storage_type,
archive_path, document.archive_path)

View File

@@ -79,12 +79,40 @@ def many_to_dictionary(field):
return mydictionary
def generate_unique_filename(doc, root):
def generate_unique_filename(doc,
archive_filename=False):
"""
Generates a unique filename for doc in settings.ORIGINALS_DIR.
The returned filename is guaranteed to be either the current filename
of the document if unchanged, or a new filename that does not correspondent
to any existing files. The function will append _01, _02, etc to the
filename before the extension to avoid conflicts.
If archive_filename is True, return a unique archive filename instead.
"""
if archive_filename:
old_filename = doc.archive_filename
root = settings.ARCHIVE_DIR
else:
old_filename = doc.filename
root = settings.ORIGINALS_DIR
# If generating archive filenames, try to make a name that is similar to
# the original filename first.
if archive_filename and doc.filename:
new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
if new_filename == old_filename or not os.path.exists(os.path.join(root, new_filename)): # NOQA: E501
return new_filename
counter = 0
while True:
new_filename = generate_filename(doc, counter)
if new_filename == doc.filename:
new_filename = generate_filename(
doc, counter, archive_filename=archive_filename)
if new_filename == old_filename:
# still the same as before.
return new_filename
@@ -94,7 +122,7 @@ def generate_unique_filename(doc, root):
return new_filename
def generate_filename(doc, counter=0, append_gpg=True):
def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
path = ""
try:
@@ -123,6 +151,11 @@ def generate_filename(doc, counter=0, append_gpg=True):
else:
document_type = "none"
if doc.archive_serial_number:
asn = str(doc.archive_serial_number)
else:
asn = "none"
path = settings.PAPERLESS_FILENAME_FORMAT.format(
title=pathvalidate.sanitize_filename(
doc.title, replacement_text="-"),
@@ -136,6 +169,7 @@ def generate_filename(doc, counter=0, append_gpg=True):
added_year=doc.added.year if doc.added else "none",
added_month=f"{doc.added.month:02}" if doc.added else "none",
added_day=f"{doc.added.day:02}" if doc.added else "none",
asn=asn,
tags=tags,
tag_list=tag_list
).strip()
@@ -148,18 +182,16 @@ def generate_filename(doc, counter=0, append_gpg=True):
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
counter_str = f"_{counter:02}" if counter else ""
filetype_str = ".pdf" if archive_filename else doc.file_type
if len(path) > 0:
filename = f"{path}{counter_str}{doc.file_type}"
filename = f"{path}{counter_str}{filetype_str}"
else:
filename = f"{doc.pk:07}{counter_str}{doc.file_type}"
filename = f"{doc.pk:07}{counter_str}{filetype_str}"
# Append .gpg for encrypted files
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
filename += ".gpg"
return filename
def archive_name_from_filename(filename):
return os.path.splitext(filename)[0] + ".pdf"

View File

@@ -78,8 +78,8 @@ def open_index(recreate=False):
try:
if exists_in(settings.INDEX_DIR) and not recreate:
return open_dir(settings.INDEX_DIR, schema=get_schema())
except Exception as e:
logger.error(f"Error while opening the index: {e}, recreating.")
except Exception:
logger.exception(f"Error while opening the index, recreating.")
if not os.path.isdir(settings.INDEX_DIR):
os.makedirs(settings.INDEX_DIR, exist_ok=True)

View File

@@ -16,7 +16,8 @@ from whoosh.writing import AsyncWriter
from documents.models import Document
from ... import index
from ...file_handling import create_source_path_directory
from ...file_handling import create_source_path_directory, \
generate_unique_filename
from ...parsers import get_parser_class_for_mime_type
@@ -39,13 +40,16 @@ def handle_document(document_id):
with transaction.atomic():
with open(parser.get_archive_path(), 'rb') as f:
checksum = hashlib.md5(f.read()).hexdigest()
# i'm going to save first so that in case the file move
# I'm going to save first so that in case the file move
# fails, the database is rolled back.
# we also don't use save() since that triggers the filehandling
# We also don't use save() since that triggers the filehandling
# logic, and we don't want that yet (file not yet in place)
document.archive_filename = generate_unique_filename(
document, archive_filename=True)
Document.objects.filter(pk=document.pk).update(
archive_checksum=checksum,
content=parser.get_text()
content=parser.get_text(),
archive_filename=document.archive_filename
)
with FileLock(settings.MEDIA_LOCK):
create_source_path_directory(document.archive_path)
@@ -56,7 +60,7 @@ def handle_document(document_id):
index.update_document(writer, document)
except Exception as e:
logger.error(f"Error while parsing document {document}: {str(e)}")
logger.exception(f"Error while parsing document {document}")
finally:
parser.cleanup()
@@ -101,7 +105,7 @@ class Command(BaseCommand):
document_ids = list(map(
lambda doc: doc.id,
filter(
lambda d: overwrite or not d.archive_checksum,
lambda d: overwrite or not d.has_archive_version,
documents
)
))

View File

@@ -54,8 +54,7 @@ def _consume(filepath):
if settings.CONSUMER_SUBDIRS_AS_TAGS:
tag_ids = _tags_from_path(filepath)
except Exception as e:
logger.error(
"Error creating tags from path: {}".format(e))
logger.exception("Error creating tags from path")
try:
async_task("documents.tasks.consume_file",
@@ -66,8 +65,7 @@ def _consume(filepath):
# Catch all so that the consumer won't crash.
# This is also what the test case is listening for to check for
# errors.
logger.error(
"Error while consuming document: {}".format(e))
logger.exception("Error while consuming document")
def _consume_wait_unmodified(file, num_tries=20, wait_time=1):

View File

@@ -139,7 +139,7 @@ class Command(BaseCommand):
thumbnail_target = os.path.join(self.target, thumbnail_name)
document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
if os.path.exists(document.archive_path):
if document.has_archive_version:
archive_name = base_name + "-archive.pdf"
archive_target = os.path.join(self.target, archive_name)
document_dict[EXPORTER_ARCHIVE_NAME] = archive_name

View File

@@ -151,6 +151,9 @@ class Command(BaseCommand):
shutil.copy2(thumbnail_path, document.thumbnail_path)
if archive_path:
create_source_path_directory(document.archive_path)
# TODO: this assumes that the export is valid and
# archive_filename is present on all documents with
# archived files
shutil.copy2(archive_path, document.archive_path)
document.save()

View File

@@ -22,7 +22,10 @@ def _process_document(doc_in):
try:
thumb = parser.get_optimised_thumbnail(
document.source_path, document.mime_type)
document.source_path,
document.mime_type,
document.get_public_filename()
)
shutil.move(thumb, document.thumbnail_path)
finally:

View File

@@ -0,0 +1,330 @@
# Generated by Django 3.1.6 on 2021-02-07 22:26
import datetime
import hashlib
import logging
import os
import shutil
from time import sleep
import pathvalidate
from django.conf import settings
from django.db import migrations, models
from django.template.defaultfilters import slugify
from documents.file_handling import defaultdictNoStr, many_to_dictionary
logger = logging.getLogger("paperless.migrations")
###############################################################################
# This is code copied straight paperless before the change.
###############################################################################
def archive_name_from_filename(filename):
return os.path.splitext(filename)[0] + ".pdf"
def archive_path_old(doc):
if doc.filename:
fname = archive_name_from_filename(doc.filename)
else:
fname = "{:07}.pdf".format(doc.pk)
return os.path.join(
settings.ARCHIVE_DIR,
fname
)
STORAGE_TYPE_GPG = "gpg"
def archive_path_new(doc):
if doc.archive_filename is not None:
return os.path.join(
settings.ARCHIVE_DIR,
str(doc.archive_filename)
)
else:
return None
def source_path(doc):
if doc.filename:
fname = str(doc.filename)
else:
fname = "{:07}{}".format(doc.pk, doc.file_type)
if doc.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg" # pragma: no cover
return os.path.join(
settings.ORIGINALS_DIR,
fname
)
def generate_unique_filename(doc, archive_filename=False):
if archive_filename:
old_filename = doc.archive_filename
root = settings.ARCHIVE_DIR
else:
old_filename = doc.filename
root = settings.ORIGINALS_DIR
counter = 0
while True:
new_filename = generate_filename(
doc, counter, archive_filename=archive_filename)
if new_filename == old_filename:
# still the same as before.
return new_filename
if os.path.exists(os.path.join(root, new_filename)):
counter += 1
else:
return new_filename
def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
path = ""
try:
if settings.PAPERLESS_FILENAME_FORMAT is not None:
tags = defaultdictNoStr(lambda: slugify(None),
many_to_dictionary(doc.tags))
tag_list = pathvalidate.sanitize_filename(
",".join(sorted(
[tag.name for tag in doc.tags.all()]
)),
replacement_text="-"
)
if doc.correspondent:
correspondent = pathvalidate.sanitize_filename(
doc.correspondent.name, replacement_text="-"
)
else:
correspondent = "none"
if doc.document_type:
document_type = pathvalidate.sanitize_filename(
doc.document_type.name, replacement_text="-"
)
else:
document_type = "none"
path = settings.PAPERLESS_FILENAME_FORMAT.format(
title=pathvalidate.sanitize_filename(
doc.title, replacement_text="-"),
correspondent=correspondent,
document_type=document_type,
created=datetime.date.isoformat(doc.created),
created_year=doc.created.year if doc.created else "none",
created_month=f"{doc.created.month:02}" if doc.created else "none", # NOQA: E501
created_day=f"{doc.created.day:02}" if doc.created else "none",
added=datetime.date.isoformat(doc.added),
added_year=doc.added.year if doc.added else "none",
added_month=f"{doc.added.month:02}" if doc.added else "none",
added_day=f"{doc.added.day:02}" if doc.added else "none",
tags=tags,
tag_list=tag_list
).strip()
path = path.strip(os.sep)
except (ValueError, KeyError, IndexError):
logger.warning(
f"Invalid PAPERLESS_FILENAME_FORMAT: "
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
counter_str = f"_{counter:02}" if counter else ""
filetype_str = ".pdf" if archive_filename else doc.file_type
if len(path) > 0:
filename = f"{path}{counter_str}{filetype_str}"
else:
filename = f"{doc.pk:07}{counter_str}{filetype_str}"
# Append .gpg for encrypted files
if append_gpg and doc.storage_type == STORAGE_TYPE_GPG:
filename += ".gpg"
return filename
###############################################################################
# This code performs bidirection archive file transformation.
###############################################################################
def parse_wrapper(parser, path, mime_type, file_name):
# this is here so that I can mock this out for testing.
parser.parse(path, mime_type, file_name)
def create_archive_version(doc, retry_count=3):
from documents.parsers import get_parser_class_for_mime_type, \
DocumentParser, \
ParseError
logger.info(
f"Regenerating archive document for document ID:{doc.id}"
)
parser_class = get_parser_class_for_mime_type(doc.mime_type)
for try_num in range(retry_count):
parser: DocumentParser = parser_class(None, None)
try:
parse_wrapper(parser, source_path(doc), doc.mime_type,
os.path.basename(doc.filename))
doc.content = parser.get_text()
if parser.get_archive_path() and os.path.isfile(
parser.get_archive_path()):
doc.archive_filename = generate_unique_filename(
doc, archive_filename=True)
with open(parser.get_archive_path(), "rb") as f:
doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
os.makedirs(os.path.dirname(archive_path_new(doc)),
exist_ok=True)
shutil.copy2(parser.get_archive_path(), archive_path_new(doc))
else:
doc.archive_checksum = None
logger.error(
f"Parser did not return an archive document for document "
f"ID:{doc.id}. Removing archive document."
)
doc.save()
return
except ParseError:
if try_num + 1 == retry_count:
logger.exception(
f"Unable to regenerate archive document for ID:{doc.id}. You "
f"need to invoke the document_archiver management command "
f"manually for that document."
)
doc.archive_checksum = None
doc.save()
return
else:
# This is mostly here for the tika parser in docker
# environemnts. The servers for parsing need to come up first,
# and the docker setup doesn't ensure that tika is running
# before attempting migrations.
logger.error("Parse error, will try again in 5 seconds...")
sleep(5)
finally:
parser.cleanup()
def move_old_to_new_locations(apps, schema_editor):
Document = apps.get_model("documents", "Document")
affected_document_ids = set()
old_archive_path_to_id = {}
# check for documents that have incorrect archive versions
for doc in Document.objects.filter(archive_checksum__isnull=False):
old_path = archive_path_old(doc)
if old_path in old_archive_path_to_id:
affected_document_ids.add(doc.id)
affected_document_ids.add(old_archive_path_to_id[old_path])
else:
old_archive_path_to_id[old_path] = doc.id
# check that archive files of all unaffected documents are in place
for doc in Document.objects.filter(archive_checksum__isnull=False):
old_path = archive_path_old(doc)
if doc.id not in affected_document_ids and not os.path.isfile(old_path):
raise ValueError(
f"Archived document ID:{doc.id} does not exist at: "
f"{old_path}")
# check that we can regenerate affected archive versions
for doc_id in affected_document_ids:
from documents.parsers import get_parser_class_for_mime_type
doc = Document.objects.get(id=doc_id)
parser_class = get_parser_class_for_mime_type(doc.mime_type)
if not parser_class:
raise ValueError(
f"Document ID:{doc.id} has an invalid archived document, "
f"but no parsers are available. Cannot migrate.")
for doc in Document.objects.filter(archive_checksum__isnull=False):
if doc.id in affected_document_ids:
old_path = archive_path_old(doc)
# remove affected archive versions
if os.path.isfile(old_path):
logger.debug(
f"Removing {old_path}"
)
os.unlink(old_path)
else:
# Set archive path for unaffected files
doc.archive_filename = archive_name_from_filename(doc.filename)
Document.objects.filter(id=doc.id).update(
archive_filename=doc.archive_filename
)
# regenerate archive documents
for doc_id in affected_document_ids:
doc = Document.objects.get(id=doc_id)
create_archive_version(doc)
def move_new_to_old_locations(apps, schema_editor):
Document = apps.get_model("documents", "Document")
old_archive_paths = set()
for doc in Document.objects.filter(archive_checksum__isnull=False):
new_archive_path = archive_path_new(doc)
old_archive_path = archive_path_old(doc)
if old_archive_path in old_archive_paths:
raise ValueError(
f"Cannot migrate: Archive file name {old_archive_path} of "
f"document {doc.filename} would clash with another archive "
f"filename.")
old_archive_paths.add(old_archive_path)
if new_archive_path != old_archive_path and os.path.isfile(old_archive_path):
raise ValueError(
f"Cannot migrate: Cannot move {new_archive_path} to "
f"{old_archive_path}: file already exists."
)
for doc in Document.objects.filter(archive_checksum__isnull=False):
new_archive_path = archive_path_new(doc)
old_archive_path = archive_path_old(doc)
if new_archive_path != old_archive_path:
logger.debug(f"Moving {new_archive_path} to {old_archive_path}")
shutil.move(new_archive_path, old_archive_path)
class Migration(migrations.Migration):
dependencies = [
('documents', '1011_auto_20210101_2340'),
]
operations = [
migrations.AddField(
model_name='document',
name='archive_filename',
field=models.FilePathField(default=None, editable=False, help_text='Current archive filename in storage', max_length=1024, null=True, unique=True, verbose_name='archive filename'),
),
migrations.AlterField(
model_name='document',
name='filename',
field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True, unique=True, verbose_name='filename'),
),
migrations.RunPython(
move_old_to_new_locations,
move_new_to_old_locations
),
]

View File

@@ -16,7 +16,6 @@ from django.utils.timezone import is_aware
from django.utils.translation import gettext_lazy as _
from documents.file_handling import archive_name_from_filename
from documents.parsers import get_default_file_extension
@@ -208,10 +207,21 @@ class Document(models.Model):
max_length=1024,
editable=False,
default=None,
unique=True,
null=True,
help_text=_("Current filename in storage")
)
archive_filename = models.FilePathField(
_("archive filename"),
max_length=1024,
editable=False,
default=None,
unique=True,
null=True,
help_text=_("Current archive filename in storage")
)
archive_serial_number = models.IntegerField(
_("archive serial number"),
blank=True,
@@ -256,16 +266,18 @@ class Document(models.Model):
return open(self.source_path, "rb")
@property
def archive_path(self):
if self.filename:
fname = archive_name_from_filename(self.filename)
else:
fname = "{:07}.pdf".format(self.pk)
def has_archive_version(self):
return self.archive_filename is not None
return os.path.join(
settings.ARCHIVE_DIR,
fname
)
@property
def archive_path(self):
if self.has_archive_version:
return os.path.join(
settings.ARCHIVE_DIR,
str(self.archive_filename)
)
else:
return None
@property
def archive_file(self):

View File

@@ -288,14 +288,17 @@ class DocumentParser(LoggingMixin):
def get_archive_path(self):
return self.archive_path
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
"""
Returns the path to a file we can use as a thumbnail for this document.
"""
raise NotImplementedError()
def get_optimised_thumbnail(self, document_path, mime_type):
thumbnail = self.get_thumbnail(document_path, mime_type)
def get_optimised_thumbnail(self,
document_path,
mime_type,
file_name=None):
thumbnail = self.get_thumbnail(document_path, mime_type, file_name)
if settings.OPTIMIZE_THUMBNAILS:
out_path = os.path.join(self.tempdir, "thumb_optipng.png")

View File

@@ -56,7 +56,8 @@ def check_sanity():
messages.append(SanityError(
f"Thumbnail of document {doc.pk} does not exist."))
else:
present_files.remove(os.path.normpath(doc.thumbnail_path))
if os.path.normpath(doc.thumbnail_path) in present_files:
present_files.remove(os.path.normpath(doc.thumbnail_path))
try:
with doc.thumbnail_file as f:
f.read()
@@ -71,7 +72,8 @@ def check_sanity():
messages.append(SanityError(
f"Original of document {doc.pk} does not exist."))
else:
present_files.remove(os.path.normpath(doc.source_path))
if os.path.normpath(doc.source_path) in present_files:
present_files.remove(os.path.normpath(doc.source_path))
try:
with doc.source_file as f:
checksum = hashlib.md5(f.read()).hexdigest()
@@ -86,13 +88,24 @@ def check_sanity():
))
# Check sanity of the archive file.
if doc.archive_checksum:
if doc.archive_checksum and not doc.archive_filename:
messages.append(SanityError(
f"Document {doc.pk} has an archive file checksum, but no "
f"archive filename."
))
elif not doc.archive_checksum and doc.archive_filename:
messages.append(SanityError(
f"Document {doc.pk} has an archive file, but its checksum is "
f"missing."
))
elif doc.has_archive_version:
if not os.path.isfile(doc.archive_path):
messages.append(SanityError(
f"Archived version of document {doc.pk} does not exist."
))
else:
present_files.remove(os.path.normpath(doc.archive_path))
if os.path.normpath(doc.archive_path) in present_files:
present_files.remove(os.path.normpath(doc.archive_path))
try:
with doc.archive_file as f:
checksum = hashlib.md5(f.read()).hexdigest()
@@ -103,7 +116,8 @@ def check_sanity():
else:
if not checksum == doc.archive_checksum:
messages.append(SanityError(
f"Checksum mismatch of archive {doc.pk}. "
f"Checksum mismatch of archived document "
f"{doc.pk}. "
f"Stored: {doc.checksum}, actual: {checksum}."
))

View File

@@ -129,7 +129,7 @@ class DocumentSerializer(DynamicFieldsModelSerializer):
return obj.get_public_filename()
def get_archived_file_name(self, obj):
if obj.archive_checksum:
if obj.has_archive_version:
return obj.get_public_filename(archive=True)
else:
return None

View File

@@ -1,6 +1,5 @@
import logging
import os
from subprocess import Popen
from django.conf import settings
from django.contrib.admin.models import ADDITION, LogEntry
@@ -14,7 +13,7 @@ from filelock import FileLock
from .. import index, matching
from ..file_handling import delete_empty_directories, \
create_source_path_directory, archive_name_from_filename, \
create_source_path_directory, \
generate_unique_filename
from ..models import Document, Tag
@@ -148,18 +147,18 @@ def set_tags(sender,
@receiver(models.signals.post_delete, sender=Document)
def cleanup_document_deletion(sender, instance, using, **kwargs):
with FileLock(settings.MEDIA_LOCK):
for f in (instance.source_path,
instance.archive_path,
instance.thumbnail_path):
if os.path.isfile(f):
for filename in (instance.source_path,
instance.archive_path,
instance.thumbnail_path):
if filename and os.path.isfile(filename):
try:
os.unlink(f)
os.unlink(filename)
logger.debug(
f"Deleted file {f}.")
f"Deleted file {filename}.")
except OSError as e:
logger.warning(
f"While deleting document {str(instance)}, the file "
f"{f} could not be deleted: {e}"
f"{filename} could not be deleted: {e}"
)
delete_empty_directories(
@@ -167,10 +166,15 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
root=settings.ORIGINALS_DIR
)
delete_empty_directories(
os.path.dirname(instance.archive_path),
root=settings.ARCHIVE_DIR
)
if instance.has_archive_version:
delete_empty_directories(
os.path.dirname(instance.archive_path),
root=settings.ARCHIVE_DIR
)
class CannotMoveFilesException(Exception):
pass
def validate_move(instance, old_path, new_path):
@@ -178,16 +182,14 @@ def validate_move(instance, old_path, new_path):
# Can't do anything if the old file does not exist anymore.
logger.fatal(
f"Document {str(instance)}: File {old_path} has gone.")
return False
raise CannotMoveFilesException()
if os.path.isfile(new_path):
# Can't do anything if the new file already exists. Skip updating file.
logger.warning(
f"Document {str(instance)}: Cannot rename file "
f"since target path {new_path} already exists.")
return False
return True
raise CannotMoveFilesException()
@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@@ -206,56 +208,61 @@ def update_filename_and_move_files(sender, instance, **kwargs):
return
with FileLock(settings.MEDIA_LOCK):
old_filename = instance.filename
new_filename = generate_unique_filename(
instance, settings.ORIGINALS_DIR)
try:
old_filename = instance.filename
old_source_path = instance.source_path
if new_filename == instance.filename:
# Don't do anything if its the same.
return
instance.filename = generate_unique_filename(instance)
move_original = old_filename != instance.filename
old_source_path = instance.source_path
new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename)
if not validate_move(instance, old_source_path, new_source_path):
return
# archive files are optional, archive checksum tells us if we have one,
# since this is None for documents without archived files.
if instance.archive_checksum:
new_archive_filename = archive_name_from_filename(new_filename)
old_archive_filename = instance.archive_filename
old_archive_path = instance.archive_path
new_archive_path = os.path.join(settings.ARCHIVE_DIR,
new_archive_filename)
if not validate_move(instance, old_archive_path, new_archive_path):
if instance.has_archive_version:
instance.archive_filename = generate_unique_filename(
instance, archive_filename=True
)
move_archive = old_archive_filename != instance.archive_filename # NOQA: E501
else:
move_archive = False
if not move_original and not move_archive:
# Don't do anything if filenames did not change.
return
create_source_path_directory(new_archive_path)
else:
old_archive_path = None
new_archive_path = None
if move_original:
validate_move(instance, old_source_path, instance.source_path)
create_source_path_directory(instance.source_path)
os.rename(old_source_path, instance.source_path)
create_source_path_directory(new_source_path)
try:
os.rename(old_source_path, new_source_path)
if instance.archive_checksum:
os.rename(old_archive_path, new_archive_path)
instance.filename = new_filename
if move_archive:
validate_move(
instance, old_archive_path, instance.archive_path)
create_source_path_directory(instance.archive_path)
os.rename(old_archive_path, instance.archive_path)
# Don't save() here to prevent infinite recursion.
Document.objects.filter(pk=instance.pk).update(
filename=new_filename)
filename=instance.filename,
archive_filename=instance.archive_filename,
)
except OSError as e:
instance.filename = old_filename
# this happens when we can't move a file. If that's the case for
# the archive file, we try our best to revert the changes.
# no need to save the instance, the update() has not happened yet.
except (OSError, DatabaseError, CannotMoveFilesException):
# This happens when either:
# - moving the files failed due to file system errors
# - saving to the database failed due to database errors
# In both cases, we need to revert to the original state.
# Try to move files to their original location.
try:
os.rename(new_source_path, old_source_path)
os.rename(new_archive_path, old_archive_path)
if move_original and os.path.isfile(instance.source_path):
os.rename(instance.source_path, old_source_path)
if move_archive and os.path.isfile(instance.archive_path):
os.rename(instance.archive_path, old_archive_path)
except Exception as e:
# This is fine, since:
# A: if we managed to move source from A to B, we will also
@@ -266,16 +273,10 @@ def update_filename_and_move_files(sender, instance, **kwargs):
# B: if moving the orignal file failed, nothing has changed
# anyway.
pass
except DatabaseError as e:
# this happens after moving files, so move them back into place.
# since moving them once succeeded, it's very likely going to
# succeed again.
os.rename(new_source_path, old_source_path)
if instance.archive_checksum:
os.rename(new_archive_path, old_archive_path)
# restore old values on the instance
instance.filename = old_filename
# again, no need to save the instance, since the actual update()
# operation failed.
instance.archive_filename = old_archive_filename
# finally, remove any empty sub folders. This will do nothing if
# something has failed above.
@@ -283,7 +284,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):
delete_empty_directories(os.path.dirname(old_source_path),
root=settings.ORIGINALS_DIR)
if old_archive_path and not os.path.isfile(old_archive_path):
if instance.has_archive_version and not os.path.isfile(old_archive_path): # NOQA: E501
delete_empty_directories(os.path.dirname(old_archive_path),
root=settings.ARCHIVE_DIR)

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.7 KiB

View File

@@ -0,0 +1 @@
This is a test file.

View File

@@ -23,18 +23,6 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
self.assertEqual(Document.objects.get(id=doc.id).title, "new title")
m.assert_called_once()
def test_tags(self):
doc = Document.objects.create(title="test")
doc.tags.create(name="t1")
doc.tags.create(name="t2")
self.assertEqual(self.doc_admin.tags_(doc), "<span >t1, </span><span >t2, </span>")
def test_tags_empty(self):
doc = Document.objects.create(title="test")
self.assertEqual(self.doc_admin.tags_(doc), "")
@mock.patch("documents.admin.index.remove_document")
def test_delete_model(self, m):
doc = Document.objects.create(title="test")

View File

@@ -146,21 +146,19 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 200)
self.assertEqual(response.content, content_thumbnail)
@override_settings(PAPERLESS_FILENAME_FORMAT="")
def test_download_with_archive(self):
_, filename = tempfile.mkstemp(dir=self.dirs.originals_dir)
content = b"This is a test"
content_archive = b"This is the same test but archived"
with open(filename, "wb") as f:
f.write(content)
filename = os.path.basename(filename)
doc = Document.objects.create(title="none", filename=filename,
doc = Document.objects.create(title="none", filename="my_document.pdf",
archive_filename="archived.pdf",
mime_type="application/pdf")
with open(doc.source_path, "wb") as f:
f.write(content)
with open(doc.archive_path, "wb") as f:
f.write(content_archive)
@@ -577,7 +575,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
async_task.assert_not_called()
def test_get_metadata(self):
doc = Document.objects.create(title="test", filename="file.pdf", mime_type="image/png", archive_checksum="A")
doc = Document.objects.create(title="test", filename="file.pdf", mime_type="image/png", archive_checksum="A", archive_filename="archive.pdf")
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png"), doc.source_path)
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), doc.archive_path)
@@ -591,6 +589,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertTrue(meta['has_archive_version'])
self.assertEqual(len(meta['original_metadata']), 0)
self.assertGreater(len(meta['archive_metadata']), 0)
self.assertEqual(meta['media_filename'], "file.pdf")
self.assertEqual(meta['archive_media_filename'], "archive.pdf")
def test_get_metadata_invalid_doc(self):
response = self.client.get(f"/api/documents/34576/metadata/")
@@ -610,6 +610,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertFalse(meta['has_archive_version'])
self.assertGreater(len(meta['original_metadata']), 0)
self.assertIsNone(meta['archive_metadata'])
self.assertIsNone(meta['archive_media_filename'])
def test_get_empty_suggestions(self):
doc = Document.objects.create(title="test", mime_type="application/pdf")

View File

@@ -5,12 +5,14 @@ import tempfile
from unittest import mock
from unittest.mock import MagicMock
from django.conf import settings
from django.test import TestCase, override_settings
from .utils import DirectoriesMixin
from ..consumer import Consumer, ConsumerError
from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
from ..parsers import DocumentParser, ParseError
from ..tasks import sanity_check
class TestAttributes(TestCase):
@@ -165,7 +167,7 @@ class TestFieldPermutations(TestCase):
class DummyParser(DocumentParser):
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
# not important during tests
raise NotImplementedError()
@@ -174,16 +176,34 @@ class DummyParser(DocumentParser):
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
self.archive_path = archive_path
def get_optimised_thumbnail(self, document_path, mime_type):
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def parse(self, document_path, mime_type, file_name=None):
self.text = "The Text"
class CopyParser(DocumentParser):
def get_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def __init__(self, logging_group, progress_callback=None):
super(CopyParser, self).__init__(logging_group, progress_callback)
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=self.tempdir)
def parse(self, document_path, mime_type, file_name=None):
self.text = "The text"
self.archive_path = os.path.join(self.tempdir, "archive.pdf")
shutil.copy(document_path, self.archive_path)
class FaultyParser(DocumentParser):
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
# not important during tests
raise NotImplementedError()
@@ -191,7 +211,7 @@ class FaultyParser(DocumentParser):
super(FaultyParser, self).__init__(logging_group)
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
def get_optimised_thumbnail(self, document_path, mime_type):
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def parse(self, document_path, mime_type, file_name=None):
@@ -203,6 +223,8 @@ def fake_magic_from_file(file, mime=False):
if mime:
if os.path.splitext(file)[1] == ".pdf":
return "application/pdf"
elif os.path.splitext(file)[1] == ".png":
return "image/png"
else:
return "unknown"
else:
@@ -274,6 +296,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertIsNone(document.correspondent)
self.assertIsNone(document.document_type)
self.assertEqual(document.filename, "0000001.pdf")
self.assertEqual(document.archive_filename, "0000001.pdf")
self.assertTrue(os.path.isfile(
document.source_path
@@ -432,6 +455,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertEqual(document.title, "new docs")
self.assertEqual(document.filename, "none/new docs.pdf")
self.assertEqual(document.archive_filename, "none/new docs.pdf")
self._assert_first_last_send_progress()
@@ -446,7 +470,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
filenames.insert(0, f)
return f
m.side_effect = lambda f, root: get_filename()
m.side_effect = lambda f, archive_filename = False: get_filename()
filename = self.get_test_file()
@@ -457,6 +481,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertEqual(document.title, "new docs")
self.assertIsNotNone(os.path.isfile(document.title))
self.assertTrue(os.path.isfile(document.source_path))
self.assertTrue(os.path.isfile(document.archive_path))
self._assert_first_last_send_progress()
@@ -516,6 +541,30 @@ class TestConsumer(DirectoriesMixin, TestCase):
self._assert_first_last_send_progress(last_status="FAILED")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@mock.patch("documents.parsers.document_consumer_declaration.send")
def test_similar_filenames(self, m):
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.png"), os.path.join(settings.CONSUMPTION_DIR, "simple.png"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png"), os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"))
m.return_value = [(None, {
"parser": CopyParser,
"mime_types": {"application/pdf": ".pdf", "image/png": ".png"},
"weight": 0
})]
doc1 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.png"))
doc2 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"))
doc3 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"))
self.assertEqual(doc1.filename, "simple.png")
self.assertEqual(doc1.archive_filename, "simple.pdf")
self.assertEqual(doc2.filename, "simple.pdf")
self.assertEqual(doc2.archive_filename, "simple_01.pdf")
self.assertEqual(doc3.filename, "simple.png.pdf")
self.assertEqual(doc3.archive_filename, "simple.png.pdf")
sanity_check()
class PreConsumeTestCase(TestCase):

View File

@@ -1,7 +1,6 @@
import datetime
import os
import shutil
from unittest import mock
from uuid import uuid4
from dateutil import tz
@@ -9,7 +8,6 @@ from django.conf import settings
from django.test import TestCase, override_settings
from documents.parsers import parse_date
from paperless_tesseract.parsers import RasterisedDocumentParser
class TestDate(TestCase):
@@ -152,4 +150,4 @@ class TestDate(TestCase):
2018, 2, 13, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
)
)

View File

@@ -201,6 +201,13 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(d), "my_doc_type - the_doc.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{asn} - {title}")
def test_asn(self):
d1 = Document.objects.create(title="the_doc", mime_type="application/pdf", archive_serial_number=652, checksum="A")
d2 = Document.objects.create(title="the_doc", mime_type="application/pdf", archive_serial_number=None, checksum="B")
self.assertEqual(generate_filename(d1), "652 - the_doc.pdf")
self.assertEqual(generate_filename(d2), "none - the_doc.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_underscore(self):
document = Document()
@@ -439,6 +446,18 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(document2.filename, "qwe.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@mock.patch("documents.signals.handlers.Document.objects.filter")
def test_no_update_without_change(self, m):
doc = Document.objects.create(title="document", filename="document.pdf", archive_filename="document.pdf", checksum="A", archive_checksum="B", mime_type="application/pdf")
Path(doc.source_path).touch()
Path(doc.archive_path).touch()
doc.save()
m.assert_not_called()
class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
@@ -448,7 +467,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", filename="0000001.pdf", checksum="A", archive_filename="0000001.pdf", archive_checksum="B")
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
@@ -461,7 +480,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
self.assertFalse(os.path.isfile(original))
self.assertFalse(os.path.isfile(archive))
@@ -475,7 +494,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
self.assertTrue(os.path.isfile(original))
self.assertFalse(os.path.isfile(archive))
@@ -486,14 +505,49 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
def test_move_archive_exists(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
existing_archive_file = os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")
Path(original).touch()
Path(archive).touch()
os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none"))
Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
Path(existing_archive_file).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
self.assertFalse(os.path.isfile(original))
self.assertFalse(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
self.assertTrue(os.path.isfile(existing_archive_file))
self.assertEqual(doc.archive_filename, "none/my_doc_01.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_move_original_only(self):
original = os.path.join(settings.ORIGINALS_DIR, "document_01.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "document.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="document", filename="document_01.pdf", checksum="A",
archive_checksum="B", archive_filename="document.pdf")
self.assertEqual(doc.filename, "document.pdf")
self.assertEqual(doc.archive_filename, "document.pdf")
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_move_archive_only(self):
original = os.path.join(settings.ORIGINALS_DIR, "document.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "document_01.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="document", filename="document.pdf", checksum="A",
archive_checksum="B", archive_filename="document_01.pdf")
self.assertEqual(doc.filename, "document.pdf")
self.assertEqual(doc.archive_filename, "document.pdf")
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@@ -514,8 +568,9 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
m.assert_called()
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
@@ -527,7 +582,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
#Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", archive_filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertFalse(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
@@ -551,19 +606,21 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", archive_filename="0000001.pdf", checksum="A", archive_checksum="B")
m.assert_called()
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="")
def test_archive_deleted(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
@@ -577,6 +634,28 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertFalse(os.path.isfile(doc.source_path))
self.assertFalse(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_archive_deleted2(self):
original = os.path.join(settings.ORIGINALS_DIR, "document.png")
original2 = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(original2).touch()
Path(archive).touch()
doc1 = Document.objects.create(mime_type="image/png", title="document", filename="document.png", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
doc2 = Document.objects.create(mime_type="application/pdf", title="0000001", filename="0000001.pdf", checksum="C")
self.assertTrue(os.path.isfile(doc1.source_path))
self.assertTrue(os.path.isfile(doc1.archive_path))
self.assertTrue(os.path.isfile(doc2.source_path))
doc2.delete()
self.assertTrue(os.path.isfile(doc1.source_path))
self.assertTrue(os.path.isfile(doc1.archive_path))
self.assertFalse(os.path.isfile(doc2.source_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
def test_database_error(self):
@@ -584,7 +663,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_filename="0000001.pdf", archive_checksum="B")
with mock.patch("documents.signals.handlers.Document.objects.filter") as m:
m.side_effect = DatabaseError()
doc.save()
@@ -594,6 +673,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
class TestFilenameGeneration(TestCase):
@override_settings(
@@ -617,7 +697,7 @@ class TestFilenameGeneration(TestCase):
def run():
doc = Document.objects.create(checksum=str(uuid.uuid4()), title=str(uuid.uuid4()), content="wow")
doc.filename = generate_unique_filename(doc, settings.ORIGINALS_DIR)
doc.filename = generate_unique_filename(doc)
Path(doc.thumbnail_path).touch()
with open(doc.source_path, "w") as f:
f.write(str(uuid.uuid4()))

View File

@@ -20,6 +20,7 @@ from documents.tests.utils import DirectoriesMixin
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
class TestArchiver(DirectoriesMixin, TestCase):
def make_models(self):
@@ -42,10 +43,27 @@ class TestArchiver(DirectoriesMixin, TestCase):
doc = Document.objects.get(id=doc.id)
self.assertIsNotNone(doc.checksum)
self.assertIsNotNone(doc.archive_checksum)
self.assertTrue(os.path.isfile(doc.archive_path))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(filecmp.cmp(sample_file, doc.source_path))
self.assertEqual(doc.archive_filename, "none/A.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_naming_priorities(self):
doc1 = Document.objects.create(checksum="A", title="document", content="first document", mime_type="application/pdf", filename="document.pdf")
doc2 = Document.objects.create(checksum="B", title="document", content="second document", mime_type="application/pdf", filename="document_01.pdf")
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document.pdf"))
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document_01.pdf"))
handle_document(doc2.pk)
handle_document(doc1.pk)
doc1 = Document.objects.get(id=doc1.id)
doc2 = Document.objects.get(id=doc2.id)
self.assertEqual(doc1.archive_filename, "document.pdf")
self.assertEqual(doc2.archive_filename, "document_01.pdf")
class TestDecryptDocuments(TestCase):
@@ -106,24 +124,27 @@ class TestMakeIndex(TestCase):
class TestRenamer(DirectoriesMixin, TestCase):
@override_settings(PAPERLESS_FILENAME_FORMAT="")
def test_rename(self):
doc = Document.objects.create(title="test", mime_type="application/pdf")
doc = Document.objects.create(title="test", mime_type="image/jpeg")
doc.filename = generate_filename(doc)
doc.archive_filename = generate_filename(doc, archive_filename=True)
doc.save()
Path(doc.source_path).touch()
Path(doc.archive_path).touch()
old_source_path = doc.source_path
with override_settings(PAPERLESS_FILENAME_FORMAT="{title}"):
with override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}"):
call_command("document_renamer")
doc2 = Document.objects.get(id=doc.id)
self.assertEqual(doc2.filename, "test.pdf")
self.assertFalse(os.path.isfile(old_source_path))
self.assertEqual(doc2.filename, "none/test.jpg")
self.assertEqual(doc2.archive_filename, "none/test.pdf")
self.assertFalse(os.path.isfile(doc.source_path))
self.assertFalse(os.path.isfile(doc.archive_path))
self.assertTrue(os.path.isfile(doc2.source_path))
self.assertTrue(os.path.isfile(doc2.archive_path))
class TestCreateClassifier(TestCase):

View File

@@ -22,7 +22,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.target = tempfile.mkdtemp()
self.addCleanup(shutil.rmtree, self.target)
self.d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow1", filename="0000001.pdf", mime_type="application/pdf")
self.d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow1", filename="0000001.pdf", mime_type="application/pdf", archive_filename="0000001.pdf")
self.d2 = Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow2", filename="0000002.pdf", mime_type="application/pdf")
self.d3 = Document.objects.create(content="Content", checksum="d38d7ed02e988e072caf924e0f3fcb76", title="wow2", filename="0000003.pdf", mime_type="application/pdf")
self.d4 = Document.objects.create(content="Content", checksum="82186aaa94f0b98697d704b90fd1c072", title="wow_dec", filename="0000004.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)

View File

@@ -0,0 +1,325 @@
import hashlib
import os
import shutil
from pathlib import Path
from unittest import mock
from django.conf import settings
from django.test import override_settings
from documents.parsers import ParseError
from documents.tests.utils import DirectoriesMixin, TestMigrations
STORAGE_TYPE_GPG = "gpg"
def archive_name_from_filename(filename):
return os.path.splitext(filename)[0] + ".pdf"
def archive_path_old(self):
if self.filename:
fname = archive_name_from_filename(self.filename)
else:
fname = "{:07}.pdf".format(self.pk)
return os.path.join(
settings.ARCHIVE_DIR,
fname
)
def archive_path_new(doc):
if doc.archive_filename is not None:
return os.path.join(
settings.ARCHIVE_DIR,
str(doc.archive_filename)
)
else:
return None
def source_path(doc):
if doc.filename:
fname = str(doc.filename)
else:
fname = "{:07}{}".format(doc.pk, doc.file_type)
if doc.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg" # pragma: no cover
return os.path.join(
settings.ORIGINALS_DIR,
fname
)
def thumbnail_path(doc):
file_name = "{:07}.png".format(doc.pk)
if doc.storage_type == STORAGE_TYPE_GPG:
file_name += ".gpg"
return os.path.join(
settings.THUMBNAIL_DIR,
file_name
)
def make_test_document(document_class, title: str, mime_type: str, original: str, original_filename: str, archive: str = None, archive_filename: str = None):
doc = document_class()
doc.filename = original_filename
doc.title = title
doc.mime_type = mime_type
doc.content = "the content, does not matter for this test"
doc.save()
shutil.copy2(original, source_path(doc))
with open(original, "rb") as f:
doc.checksum = hashlib.md5(f.read()).hexdigest()
if archive:
if archive_filename:
doc.archive_filename = archive_filename
shutil.copy2(archive, archive_path_new(doc))
else:
shutil.copy2(archive, archive_path_old(doc))
with open(archive, "rb") as f:
doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
doc.save()
Path(thumbnail_path(doc)).touch()
return doc
simple_jpg = os.path.join(os.path.dirname(__file__), "samples", "simple.jpg")
simple_pdf = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
simple_pdf2 = os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf")
simple_pdf3 = os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000003.pdf")
simple_txt = os.path.join(os.path.dirname(__file__), "samples", "simple.txt")
simple_png = os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png")
simple_png2 = os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
@override_settings(PAPERLESS_FILENAME_FORMAT="")
class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
migrate_from = '1011_auto_20210101_2340'
migrate_to = '1012_fix_archive_files'
def setUpBeforeMigration(self, apps):
Document = apps.get_model("documents", "Document")
self.unrelated = make_test_document(Document, "unrelated", "application/pdf", simple_pdf3, "unrelated.pdf", simple_pdf)
self.no_text = make_test_document(Document, "no-text", "image/png", simple_png2, "no-text.png", simple_pdf)
self.doc_no_archive = make_test_document(Document, "no_archive", "text/plain", simple_txt, "no_archive.txt")
self.clash1 = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf)
self.clash2 = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf)
self.clash3 = make_test_document(Document, "clash", "image/png", simple_png, "clash.png", simple_pdf)
self.clash4 = make_test_document(Document, "clash.png", "application/pdf", simple_pdf2, "clash.png.pdf", simple_pdf2)
self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash2))
self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash3))
self.assertNotEqual(archive_path_old(self.clash1), archive_path_old(self.clash4))
def testArchiveFilesMigrated(self):
Document = self.apps.get_model('documents', 'Document')
for doc in Document.objects.all():
if doc.archive_checksum:
self.assertIsNotNone(doc.archive_filename)
self.assertTrue(os.path.isfile(archive_path_new(doc)))
else:
self.assertIsNone(doc.archive_filename)
with open(source_path(doc), "rb") as f:
original_checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(original_checksum, doc.checksum)
if doc.archive_checksum:
self.assertTrue(os.path.isfile(archive_path_new(doc)))
with open(archive_path_new(doc), "rb") as f:
archive_checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(archive_checksum, doc.archive_checksum)
self.assertEqual(Document.objects.filter(archive_checksum__isnull=False).count(), 6)
def test_filenames(self):
Document = self.apps.get_model('documents', 'Document')
self.assertEqual(Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf")
self.assertEqual(Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf")
self.assertEqual(Document.objects.get(id=self.doc_no_archive.id).archive_filename, None)
self.assertEqual(Document.objects.get(id=self.clash1.id).archive_filename, f"{self.clash1.id:07}.pdf")
self.assertEqual(Document.objects.get(id=self.clash2.id).archive_filename, f"{self.clash2.id:07}.pdf")
self.assertEqual(Document.objects.get(id=self.clash3.id).archive_filename, f"{self.clash3.id:07}.pdf")
self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
def test_filenames(self):
Document = self.apps.get_model('documents', 'Document')
self.assertEqual(Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf")
self.assertEqual(Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf")
self.assertEqual(Document.objects.get(id=self.doc_no_archive.id).archive_filename, None)
self.assertEqual(Document.objects.get(id=self.clash1.id).archive_filename, "none/clash.pdf")
self.assertEqual(Document.objects.get(id=self.clash2.id).archive_filename, "none/clash_01.pdf")
self.assertEqual(Document.objects.get(id=self.clash3.id).archive_filename, "none/clash_02.pdf")
self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf")
def fake_parse_wrapper(parser, path, mime_type, file_name):
parser.archive_path = None
parser.text = "the text"
@override_settings(PAPERLESS_FILENAME_FORMAT="")
class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
migrate_from = '1011_auto_20210101_2340'
migrate_to = '1012_fix_archive_files'
auto_migrate = False
def test_archive_missing(self):
Document = self.apps.get_model("documents", "Document")
doc = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf)
os.unlink(archive_path_old(doc))
self.assertRaisesMessage(ValueError, "does not exist at: ", self.performMigration)
def test_parser_missing(self):
Document = self.apps.get_model("documents", "Document")
doc1 = make_test_document(Document, "document", "invalid/typesss768", simple_png, "document.png", simple_pdf)
doc2 = make_test_document(Document, "document", "invalid/typesss768", simple_jpg, "document.jpg", simple_pdf)
self.assertRaisesMessage(ValueError, "no parsers are available", self.performMigration)
@mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
def test_parser_error(self, m):
m.side_effect = ParseError()
Document = self.apps.get_model("documents", "Document")
doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)
self.assertIsNotNone(doc1.archive_checksum)
self.assertIsNotNone(doc2.archive_checksum)
with self.assertLogs() as capture:
self.performMigration()
self.assertEqual(m.call_count, 6)
self.assertEqual(
len(list(filter(lambda log: "Parse error, will try again in 5 seconds" in log, capture.output))),
4)
self.assertEqual(
len(list(filter(lambda log: "Unable to regenerate archive document for ID:" in log, capture.output))),
2)
Document = self.apps.get_model("documents", "Document")
doc1 = Document.objects.get(id=doc1.id)
doc2 = Document.objects.get(id=doc2.id)
self.assertIsNone(doc1.archive_checksum)
self.assertIsNone(doc2.archive_checksum)
self.assertIsNone(doc1.archive_filename)
self.assertIsNone(doc2.archive_filename)
@mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
def test_parser_no_archive(self, m):
m.side_effect = fake_parse_wrapper
Document = self.apps.get_model("documents", "Document")
doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)
with self.assertLogs() as capture:
self.performMigration()
self.assertEqual(
len(list(filter(lambda log: "Parser did not return an archive document for document" in log, capture.output))),
2)
Document = self.apps.get_model("documents", "Document")
doc1 = Document.objects.get(id=doc1.id)
doc2 = Document.objects.get(id=doc2.id)
self.assertIsNone(doc1.archive_checksum)
self.assertIsNone(doc2.archive_checksum)
self.assertIsNone(doc1.archive_filename)
self.assertIsNone(doc2.archive_filename)
@override_settings(PAPERLESS_FILENAME_FORMAT="")
class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
migrate_from = '1012_fix_archive_files'
migrate_to = '1011_auto_20210101_2340'
def setUpBeforeMigration(self, apps):
Document = apps.get_model("documents", "Document")
doc_unrelated = make_test_document(Document, "unrelated", "application/pdf", simple_pdf2, "unrelated.txt", simple_pdf2, "unrelated.pdf")
doc_no_archive = make_test_document(Document, "no_archive", "text/plain", simple_txt, "no_archive.txt")
clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_02.pdf")
def testArchiveFilesReverted(self):
Document = self.apps.get_model('documents', 'Document')
for doc in Document.objects.all():
if doc.archive_checksum:
self.assertTrue(os.path.isfile(archive_path_old(doc)))
with open(source_path(doc), "rb") as f:
original_checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(original_checksum, doc.checksum)
if doc.archive_checksum:
self.assertTrue(os.path.isfile(archive_path_old(doc)))
with open(archive_path_old(doc), "rb") as f:
archive_checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(archive_checksum, doc.archive_checksum)
self.assertEqual(Document.objects.filter(archive_checksum__isnull=False).count(), 2)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
class TestMigrateArchiveFilesBackwardsWithFilenameFormat(TestMigrateArchiveFilesBackwards):
pass
@override_settings(PAPERLESS_FILENAME_FORMAT="")
class TestMigrateArchiveFilesBackwardsErrors(DirectoriesMixin, TestMigrations):
migrate_from = '1012_fix_archive_files'
migrate_to = '1011_auto_20210101_2340'
auto_migrate = False
def test_filename_clash(self):
Document = self.apps.get_model("documents", "Document")
self.clashA = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf, "clash_02.pdf")
self.clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_01.pdf")
self.assertRaisesMessage(ValueError, "would clash with another archive filename", self.performMigration)
def test_filename_exists(self):
Document = self.apps.get_model("documents", "Document")
self.clashA = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf, "clash.pdf")
self.clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_01.pdf")
self.assertRaisesMessage(ValueError, "file already exists.", self.performMigration)

View File

@@ -1,52 +1,11 @@
import os
import shutil
from pathlib import Path
from django.apps import apps
from django.conf import settings
from django.db import connection
from django.db.migrations.executor import MigrationExecutor
from django.test import TestCase, TransactionTestCase, override_settings
from django.test import override_settings
from documents.models import Document
from documents.parsers import get_default_file_extension
from documents.tests.utils import DirectoriesMixin
class TestMigrations(TransactionTestCase):
@property
def app(self):
return apps.get_containing_app_config(type(self).__module__).name
migrate_from = None
migrate_to = None
def setUp(self):
super(TestMigrations, self).setUp()
assert self.migrate_from and self.migrate_to, \
"TestCase '{}' must define migrate_from and migrate_to properties".format(type(self).__name__)
self.migrate_from = [(self.app, self.migrate_from)]
self.migrate_to = [(self.app, self.migrate_to)]
executor = MigrationExecutor(connection)
old_apps = executor.loader.project_state(self.migrate_from).apps
# Reverse to the original migration
executor.migrate(self.migrate_from)
self.setUpBeforeMigration(old_apps)
# Run the migration to test
executor = MigrationExecutor(connection)
executor.loader.build_graph() # reload.
executor.migrate(self.migrate_to)
self.apps = executor.loader.project_state(self.migrate_to).apps
def setUpBeforeMigration(self, apps):
pass
from documents.tests.utils import DirectoriesMixin, TestMigrations
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
STORAGE_TYPE_GPG = "gpg"

View File

@@ -68,7 +68,7 @@ class TestParserDiscovery(TestCase):
)
def fake_get_thumbnail(self, path, mimetype):
def fake_get_thumbnail(self, path, mimetype, file_name):
return os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
@@ -89,15 +89,15 @@ class TestBaseParser(TestCase):
def test_get_optimised_thumbnail(self):
parser = DocumentParser(None)
parser.get_optimised_thumbnail("any", "not important")
parser.get_optimised_thumbnail("any", "not important", "document.pdf")
@mock.patch("documents.parsers.DocumentParser.get_thumbnail", fake_get_thumbnail)
@override_settings(OPTIMIZE_THUMBNAILS=False)
def test_get_optimised_thumb_disabled(self):
parser = DocumentParser(None)
path = parser.get_optimised_thumbnail("any", "not important")
self.assertEqual(path, fake_get_thumbnail(None, None, None))
path = parser.get_optimised_thumbnail("any", "not important", "document.pdf")
self.assertEqual(path, fake_get_thumbnail(None, None, None, None))
class TestParserAvailability(TestCase):

View File

@@ -21,7 +21,7 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf"), os.path.join(self.dirs.archive_dir, "0000001.pdf"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png"), os.path.join(self.dirs.thumbnail_dir, "0000001.png"))
return Document.objects.create(title="test", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", content="test", pk=1, filename="0000001.pdf", mime_type="application/pdf")
return Document.objects.create(title="test", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", content="test", pk=1, filename="0000001.pdf", mime_type="application/pdf", archive_filename="0000001.pdf")
def test_no_docs(self):
self.assertEqual(len(check_sanity()), 0)
@@ -86,6 +86,19 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
Path(self.dirs.originals_dir, "orphaned").touch()
self.assertEqual(len(check_sanity()), 1)
def test_all(self):
Document.objects.create(title="test", checksum="dgfhj", archive_checksum="dfhg", content="", pk=1, filename="0000001.pdf")
def test_error_tostring(self):
Document.objects.create(title="test", checksum="dgfhj", archive_checksum="dfhg", content="", pk=1, filename="0000001.pdf", archive_filename="0000001.pdf")
string = str(SanityFailedError(check_sanity()))
self.assertIsNotNone(string)
def test_archive_filename_no_checksum(self):
doc = self.make_test_data()
doc.archive_checksum = None
doc.save()
self.assertEqual(len(check_sanity()), 2)
def test_archive_checksum_no_filename(self):
doc = self.make_test_data()
doc.archive_filename = None
doc.save()
self.assertEqual(len(check_sanity()), 2)

View File

@@ -4,7 +4,10 @@ import tempfile
from collections import namedtuple
from contextlib import contextmanager
from django.test import override_settings
from django.apps import apps
from django.db import connection
from django.db.migrations.executor import MigrationExecutor
from django.test import override_settings, TransactionTestCase
def setup_directories():
@@ -79,3 +82,45 @@ class DirectoriesMixin:
def tearDown(self) -> None:
super(DirectoriesMixin, self).tearDown()
remove_dirs(self.dirs)
class TestMigrations(TransactionTestCase):
@property
def app(self):
return apps.get_containing_app_config(type(self).__module__).name
migrate_from = None
migrate_to = None
auto_migrate = True
def setUp(self):
super(TestMigrations, self).setUp()
assert self.migrate_from and self.migrate_to, \
"TestCase '{}' must define migrate_from and migrate_to properties".format(type(self).__name__)
self.migrate_from = [(self.app, self.migrate_from)]
self.migrate_to = [(self.app, self.migrate_to)]
executor = MigrationExecutor(connection)
old_apps = executor.loader.project_state(self.migrate_from).apps
# Reverse to the original migration
executor.migrate(self.migrate_from)
self.setUpBeforeMigration(old_apps)
self.apps = old_apps
if self.auto_migrate:
self.performMigration()
def performMigration(self):
# Run the migration to test
executor = MigrationExecutor(connection)
executor.loader.build_graph() # reload.
executor.migrate(self.migrate_to)
self.apps = executor.loader.project_state(self.migrate_to).apps
def setUpBeforeMigration(self, apps):
pass

View File

@@ -192,7 +192,7 @@ class DocumentViewSet(RetrieveModelMixin,
def file_response(self, pk, request, disposition):
doc = Document.objects.get(id=pk)
if not self.original_requested(request) and os.path.isfile(doc.archive_path): # NOQA: E501
if not self.original_requested(request) and doc.has_archive_version: # NOQA: E501
file_handle = doc.archive_file
filename = doc.get_public_filename(archive=True)
mime_type = 'application/pdf'
@@ -237,18 +237,18 @@ class DocumentViewSet(RetrieveModelMixin,
"original_size": os.stat(doc.source_path).st_size,
"original_mime_type": doc.mime_type,
"media_filename": doc.filename,
"has_archive_version": os.path.isfile(doc.archive_path),
"has_archive_version": doc.has_archive_version,
"original_metadata": self.get_metadata(
doc.source_path, doc.mime_type)
doc.source_path, doc.mime_type),
"archive_checksum": doc.archive_checksum,
"archive_media_filename": doc.archive_filename
}
if doc.archive_checksum and os.path.isfile(doc.archive_path):
meta['archive_checksum'] = doc.archive_checksum
if doc.has_archive_version:
meta['archive_size'] = os.stat(doc.archive_path).st_size,
meta['archive_metadata'] = self.get_metadata(
doc.archive_path, "application/pdf")
else:
meta['archive_checksum'] = None
meta['archive_size'] = None
meta['archive_metadata'] = None
@@ -291,6 +291,8 @@ class DocumentViewSet(RetrieveModelMixin,
handle = GnuPG.decrypted(doc.thumbnail_file)
else:
handle = doc.thumbnail_file
# TODO: Send ETag information and use that to send new thumbnails
# if available
return HttpResponse(handle,
content_type='image/png')
except (FileNotFoundError, Document.DoesNotExist):

View File

@@ -1 +1 @@
__version__ = (1, 1, 0)
__version__ = (1, 1, 1)

View File

@@ -13,11 +13,8 @@ def process_mail_accounts():
try:
total_new_documents += MailAccountHandler().handle_mail_account(
account)
except MailError as e:
logger.error(
f"Error while processing mail account {account}: {e}",
exc_info=True
)
except MailError:
logger.exception(f"Error while processing mail account {account}")
if total_new_documents > 0:
return f"Added {total_new_documents} document(s)."

View File

@@ -48,7 +48,7 @@ class RasterisedDocumentParser(DocumentParser):
)
return result
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
return make_thumbnail_from_pdf(
document_path, self.tempdir, self.logging_group)

View File

@@ -13,7 +13,7 @@ class TextDocumentParser(DocumentParser):
logging_name = "paperless.parsing.text"
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
def read_text():
with open(document_path, 'r') as src:

View File

@@ -16,9 +16,9 @@ class TikaDocumentParser(DocumentParser):
logging_name = "paperless.parsing.tika"
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
if not self.archive_path:
self.archive_path = self.convert_to_pdf(document_path)
self.archive_path = self.convert_to_pdf(document_path, file_name)
return make_thumbnail_from_pdf(
self.archive_path, self.tempdir, self.logging_group)