Compare commits

...

72 Commits

Author SHA1 Message Date
jonaswinkler
5e669534f2 reorganized test case 2021-02-14 17:24:31 +01:00
jonaswinkler
98b147b622 better sanity checker that logs messages in the log files and does not fail on warnings. 2021-02-14 17:08:29 +01:00
jonaswinkler
df6c59bc4f update dependencies 2021-02-14 15:38:47 +01:00
jonaswinkler
6e48da41e5 changelog 2021-02-14 14:05:42 +01:00
Jonas Winkler
5c8a01a6e8 Merge pull request #538 from jonaswinkler/translations_src-locale-en-us-lc-messages-django-po--dev_cs
Translate '/src/locale/en-us/LC_MESSAGES/django.po' in 'cs'
2021-02-14 13:41:33 +01:00
jonaswinkler
3d0a52c25f only load channels app if DEBUG is enabled; its only purpose is to monkey-patch the runserver command. 2021-02-14 12:50:30 +01:00
jonaswinkler
43c729568b release worker memory after tasks are done. 2021-02-14 12:29:55 +01:00
transifex-integration[bot]
62caeed283 Apply translations in cs
translation completed for the source file '/src/locale/en-us/LC_MESSAGES/django.po'
on the 'cs' language.
2021-02-14 07:05:05 +00:00
jonaswinkler
12836d4c68 revert django-q configuration 2021-02-13 20:25:52 +01:00
jonaswinkler
b48e67d714 revert a faulty change that caused memory usage to explode #537 2021-02-13 19:51:04 +01:00
jonaswinkler
f91f4d71bb Merge branch 'master' into dev 2021-02-13 18:09:14 +01:00
jonaswinkler
0a1f264c71 Gotenberg troubleshooting 2021-02-13 18:09:00 +01:00
jonaswinkler
64d61ae2fa version bump 2021-02-13 18:01:19 +01:00
jonaswinkler
5f0e800f6e metadata tab not showing anything if files are missing #534 2021-02-13 16:41:03 +01:00
jonaswinkler
8b2965d55b added sanity checker management command for manual execution #534 2021-02-13 16:39:29 +01:00
jonaswinkler
ed478a1d73 change thumbnail display for extra wide images #433 2021-02-12 18:20:17 +01:00
jonaswinkler
13e91d8c95 changelog 2021-02-12 18:04:15 +01:00
jonaswinkler
6ac90181cb documentation and changelog 2021-02-12 16:54:00 +01:00
jonaswinkler
d6c3471909 reprganized docker file, less layers, new shortcuts for management commands 2021-02-12 16:53:51 +01:00
jonaswinkler
5b56fad9c7 fix test case 2021-02-12 01:31:50 +01:00
jonaswinkler
ed0b1fe115 better exception logging 2021-02-11 22:16:41 +01:00
jonaswinkler
4211153527 update file renaming logic 2021-02-11 13:47:17 +01:00
jonaswinkler
2f85461109 added some test cases that I still need to address 2021-02-10 23:53:48 +01:00
jonaswinkler
3fa7dcb0cb changes to the admin document list 2021-02-10 21:34:58 +01:00
jonaswinkler
857fe3a55c fix one incorrect use of archive_version 2021-02-10 21:34:39 +01:00
jonaswinkler
dd19ea46fe backup documentation 2021-02-10 20:14:55 +01:00
jonaswinkler
21740a9d87 changelog 2021-02-10 20:05:02 +01:00
jonaswinkler
658fb2f208 remove invalid test cases 2021-02-10 20:01:35 +01:00
jonaswinkler
252d4cb513 update document admin 2021-02-10 18:55:39 +01:00
jonaswinkler
5aed41223b Merge branch 'master' into dev 2021-02-10 18:44:02 +01:00
jonaswinkler
45dfbf3747 downgrades 2021-02-10 18:27:41 +01:00
jonaswinkler
e4fe5bebab requirements 2021-02-10 17:09:22 +01:00
jonaswinkler
04519ee623 more testing of the migration 2021-02-10 16:58:55 +01:00
jonaswinkler
6c8f010f7a retries for archive generation 2021-02-10 14:50:20 +01:00
jonaswinkler
ed84cf26e7 update dependencies 2021-02-10 14:31:17 +01:00
jonaswinkler
1bc961f0c0 update dependencies 2021-02-10 11:50:57 +01:00
jonaswinkler
77d745381f more testing 2021-02-10 01:31:15 +01:00
jonaswinkler
7082cb9c36 document renamer testing 2021-02-10 01:12:45 +01:00
jonaswinkler
34e84cc757 sanity checker testing 2021-02-10 00:52:18 +01:00
jonaswinkler
9246411610 better logging for the migration 2021-02-10 00:52:01 +01:00
jonaswinkler
8330b3598c changelog 2021-02-09 23:23:11 +01:00
jonaswinkler
1d002149dc added ASN to filename format #519 2021-02-09 23:03:07 +01:00
jonaswinkler
8d6071e977 fix a bug with thumbnail generation when TIKA was enabled 2021-02-09 22:12:43 +01:00
jonaswinkler
7d67766508 todo note #520 2021-02-09 21:53:10 +01:00
jonaswinkler
887dd122fe more info in the admin 2021-02-09 21:00:04 +01:00
jonaswinkler
a1293c77b9 fix migration and more tests 2021-02-09 20:54:02 +01:00
jonaswinkler
ee9a73aa95 codestyle 2021-02-09 20:46:41 +01:00
jonaswinkler
9df332b614 test resources 2021-02-09 19:51:25 +01:00
jonaswinkler
d13e86a892 update all test cases to address the archive filename changes 2021-02-09 19:51:16 +01:00
jonaswinkler
69d7f8c180 testing the updated migration 2021-02-09 19:49:29 +01:00
jonaswinkler
1ba89ddd09 refactor migration tests to allow testing for exceptions while migrating 2021-02-09 19:47:50 +01:00
jonaswinkler
0c40a28ad3 more sanity checks regarding archive versions 2021-02-09 19:46:59 +01:00
jonaswinkler
2b7424c42a imports 2021-02-09 19:46:42 +01:00
jonaswinkler
a9f1766d1c todo note 2021-02-09 19:46:32 +01:00
jonaswinkler
fca8576d80 archive filenames are now stored in the database and checked for collisions just as original filenames as well, unified method for archive version checking 2021-02-09 19:46:19 +01:00
jonaswinkler
05f59e7d5e another way to make the test case fail 2021-02-09 02:13:25 +01:00
jonaswinkler
c9511680b3 version push 2021-02-09 01:36:39 +01:00
jonaswinkler
0ed001c56e validate move before migration 2021-02-09 00:13:13 +01:00
jonaswinkler
1e5a418191 more testing #511 2021-02-09 00:01:11 +01:00
jonaswinkler
e05735bc0f fix some test cases 2021-02-09 00:00:46 +01:00
jonaswinkler
7621e10840 only move unaffected files, regenerate affected files 2021-02-08 23:54:07 +01:00
jonaswinkler
d90080f325 only move files if necessary 2021-02-08 22:49:01 +01:00
jonaswinkler
0c676b90f2 migration for #511 2021-02-08 20:59:14 +01:00
jonaswinkler
c2d8bda83c fix for #511 2021-02-08 19:59:14 +01:00
jonaswinkler
302ebf737e refactor migration test case 2021-02-08 13:18:39 +01:00
jonaswinkler
816c95a4ae code style 2021-02-08 13:18:08 +01:00
jonaswinkler
40106f6fcc updated documentation regarding execution of management commands with docker fixes #509 2021-02-08 00:10:52 +01:00
jonaswinkler
61143b3ad1 make the test case fail 2021-02-07 19:53:08 +01:00
jonaswinkler
9b64eebd10 revert commit 2021-02-07 18:26:03 +01:00
jonaswinkler
731418349f added a test case that replicates #511 2021-02-07 18:23:54 +01:00
jonaswinkler
7728920670 Merge branch 'dev' 2021-02-07 01:22:04 +01:00
jonaswinkler
f555bb95ae possible fix for the ansible roles 2021-02-07 00:49:53 +01:00
59 changed files with 2324 additions and 520 deletions

View File

@@ -10,10 +10,6 @@ RUN ./configure && make
FROM python:3.7-slim
WORKDIR /usr/src/paperless/
COPY requirements.txt ./
# Binary dependencies
RUN apt-get update \
&& apt-get -y --no-install-recommends install \
@@ -49,16 +45,24 @@ RUN apt-get update \
tesseract-ocr-spa \
unpaper \
zlib1g \
&& rm -rf /var/lib/apt/lists/*
# This pulls in updated dependencies from bullseye to fix some issues with file type detection.
# TODO: Remove this once bullseye releases.
RUN echo "deb http://deb.debian.org/debian bullseye main" > /etc/apt/sources.list.d/bullseye.list \
&& echo "deb http://deb.debian.org/debian bullseye main" > /etc/apt/sources.list.d/bullseye.list \
&& apt-get update \
&& apt-get install --no-install-recommends -y file libmagic-dev \
&& rm -rf /var/lib/apt/lists/* \
&& rm /etc/apt/sources.list.d/bullseye.list
# copy jbig2enc
COPY --from=jbig2enc /usr/src/jbig2enc/src/.libs/libjbig2enc* /usr/local/lib/
COPY --from=jbig2enc /usr/src/jbig2enc/src/jbig2 /usr/local/bin/
COPY --from=jbig2enc /usr/src/jbig2enc/src/*.h /usr/local/include/
WORKDIR /usr/src/paperless/src/
COPY requirements.txt ../
# Python dependencies
RUN apt-get update \
&& apt-get -y --no-install-recommends install \
@@ -67,41 +71,36 @@ RUN apt-get update \
libpq-dev \
libqpdf-dev \
&& python3 -m pip install --upgrade --no-cache-dir supervisor \
&& python3 -m pip install --no-cache-dir -r requirements.txt \
&& python3 -m pip install --no-cache-dir -r ../requirements.txt \
&& apt-get -y purge build-essential libqpdf-dev \
&& apt-get -y autoremove --purge \
&& rm -rf /var/lib/apt/lists/* \
&& mkdir /var/log/supervisord /var/run/supervisord
&& rm -rf /var/lib/apt/lists/*
# setup docker-specific things
COPY docker/ ./docker/
# copy scripts
# this fixes issues with imagemagick and PDF
COPY docker/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml
COPY gunicorn.conf.py ./
COPY docker/supervisord.conf /etc/supervisord.conf
COPY docker/docker-entrypoint.sh /sbin/docker-entrypoint.sh
# copy jbig2enc
COPY --from=jbig2enc /usr/src/jbig2enc/src/.libs/libjbig2enc* /usr/local/lib/
COPY --from=jbig2enc /usr/src/jbig2enc/src/jbig2 /usr/local/bin/
COPY --from=jbig2enc /usr/src/jbig2enc/src/*.h /usr/local/include/
RUN cd docker \
&& cp imagemagick-policy.xml /etc/ImageMagick-6/policy.xml \
&& mkdir /var/log/supervisord /var/run/supervisord \
&& cp supervisord.conf /etc/supervisord.conf \
&& cp docker-entrypoint.sh /sbin/docker-entrypoint.sh \
&& chmod 755 /sbin/docker-entrypoint.sh \
&& chmod +x install_management_commands.sh \
&& ./install_management_commands.sh \
&& cd .. \
&& rm docker -rf
COPY gunicorn.conf.py ../
# copy app
COPY src/ ./src/
COPY src/ ./
# add users, setup scripts
RUN addgroup --gid 1000 paperless \
&& useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \
&& chown -R paperless:paperless . \
&& chmod 755 /sbin/docker-entrypoint.sh
WORKDIR /usr/src/paperless/src/
RUN sudo -HEu paperless python3 manage.py collectstatic --clear --no-input
RUN sudo -HEu paperless python3 manage.py compilemessages
&& chown -R paperless:paperless ../ \
&& sudo -HEu paperless python3 manage.py collectstatic --clear --no-input \
&& sudo -HEu paperless python3 manage.py compilemessages
VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/media", "/usr/src/paperless/consume", "/usr/src/paperless/export"]
ENTRYPOINT ["/sbin/docker-entrypoint.sh"]

View File

@@ -39,7 +39,7 @@ scikit-learn="==0.24.0"
# Prevent scipy updates because 1.6 is incompatible with python 3.6
scipy="~=1.5.4"
whitenoise = "~=5.2.0"
watchdog = "*"
watchdog = "~=1.0.0"
whoosh="~=2.7.4"
inotifyrecursive = "~=0.3.4"
ocrmypdf = "~=11.6"
@@ -51,7 +51,10 @@ channels = "~=3.0"
channels-redis = "*"
uvicorn = {extras = ["standard"], version = "*"}
concurrent-log-handler = "*"
django-redis = "*"
# uvloop 0.15+ incompatible with python 3.6
uvloop = "~=0.14.0"
# TODO: keep an eye on piwheel builds and update this once available (https://www.piwheels.org/project/cryptography/)
cryptography = "~=3.3.2"
[dev-packages]
coveralls = "*"

168
Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "0c2003b9d3d95d1af594f749a2740b55079551ea0ae512177ee9524bb327281e"
"sha256": "bd8b69979d91f4d8c52cac127c891d750c52959807220a98dcf74fed126bfa26"
},
"pipfile-spec": 6,
"requires": {},
@@ -60,11 +60,11 @@
},
"autobahn": {
"hashes": [
"sha256:93df8fc9d1821c9dabff9fed52181a9ad6eea5e9989d53102c391607d7c1666e",
"sha256:cceed2121b7a93024daa93c91fae33007f8346f0e522796421f36a6183abea99"
"sha256:41a3a3f89cde48643baf4e105d9491c566295f9abee951379e59121784044b8b",
"sha256:7e6b1bf95196b733978bab2d54a7ab8899c16ce11be369dc58422c07b7eea726"
],
"markers": "python_version >= '3.6'",
"version": "==21.1.1"
"version": "==21.2.1"
},
"automat": {
"hashes": [
@@ -90,47 +90,47 @@
},
"cffi": {
"hashes": [
"sha256:00a1ba5e2e95684448de9b89888ccd02c98d512064b4cb987d48f4b40aa0421e",
"sha256:00e28066507bfc3fe865a31f325c8391a1ac2916219340f87dfad602c3e48e5d",
"sha256:045d792900a75e8b1e1b0ab6787dd733a8190ffcf80e8c8ceb2fb10a29ff238a",
"sha256:0638c3ae1a0edfb77c6765d487fee624d2b1ee1bdfeffc1f0b58c64d149e7eec",
"sha256:105abaf8a6075dc96c1fe5ae7aae073f4696f2905fde6aeada4c9d2926752362",
"sha256:155136b51fd733fa94e1c2ea5211dcd4c8879869008fc811648f16541bf99668",
"sha256:1a465cbe98a7fd391d47dce4b8f7e5b921e6cd805ef421d04f5f66ba8f06086c",
"sha256:1d2c4994f515e5b485fd6d3a73d05526aa0fcf248eb135996b088d25dfa1865b",
"sha256:23f318bf74b170c6e9adb390e8bd282457f6de46c19d03b52f3fd042b5e19654",
"sha256:2c24d61263f511551f740d1a065eb0212db1dbbbbd241db758f5244281590c06",
"sha256:51a8b381b16ddd370178a65360ebe15fbc1c71cf6f584613a7ea08bfad946698",
"sha256:594234691ac0e9b770aee9fcdb8fa02c22e43e5c619456efd0d6c2bf276f3eb2",
"sha256:5cf4be6c304ad0b6602f5c4e90e2f59b47653ac1ed9c662ed379fe48a8f26b0c",
"sha256:64081b3f8f6f3c3de6191ec89d7dc6c86a8a43911f7ecb422c60e90c70be41c7",
"sha256:6bc25fc545a6b3d57b5f8618e59fc13d3a3a68431e8ca5fd4c13241cd70d0009",
"sha256:798caa2a2384b1cbe8a2a139d80734c9db54f9cc155c99d7cc92441a23871c03",
"sha256:7c6b1dece89874d9541fc974917b631406233ea0440d0bdfbb8e03bf39a49b3b",
"sha256:7ef7d4ced6b325e92eb4d3502946c78c5367bc416398d387b39591532536734e",
"sha256:840793c68105fe031f34d6a086eaea153a0cd5c491cde82a74b420edd0a2b909",
"sha256:8d6603078baf4e11edc4168a514c5ce5b3ba6e3e9c374298cb88437957960a53",
"sha256:9cc46bc107224ff5b6d04369e7c595acb700c3613ad7bcf2e2012f62ece80c35",
"sha256:9f7a31251289b2ab6d4012f6e83e58bc3b96bd151f5b5262467f4bb6b34a7c26",
"sha256:9ffb888f19d54a4d4dfd4b3f29bc2c16aa4972f1c2ab9c4ab09b8ab8685b9c2b",
"sha256:a5ed8c05548b54b998b9498753fb9cadbfd92ee88e884641377d8a8b291bcc01",
"sha256:a7711edca4dcef1a75257b50a2fbfe92a65187c47dab5a0f1b9b332c5919a3fb",
"sha256:af5c59122a011049aad5dd87424b8e65a80e4a6477419c0c1015f73fb5ea0293",
"sha256:b18e0a9ef57d2b41f5c68beefa32317d286c3d6ac0484efd10d6e07491bb95dd",
"sha256:b4e248d1087abf9f4c10f3c398896c87ce82a9856494a7155823eb45a892395d",
"sha256:ba4e9e0ae13fc41c6b23299545e5ef73055213e466bd107953e4a013a5ddd7e3",
"sha256:be8661bcee1bc2fc4b033a6ab65bd1f87ce5008492601695d0b9a4e820c3bde5",
"sha256:c6332685306b6417a91b1ff9fae889b3ba65c2292d64bd9245c093b1b284809d",
"sha256:d5ff0621c88ce83a28a10d2ce719b2ee85635e85c515f12bac99a95306da4b2e",
"sha256:d9efd8b7a3ef378dd61a1e77367f1924375befc2eba06168b6ebfa903a5e59ca",
"sha256:df5169c4396adc04f9b0a05f13c074df878b6052430e03f50e68adf3a57aa28d",
"sha256:ebb253464a5d0482b191274f1c8bf00e33f7e0b9c66405fbffc61ed2c839c775",
"sha256:ec80dc47f54e6e9a78181ce05feb71a0353854cc26999db963695f950b5fb375",
"sha256:f032b34669220030f905152045dfa27741ce1a6db3324a5bc0b96b6c7420c87b",
"sha256:f60567825f791c6f8a592f3c6e3bd93dd2934e3f9dac189308426bd76b00ef3b",
"sha256:f803eaa94c2fcda012c047e62bc7a51b0bdabda1cad7a92a522694ea2d76e49f"
"sha256:005a36f41773e148deac64b08f233873a4d0c18b053d37da83f6af4d9087b813",
"sha256:0857f0ae312d855239a55c81ef453ee8fd24136eaba8e87a2eceba644c0d4c06",
"sha256:1071534bbbf8cbb31b498d5d9db0f274f2f7a865adca4ae429e147ba40f73dea",
"sha256:158d0d15119b4b7ff6b926536763dc0714313aa59e320ddf787502c70c4d4bee",
"sha256:1f436816fc868b098b0d63b8920de7d208c90a67212546d02f84fe78a9c26396",
"sha256:2894f2df484ff56d717bead0a5c2abb6b9d2bf26d6960c4604d5c48bbc30ee73",
"sha256:29314480e958fd8aab22e4a58b355b629c59bf5f2ac2492b61e3dc06d8c7a315",
"sha256:34eff4b97f3d982fb93e2831e6750127d1355a923ebaeeb565407b3d2f8d41a1",
"sha256:35f27e6eb43380fa080dccf676dece30bef72e4a67617ffda586641cd4508d49",
"sha256:3d3dd4c9e559eb172ecf00a2a7517e97d1e96de2a5e610bd9b68cea3925b4892",
"sha256:43e0b9d9e2c9e5d152946b9c5fe062c151614b262fda2e7b201204de0b99e482",
"sha256:48e1c69bbacfc3d932221851b39d49e81567a4d4aac3b21258d9c24578280058",
"sha256:51182f8927c5af975fece87b1b369f722c570fe169f9880764b1ee3bca8347b5",
"sha256:5560dbf8deedbffb638d8a2da31da91094db361cc07f8a501a339b2daae2cbcc",
"sha256:58e3f59d583d413809d60779492342801d6e82fefb89c86a38e040c16883be53",
"sha256:5de7970188bb46b7bf9858eb6890aad302577a5f6f75091fd7cdd3ef13ef3045",
"sha256:65fa59693c62cf06e45ddbb822165394a288edce9e276647f0046e1ec26920f3",
"sha256:69e395c24fc60aad6bb4fa7e583698ea6cc684648e1ffb7fe85e3c1ca131a7d5",
"sha256:6c97d7350133666fbb5cf4abdc1178c812cb205dc6f41d174a7b0f18fb93337e",
"sha256:6e4714cc64f474e4d6e37cfff31a814b509a35cb17de4fb1999907575684479c",
"sha256:72d8d3ef52c208ee1c7b2e341f7d71c6fd3157138abf1a95166e6165dd5d4369",
"sha256:8ae6299f6c68de06f136f1f9e69458eae58f1dacf10af5c17353eae03aa0d827",
"sha256:8b198cec6c72df5289c05b05b8b0969819783f9418e0409865dac47288d2a053",
"sha256:9338beed13d880320450d95c9e07ccf839faa3ea7b75d788f4ed46d845044a71",
"sha256:99cd03ae7988a93dd00bcd9d0b75e1f6c426063d6f03d2f90b89e29b25b82dfa",
"sha256:9cf8022fb8d07a97c178b02327b284521c7708d7c71a9c9c355c178ac4bbd3d4",
"sha256:9de2e279153a443c656f2defd67769e6d1e4163952b3c622dcea5b08a6405322",
"sha256:9e93e79c2551ff263400e1e4be085a1210e12073a31c2011dbbda14bda0c6132",
"sha256:9ff227395193126d82e60319a673a037d5de84633f11279e336f9c0f189ecc62",
"sha256:a465da611f6fa124963b91bf432d960a555563efe4ed1cc403ba5077b15370aa",
"sha256:ad17025d226ee5beec591b52800c11680fca3df50b8b29fe51d882576e039ee0",
"sha256:afb29c1ba2e5a3736f1c301d9d0abe3ec8b86957d04ddfa9d7a6a42b9367e396",
"sha256:b85eb46a81787c50650f2392b9b4ef23e1f126313b9e0e9013b35c15e4288e2e",
"sha256:bb89f306e5da99f4d922728ddcd6f7fcebb3241fc40edebcb7284d7514741991",
"sha256:cbde590d4faaa07c72bf979734738f328d239913ba3e043b1e98fe9a39f8b2b6",
"sha256:cd2868886d547469123fadc46eac7ea5253ea7fcb139f12e1dfc2bbd406427d1",
"sha256:d42b11d692e11b6634f7613ad8df5d6d5f8875f5d48939520d351007b3c13406",
"sha256:f2d45f97ab6bb54753eab54fffe75aaf3de4ff2341c9daee1987ee1837636f1d",
"sha256:fd78e5fee591709f32ef6edb9a015b4aa1a5022598e36227500c8f4e02328d9c"
],
"version": "==1.14.4"
"version": "==1.14.5"
},
"channels": {
"hashes": [
@@ -190,24 +190,24 @@
},
"cryptography": {
"hashes": [
"sha256:0003a52a123602e1acee177dc90dd201f9bb1e73f24a070db7d36c588e8f5c7d",
"sha256:0e85aaae861d0485eb5a79d33226dd6248d2a9f133b81532c8f5aae37de10ff7",
"sha256:594a1db4511bc4d960571536abe21b4e5c3003e8750ab8365fafce71c5d86901",
"sha256:69e836c9e5ff4373ce6d3ab311c1a2eed274793083858d3cd4c7d12ce20d5f9c",
"sha256:788a3c9942df5e4371c199d10383f44a105d67d401fb4304178020142f020244",
"sha256:7e177e4bea2de937a584b13645cab32f25e3d96fc0bc4a4cf99c27dc77682be6",
"sha256:83d9d2dfec70364a74f4e7c70ad04d3ca2e6a08b703606993407bf46b97868c5",
"sha256:84ef7a0c10c24a7773163f917f1cb6b4444597efd505a8aed0a22e8c4780f27e",
"sha256:982f661bffc7a24b6d4f8ebe3291f17cf3833a0941c6f4d9d55c790b9aa2cdb3",
"sha256:9e21301f7a1e7c03dbea73e8602905a4ebba641547a462b26dd03451e5769e7c",
"sha256:9f6b0492d111b43de5f70052e24c1f0951cb9e6022188ebcb1cc3a3d301469b0",
"sha256:a69bd3c68b98298f490e84519b954335154917eaab52cf582fa2c5c7efc6e812",
"sha256:b4890d5fb9b7a23e3bf8abf5a8a7da8e228f1e97dc96b30b95685df840b6914a",
"sha256:c366df0401d1ec4e548bebe8f91d55ebcc0ec3137900d214dd7aac8427ef3030",
"sha256:dc42f645f8f3a489c3dd416730a514e7a91a59510ddaadc09d04224c098d3302"
"sha256:0d7b69674b738068fa6ffade5c962ecd14969690585aaca0a1b1fc9058938a72",
"sha256:1bd0ccb0a1ed775cd7e2144fe46df9dc03eefd722bbcf587b3e0616ea4a81eff",
"sha256:3c284fc1e504e88e51c428db9c9274f2da9f73fdf5d7e13a36b8ecb039af6e6c",
"sha256:49570438e60f19243e7e0d504527dd5fe9b4b967b5a1ff21cc12b57602dd85d3",
"sha256:541dd758ad49b45920dda3b5b48c968f8b2533d8981bcdb43002798d8f7a89ed",
"sha256:5a60d3780149e13b7a6ff7ad6526b38846354d11a15e21068e57073e29e19bed",
"sha256:7951a966613c4211b6612b0352f5bf29989955ee592c4a885d8c7d0f830d0433",
"sha256:922f9602d67c15ade470c11d616f2b2364950602e370c76f0c94c94ae672742e",
"sha256:a0f0b96c572fc9f25c3f4ddbf4688b9b38c69836713fb255f4a2715d93cbaf44",
"sha256:a777c096a49d80f9d2979695b835b0f9c9edab73b59e4ceb51f19724dda887ed",
"sha256:a9a4ac9648d39ce71c2f63fe7dc6db144b9fa567ddfc48b9fde1b54483d26042",
"sha256:aa4969f24d536ae2268c902b2c3d62ab464b5a66bcb247630d208a79a8098e9b",
"sha256:c7390f9b2119b2b43160abb34f63277a638504ef8df99f11cb52c1fda66a2e6f",
"sha256:ddd06e71c449a4fe10d0c60846280ee35d69ce49e3e413ce46d5f129e1468083",
"sha256:e18e6ab84dfb0ab997faf8cca25a86ff15dfea4027b986322026cc99e0a892da"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
"version": "==3.3.1"
"index": "pypi",
"version": "==3.3.2"
},
"daphne": {
"hashes": [
@@ -243,11 +243,11 @@
},
"django-extensions": {
"hashes": [
"sha256:7cd002495ff0a0e5eb6cdd6be759600905b4e4079232ea27618fc46bdd853651",
"sha256:c7f88625a53f631745d4f2bef9ec4dcb999ed59476393bdbbe99db8596778846"
"sha256:674ad4c3b1587a884881824f40212d51829e662e52f85b012cd83d83fe1271d9",
"sha256:9507f8761ee760748938fd8af766d0608fb2738cf368adfa1b2451f61c15ae35"
],
"index": "pypi",
"version": "==3.1.0"
"version": "==3.1.1"
},
"django-filter": {
"hashes": [
@@ -273,15 +273,6 @@
"index": "pypi",
"version": "==1.3.4"
},
"django-redis": {
"hashes": [
"sha256:1133b26b75baa3664164c3f44b9d5d133d1b8de45d94d79f38d1adc5b1d502e5",
"sha256:306589c7021e6468b2656edc89f62b8ba67e8d5a1c8877e2688042263daa7a63",
"sha256:f2b25b62cc95b63b7059aaf8e81710e7eea94678e545d31c46e47a6f4af99e56"
],
"index": "pypi",
"version": "==4.12.1"
},
"djangorestframework": {
"hashes": [
"sha256:0209bafcb7b5010fdfec784034f059d512256424de2a0f084cb82b096d6dd6a7",
@@ -462,11 +453,11 @@
},
"joblib": {
"hashes": [
"sha256:75ead23f13484a2a414874779d69ade40d4fa1abe62b222a23cd50d4bc822f6f",
"sha256:7ad866067ac1fdec27d51c8678ea760601b70e32ff1881d4dc8e1171f2b64b24"
"sha256:9c17567692206d2f3fb9ecf5e991084254fe631665c450b443761c4186a613f7",
"sha256:feeb1ec69c4d45129954f1b7034954241eedfd6ba39b5e9e4b6883be3332d5e5"
],
"markers": "python_version >= '3.6'",
"version": "==1.0.0"
"version": "==1.0.1"
},
"langdetect": {
"hashes": [
@@ -1113,11 +1104,11 @@
},
"tqdm": {
"hashes": [
"sha256:4621f6823bab46a9cc33d48105753ccbea671b68bab2c50a9f0be23d4065cb5a",
"sha256:fe3d08dd00a526850568d542ff9de9bbc2a09a791da3c334f3213d8d0bbbca65"
"sha256:11d544652edbdfc9cc41aa4c8a5c166513e279f3f2d9f1a9e1c89935b51de6ff",
"sha256:a89be573bfddb81bb0b395a416d5e55e3ecc73ce95a368a4f6360bedea33195e"
],
"index": "pypi",
"version": "==4.56.0"
"version": "==4.56.2"
},
"twisted": {
"extras": [
@@ -1201,6 +1192,7 @@
"sha256:e7514d7a48c063226b7d06617cbb12a14278d4323a065a8d46a7962686ce2e95",
"sha256:f07909cd9fc08c52d294b1570bba92186181ca01fe3dc9ffba68955273dd7362"
],
"index": "pypi",
"version": "==0.14.0"
},
"watchdog": {
@@ -1506,11 +1498,11 @@
},
"faker": {
"hashes": [
"sha256:190f0d3ce037866b5d230f0b9fd0f513f07c25dc326dcad6ee019849c68d441c",
"sha256:db7adc3b4755005fc960cf96fb4ed46b54b6eb21413741ab3f31a9595f379905"
"sha256:bf2a9b3f8d00a5dada61fc4a3f80fe0d6795c7f02a138a7d2ef2db5817c7d017",
"sha256:d4aecdb877519d06c2fdc01ffc5ecf70658981acf5e13cd07ded9892994ef5c6"
],
"markers": "python_version >= '3.6'",
"version": "==6.0.0"
"version": "==6.1.1"
},
"filelock": {
"hashes": [
@@ -1648,11 +1640,11 @@
},
"pygments": {
"hashes": [
"sha256:bc9591213a8f0e0ca1a5e68a479b4887fdc3e75d0774e5c71c31920c427de435",
"sha256:df49d09b498e83c1a73128295860250b0b7edd4c723a32e9bc0d295c7c2ec337"
"sha256:37a13ba168a02ac54cc5891a42b1caec333e59b66addb7fa633ea8a6d73445c0",
"sha256:b21b072d0ccdf29297a82a2363359d99623597b8a265b8081760e4d0f7153c88"
],
"markers": "python_version >= '3.5'",
"version": "==2.7.4"
"version": "==2.8.0"
},
"pyparsing": {
"hashes": [
@@ -1713,11 +1705,11 @@
},
"pytest-xdist": {
"hashes": [
"sha256:1d8edbb1a45e8e1f8e44b1260583107fc23f8bc8da6d18cb331ff61d41258ecf",
"sha256:f127e11e84ad37cc1de1088cb2990f3c354630d428af3f71282de589c5bb779b"
"sha256:2447a1592ab41745955fb870ac7023026f20a5f0bfccf1b52a879bd193d46450",
"sha256:718887296892f92683f6a51f25a3ae584993b06f7076ce1e1fd482e59a8220a2"
],
"index": "pypi",
"version": "==2.2.0"
"version": "==2.2.1"
},
"python-dateutil": {
"hashes": [

View File

@@ -38,7 +38,7 @@
- name: verify uploaded document has been accepted
uri:
url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/"
url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/paperless/"
headers:
Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}'
return_content: yes
@@ -51,7 +51,7 @@
- name: verify uploaded document has been consumed
uri:
url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/"
url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/paperless/"
headers:
Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}'
return_content: yes

View File

@@ -0,0 +1,6 @@
#!/bin/bash
# Installs one shortcut script per management command into /usr/local/bin.
# Each shortcut is generated from the template management_script.sh (expected
# in the current directory) by replacing the placeholder token with the name
# of the command.

# Stop at the first failing step so that a shortcut that could not be written
# makes the whole installation (and thus the image build) fail.
set -e

for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails document_sanity_checker;
do
  echo "installing $command..."
  # Quote the target path so it is always treated as a single word.
  sed "s/management_command/$command/g" management_script.sh > "/usr/local/bin/$command"
  chmod +x "/usr/local/bin/$command"
done

View File

@@ -0,0 +1,15 @@
#!/bin/bash
# Template for the shortcut scripts installed into /usr/local/bin.
# The install script substitutes the placeholder after "manage.py" below with
# the name of the actual command; all arguments are passed through unchanged.

set -e

# manage.py is addressed relative to the source directory.
cd /usr/src/paperless/src/

if [[ $(id -u) == 0 ]] ;
then
  # Invoked as root: run the command as the paperless user instead
  # (-H sets HOME to that user's home, -E keeps the current environment).
  sudo -HEu paperless python3 manage.py management_command "$@"
elif [[ $(id -un) == "paperless" ]] ;
then
  # Already the paperless user: run the command directly.
  python3 manage.py management_command "$@"
else
  # Any other user is not supported. Report on stderr and exit non-zero so
  # that callers can tell the command was never executed.
  echo "Unknown user." >&2
  exit 1
fi

View File

@@ -23,6 +23,12 @@ Options available to any installation of paperless:
* The document exporter is also able to update an already existing export.
Therefore, incremental backups with ``rsync`` are entirely possible.
.. caution::
You cannot import the export generated with one version of paperless in a
different version of paperless. The export contains an exact image of the
database, and migrations may change the database layout.
Options available to docker installations:
* Backup the docker volumes. These usually reside within
@@ -101,17 +107,17 @@ Then you can start paperless-ng with ``-d`` to have it run in the background.
update to newer versions. In order to enable updates as described above, either
get the new ``docker-compose.yml`` file from `here <https://github.com/jonaswinkler/paperless-ng/tree/master/docker/compose>`_
or edit the ``docker-compose.yml`` file, find the line that says
.. code::
image: jonaswinkler/paperless-ng:0.9.x
and replace the version with ``latest``:
.. code::
image: jonaswinkler/paperless-ng:latest
Bare Metal Route
================
@@ -171,26 +177,63 @@ Most of the update process is automated when using the ansible role.
$ ansible-playbook playbook.yml
Downgrading Paperless
#####################
Downgrades are possible. However, some updates also contain database migrations (these change the layout of the database and may move data).
In order to move back from a version that applied database migrations, you'll have to revert the database migration *before* downgrading,
and then downgrade paperless.
This table lists the most recent database migration for each version:
+---------+-------------------------+
| Version | Latest migration number |
+---------+-------------------------+
| 1.0.0 | 1011 |
+---------+-------------------------+
| 1.1.0 | 1011 |
+---------+-------------------------+
| 1.1.1 | 1012 |
+---------+-------------------------+
Execute the following management command to migrate your database:
.. code:: shell-session
$ python3 manage.py migrate documents <migration number>
.. note::
Some migrations cannot be undone. The command will issue errors if that happens.
.. _utilities-management-commands:
Management utilities
####################
Paperless comes with some management commands that perform various maintenance
tasks on your paperless instance. You can invoke these commands either by
tasks on your paperless instance. You can invoke these commands in the following way:
With docker-compose, while paperless is running:
.. code:: shell-session
$ cd /path/to/paperless
$ docker-compose run --rm webserver <command> <arguments>
$ docker-compose exec webserver <command> <arguments>
or
With docker, while paperless is running:
.. code:: shell-session
$ docker exec -it <container-name> <command> <arguments>
Bare metal:
.. code:: shell-session
$ cd /path/to/paperless/src
$ python3 manage.py <command> <arguments>
depending on whether you use docker or not.
All commands have built-in help, which can be accessed by executing them with
the argument ``--help``.
@@ -210,7 +253,7 @@ backup or migration to another DMS.
-c, --compare-checksums
-f, --use-filename-format
-d, --delete
``target`` is a folder to which the data gets written. This includes documents,
thumbnails and a ``manifest.json`` file. The manifest contains all metadata from
the database (correspondents, tags, etc).
@@ -367,6 +410,34 @@ the naming scheme.
The command takes no arguments and processes all your documents at once.
.. _utilities-sanity-checker:
Sanity checker
==============
Paperless has a built-in sanity checker that inspects your document collection for issues.
The issues detected by the sanity checker are as follows:
* Missing original files.
* Missing archive files.
* Inaccessible original files due to improper permissions.
* Inaccessible archive files due to improper permissions.
* Corrupted original documents, detected by comparing their checksum against what is stored in the database.
* Corrupted archive documents, detected by comparing their checksum against what is stored in the database.
* Missing thumbnails.
* Inaccessible thumbnails due to improper permissions.
* Documents without any content (warning).
* Orphaned files in the media directory (warning). These are files that are not referenced by any document in paperless.
.. code::
document_sanity_checker
The command takes no arguments. Depending on the size of your document archive, this may take some time.
Fetching e-mail
===============

View File

@@ -217,6 +217,7 @@ will create a directory structure as follows:
Paperless provides the following placeholders within filenames:
* ``{asn}``: The archive serial number of the document, or "none".
* ``{correspondent}``: The name of the correspondent, or "none".
* ``{document_type}``: The name of the document type, or "none".
* ``{tag_list}``: A comma separated list of all tags assigned to the document.

View File

@@ -5,6 +5,43 @@
Changelog
*********
paperless-ng 1.1.2
##################
* Always show top left corner of thumbnails, even for extra wide documents.
* Added a management command for executing the sanity checker directly.
See :ref:`utilities-sanity-checker`.
* The weekly sanity check now reports messages in the log files.
* Fixed an issue with the metadata tab not reporting anything in case of missing files.
* Reverted a change from 1.1.0 that caused huge memory usage due to redis caching.
* Some memory usage optimizations.
paperless-ng 1.1.1
##################
This release contains new database migrations.
* Fixed a bug in the sanity checker that would cause it to display "x not in list" errors instead of actual issues.
* Fixed a bug with filename generation for archive filenames that would cause the archive files of two documents to overlap.
* This happened when ``PAPERLESS_FILENAME_FORMAT`` is used and the filenames of two or more documents are the same, except for the file extension.
* Paperless will now store the archive filename in the database as well instead of deriving it from the original filename, and use the
same logic for detecting and avoiding filename clashes that's also used for original filenames.
* The migrations will repair any missing archive files. If you're using tika, ensure that tika is running while performing the migration. Docker-compose will take care of that.
* Fixed a bug with thumbnail regeneration when TIKA integration was used.
* Added ASN as a placeholder field to the filename format.
* The docker image now comes with built-in shortcuts for most management commands. These are now the recommended way to execute management commands, since these
also ensure that they're always executed as the paperless user and you're less likely to run into permission issues. See :ref:`utilities-management-commands`.
paperless-ng 1.1.0
##################
@@ -17,7 +54,7 @@ paperless-ng 1.1.0
or added with one of the mobile apps.
* Documents are successfully added to paperless.
* Document consumption failed (with error messages)
* Configuration options to enable/disable individual notifications.
* Live updates to document lists and saved views when new documents are added.

View File

@@ -94,6 +94,30 @@ If you want to get rid of the warning or actually experience issues with automat
the file ``classification_model.pickle`` in the data directory and let paperless recreate it.
504 Server Error: Gateway Timeout when adding Office documents
##############################################################
You may experience these errors when using the optional TIKA integration:
.. code::
requests.exceptions.HTTPError: 504 Server Error: Gateway Timeout for url: http://gotenberg:3000/convert/office
Gotenberg is a server that converts Office documents into PDF documents and has a default timeout of 10 seconds.
When conversion takes longer, Gotenberg raises this error.
You can increase the timeout by configuring an environment variable for gotenberg (see also `here <https://thecodingmachine.github.io/gotenberg/#environment_variables.default_wait_timeout>`__).
If using docker-compose, this is achieved by the following configuration change in the ``docker-compose.yml`` file:
.. code:: yaml
gotenberg:
image: thecodingmachine/gotenberg
restart: unless-stopped
environment:
DISABLE_GOOGLE_CHROME: 1
DEFAULT_WAIT_TIMEOUT: 30
Permission denied errors in the consumption directory
#####################################################

View File

@@ -12,11 +12,11 @@ arrow==0.17.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2,
asgiref==3.3.1; python_version >= '3.5'
async-timeout==3.0.1; python_full_version >= '3.5.3'
attrs==20.3.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
autobahn==21.1.1; python_version >= '3.6'
autobahn==21.2.1; python_version >= '3.6'
automat==20.2.0
blessed==1.17.12
certifi==2020.12.5
cffi==1.14.4
cffi==1.14.5
channels-redis==3.2.0
channels==3.0.3
chardet==4.0.0; python_version >= '3.1'
@@ -24,15 +24,14 @@ click==7.1.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2,
coloredlogs==15.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
concurrent-log-handler==0.9.19
constantly==15.1.0
cryptography==3.3.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
cryptography==3.3.2
daphne==3.0.1; python_version >= '3.6'
dateparser==0.7.6
django-cors-headers==3.7.0
django-extensions==3.1.0
django-extensions==3.1.1
django-filter==2.4.0
django-picklefield==3.0.1; python_version >= '3'
django-q==1.3.4
django-redis==4.12.1
django==3.1.6
djangorestframework==3.12.2
filelock==3.0.12
@@ -49,7 +48,7 @@ img2pdf==0.4.0
incremental==17.5.0
inotify-simple==1.3.5; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
inotifyrecursive==0.3.5
joblib==1.0.0; python_version >= '3.6'
joblib==1.0.1; python_version >= '3.6'
langdetect==1.0.8
lxml==4.6.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
msgpack==1.0.2
@@ -87,7 +86,7 @@ sortedcontainers==2.3.0
sqlparse==0.4.1; python_version >= '3.5'
threadpoolctl==2.1.0; python_version >= '3.5'
tika==1.24
tqdm==4.56.0
tqdm==4.56.2
twisted[tls]==20.3.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
txaio==20.12.1; python_version >= '3.6'
tzlocal==2.1

View File

@@ -6,7 +6,7 @@
.doc-img {
object-fit: cover;
object-position: top;
object-position: top left;
height: 100%;
position: absolute;
mix-blend-mode: multiply;

View File

@@ -2,7 +2,7 @@
.doc-img {
object-fit: cover;
object-position: top;
object-position: top left;
height: 200px;
mix-blend-mode: multiply;
}

View File

@@ -2,7 +2,7 @@ export const environment = {
production: true,
apiBaseUrl: "/api/",
appTitle: "Paperless-ng",
version: "1.1.0",
version: "1.1.2",
webSocketHost: window.location.host,
webSocketProtocol: (window.location.protocol == "https:" ? "wss:" : "ws:")
};

BIN
src/clash.pdf Normal file

Binary file not shown.

View File

@@ -50,26 +50,31 @@ class DocumentAdmin(admin.ModelAdmin):
"modified",
"mime_type",
"storage_type",
"filename")
"filename",
"checksum",
"archive_filename",
"archive_checksum"
)
list_display_links = ("title",)
list_display = (
"correspondent",
"id",
"title",
"tags_",
"created",
"mime_type",
"filename",
"archive_filename"
)
list_filter = (
"document_type",
"tags",
"correspondent"
("mime_type"),
("archive_serial_number", admin.EmptyFieldListFilter),
("archive_filename", admin.EmptyFieldListFilter),
)
filter_horizontal = ("tags",)
ordering = ["-created"]
ordering = ["-id"]
date_hierarchy = "created"
@@ -95,26 +100,6 @@ class DocumentAdmin(admin.ModelAdmin):
index.add_or_update_document(obj)
super(DocumentAdmin, self).save_model(request, obj, form, change)
@mark_safe
def tags_(self, obj):
r = ""
for tag in obj.tags.all():
r += self._html_tag(
"span",
tag.name + ", "
)
return r
@staticmethod
def _html_tag(kind, inside=None, **kwargs):
attributes = format_html_join(' ', '{}="{}"', kwargs.items())
if inside is not None:
return format_html("<{kind} {attributes}>{inside}</{kind}>",
kind=kind, attributes=attributes, inside=inside)
return format_html("<{} {}/>", kind, attributes)
class RuleInline(admin.TabularInline):
model = SavedViewFilterRule

View File

@@ -5,7 +5,6 @@ import pickle
import re
from django.conf import settings
from django.core.cache import cache
from documents.models import Document, MatchingModel
@@ -31,29 +30,23 @@ def load_classifier():
)
return None
version = os.stat(settings.MODEL_FILE).st_mtime
classifier = DocumentClassifier()
try:
classifier.load()
classifier = cache.get("paperless-classifier", version=version)
if not classifier:
classifier = DocumentClassifier()
try:
classifier.load()
cache.set("paperless-classifier", classifier,
version=version, timeout=86400)
except (EOFError, IncompatibleClassifierVersionError) as e:
# there's something wrong with the model file.
logger.error(
f"Unrecoverable error while loading document "
f"classification model: {str(e)}, deleting model file."
)
os.unlink(settings.MODEL_FILE)
classifier = None
except OSError as e:
logger.error(
f"Error while loading document classification model: {str(e)}"
)
classifier = None
except (EOFError, IncompatibleClassifierVersionError) as e:
# there's something wrong with the model file.
logger.exception(
f"Unrecoverable error while loading document "
f"classification model, deleting model file."
)
os.unlink(settings.MODEL_FILE)
classifier = None
except OSError as e:
logger.error(
f"Error while loading document classification model: {str(e)}"
)
classifier = None
return classifier

View File

@@ -241,7 +241,7 @@ class Consumer(LoggingMixin):
self._send_progress(70, 100, 'WORKING',
MESSAGE_GENERATING_THUMBNAIL)
thumbnail = document_parser.get_optimised_thumbnail(
self.path, mime_type)
self.path, mime_type, self.filename)
text = document_parser.get_text()
date = document_parser.get_date()
@@ -292,8 +292,7 @@ class Consumer(LoggingMixin):
# After everything is in the database, copy the files into
# place. If this fails, we'll also rollback the transaction.
with FileLock(settings.MEDIA_LOCK):
document.filename = generate_unique_filename(
document, settings.ORIGINALS_DIR)
document.filename = generate_unique_filename(document)
create_source_path_directory(document.source_path)
self._write(document.storage_type,
@@ -303,6 +302,10 @@ class Consumer(LoggingMixin):
thumbnail, document.thumbnail_path)
if archive_path and os.path.isfile(archive_path):
document.archive_filename = generate_unique_filename(
document,
archive_filename=True
)
create_source_path_directory(document.archive_path)
self._write(document.storage_type,
archive_path, document.archive_path)

View File

@@ -79,12 +79,40 @@ def many_to_dictionary(field):
return mydictionary
def generate_unique_filename(doc, root):
def generate_unique_filename(doc,
archive_filename=False):
"""
Generates a unique filename for doc in settings.ORIGINALS_DIR.
The returned filename is guaranteed to be either the current filename
of the document if unchanged, or a new filename that does not correspond
to any existing files. The function will append _01, _02, etc to the
filename before the extension to avoid conflicts.
If archive_filename is True, return a unique archive filename instead.
"""
if archive_filename:
old_filename = doc.archive_filename
root = settings.ARCHIVE_DIR
else:
old_filename = doc.filename
root = settings.ORIGINALS_DIR
# If generating archive filenames, try to make a name that is similar to
# the original filename first.
if archive_filename and doc.filename:
new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
if new_filename == old_filename or not os.path.exists(os.path.join(root, new_filename)): # NOQA: E501
return new_filename
counter = 0
while True:
new_filename = generate_filename(doc, counter)
if new_filename == doc.filename:
new_filename = generate_filename(
doc, counter, archive_filename=archive_filename)
if new_filename == old_filename:
# still the same as before.
return new_filename
@@ -94,7 +122,7 @@ def generate_unique_filename(doc, root):
return new_filename
def generate_filename(doc, counter=0, append_gpg=True):
def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
path = ""
try:
@@ -123,6 +151,11 @@ def generate_filename(doc, counter=0, append_gpg=True):
else:
document_type = "none"
if doc.archive_serial_number:
asn = str(doc.archive_serial_number)
else:
asn = "none"
path = settings.PAPERLESS_FILENAME_FORMAT.format(
title=pathvalidate.sanitize_filename(
doc.title, replacement_text="-"),
@@ -136,6 +169,7 @@ def generate_filename(doc, counter=0, append_gpg=True):
added_year=doc.added.year if doc.added else "none",
added_month=f"{doc.added.month:02}" if doc.added else "none",
added_day=f"{doc.added.day:02}" if doc.added else "none",
asn=asn,
tags=tags,
tag_list=tag_list
).strip()
@@ -148,18 +182,16 @@ def generate_filename(doc, counter=0, append_gpg=True):
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
counter_str = f"_{counter:02}" if counter else ""
filetype_str = ".pdf" if archive_filename else doc.file_type
if len(path) > 0:
filename = f"{path}{counter_str}{doc.file_type}"
filename = f"{path}{counter_str}{filetype_str}"
else:
filename = f"{doc.pk:07}{counter_str}{doc.file_type}"
filename = f"{doc.pk:07}{counter_str}{filetype_str}"
# Append .gpg for encrypted files
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
filename += ".gpg"
return filename
def archive_name_from_filename(filename):
return os.path.splitext(filename)[0] + ".pdf"

View File

@@ -78,8 +78,8 @@ def open_index(recreate=False):
try:
if exists_in(settings.INDEX_DIR) and not recreate:
return open_dir(settings.INDEX_DIR, schema=get_schema())
except Exception as e:
logger.error(f"Error while opening the index: {e}, recreating.")
except Exception:
logger.exception(f"Error while opening the index, recreating.")
if not os.path.isdir(settings.INDEX_DIR):
os.makedirs(settings.INDEX_DIR, exist_ok=True)

View File

@@ -16,7 +16,8 @@ from whoosh.writing import AsyncWriter
from documents.models import Document
from ... import index
from ...file_handling import create_source_path_directory
from ...file_handling import create_source_path_directory, \
generate_unique_filename
from ...parsers import get_parser_class_for_mime_type
@@ -39,13 +40,16 @@ def handle_document(document_id):
with transaction.atomic():
with open(parser.get_archive_path(), 'rb') as f:
checksum = hashlib.md5(f.read()).hexdigest()
# i'm going to save first so that in case the file move
# I'm going to save first so that in case the file move
# fails, the database is rolled back.
# we also don't use save() since that triggers the filehandling
# We also don't use save() since that triggers the filehandling
# logic, and we don't want that yet (file not yet in place)
document.archive_filename = generate_unique_filename(
document, archive_filename=True)
Document.objects.filter(pk=document.pk).update(
archive_checksum=checksum,
content=parser.get_text()
content=parser.get_text(),
archive_filename=document.archive_filename
)
with FileLock(settings.MEDIA_LOCK):
create_source_path_directory(document.archive_path)
@@ -56,7 +60,7 @@ def handle_document(document_id):
index.update_document(writer, document)
except Exception as e:
logger.error(f"Error while parsing document {document}: {str(e)}")
logger.exception(f"Error while parsing document {document}")
finally:
parser.cleanup()
@@ -101,7 +105,7 @@ class Command(BaseCommand):
document_ids = list(map(
lambda doc: doc.id,
filter(
lambda d: overwrite or not d.archive_checksum,
lambda d: overwrite or not d.has_archive_version,
documents
)
))

View File

@@ -54,8 +54,7 @@ def _consume(filepath):
if settings.CONSUMER_SUBDIRS_AS_TAGS:
tag_ids = _tags_from_path(filepath)
except Exception as e:
logger.error(
"Error creating tags from path: {}".format(e))
logger.exception("Error creating tags from path")
try:
async_task("documents.tasks.consume_file",
@@ -66,8 +65,7 @@ def _consume(filepath):
# Catch all so that the consumer won't crash.
# This is also what the test case is listening for to check for
# errors.
logger.error(
"Error while consuming document: {}".format(e))
logger.exception("Error while consuming document")
def _consume_wait_unmodified(file, num_tries=20, wait_time=1):

View File

@@ -139,7 +139,7 @@ class Command(BaseCommand):
thumbnail_target = os.path.join(self.target, thumbnail_name)
document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
if os.path.exists(document.archive_path):
if document.has_archive_version:
archive_name = base_name + "-archive.pdf"
archive_target = os.path.join(self.target, archive_name)
document_dict[EXPORTER_ARCHIVE_NAME] = archive_name

View File

@@ -151,6 +151,9 @@ class Command(BaseCommand):
shutil.copy2(thumbnail_path, document.thumbnail_path)
if archive_path:
create_source_path_directory(document.archive_path)
# TODO: this assumes that the export is valid and
# archive_filename is present on all documents with
# archived files
shutil.copy2(archive_path, document.archive_path)
document.save()

View File

@@ -0,0 +1,15 @@
from django.core.management.base import BaseCommand
from documents.sanity_checker import check_sanity
class Command(BaseCommand):
    """Management command that runs the sanity checker and logs its findings."""

    # Kept as a plain literal: post-processing the text by replacing every
    # single space with nothing would glue all words of the help together.
    help = "This command checks your document archive for issues."

    def handle(self, *args, **options):
        """Run the sanity check with progress reporting and log the result."""
        messages = check_sanity(progress=True)
        messages.log_messages()

View File

@@ -22,7 +22,10 @@ def _process_document(doc_in):
try:
thumb = parser.get_optimised_thumbnail(
document.source_path, document.mime_type)
document.source_path,
document.mime_type,
document.get_public_filename()
)
shutil.move(thumb, document.thumbnail_path)
finally:

View File

@@ -0,0 +1,330 @@
# Generated by Django 3.1.6 on 2021-02-07 22:26
import datetime
import hashlib
import logging
import os
import shutil
from time import sleep
import pathvalidate
from django.conf import settings
from django.db import migrations, models
from django.template.defaultfilters import slugify
from documents.file_handling import defaultdictNoStr, many_to_dictionary
logger = logging.getLogger("paperless.migrations")
###############################################################################
# This is code copied straight from paperless before the change.
###############################################################################
def archive_name_from_filename(filename):
    """Return ``filename`` with its last extension replaced by ``.pdf``."""
    stem, _extension = os.path.splitext(filename)
    return stem + ".pdf"
def archive_path_old(doc):
    """Return the archive path of ``doc`` as it was derived before this change.

    The name is built from the original filename when one is stored, and
    from the zero-padded primary key otherwise.
    """
    if not doc.filename:
        archive_name = "{:07}.pdf".format(doc.pk)
    else:
        archive_name = archive_name_from_filename(doc.filename)

    return os.path.join(settings.ARCHIVE_DIR, archive_name)
# Storage type value of GPG-encrypted documents; the helpers below compare it
# against ``doc.storage_type`` to decide whether a ".gpg" suffix is appended.
STORAGE_TYPE_GPG = "gpg"
def archive_path_new(doc):
    """Return the archive path built from ``doc.archive_filename``.

    Returns ``None`` when the document has no archive filename stored.
    """
    if doc.archive_filename is None:
        return None

    return os.path.join(settings.ARCHIVE_DIR, str(doc.archive_filename))
def source_path(doc):
    """Return the path of the original file of ``doc`` in the originals dir.

    Uses the stored filename when present; otherwise falls back to a name
    made of the zero-padded primary key and the document's file type.
    """
    stored_name = doc.filename
    if stored_name:
        return os.path.join(settings.ORIGINALS_DIR, str(stored_name))

    fallback_name = "{:07}{}".format(doc.pk, doc.file_type)
    # Only the fallback name gets the suffix added here; stored filenames are
    # assumed to already include it (generate_filename appends it) - TODO
    # confirm against the application code this was copied from.
    if doc.storage_type == STORAGE_TYPE_GPG:
        fallback_name += ".gpg"  # pragma: no cover

    return os.path.join(settings.ORIGINALS_DIR, fallback_name)
def generate_unique_filename(doc, archive_filename=False):
    """Return a filename for ``doc`` that does not clash with existing files.

    Candidates come from ``generate_filename`` with an increasing counter.
    A candidate is accepted when it equals the name the document already
    has, or when no file with that name exists below the relevant directory
    (the archive directory if ``archive_filename`` is true, the originals
    directory otherwise).
    """
    if archive_filename:
        current_name, base_dir = doc.archive_filename, settings.ARCHIVE_DIR
    else:
        current_name, base_dir = doc.filename, settings.ORIGINALS_DIR

    attempt = 0
    while True:
        candidate = generate_filename(
            doc, attempt, archive_filename=archive_filename)

        # Unchanged name: the existing file is the document's own.
        if candidate == current_name:
            return candidate

        if not os.path.exists(os.path.join(base_dir, candidate)):
            return candidate

        attempt += 1
def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
    """Build the storage filename of ``doc`` from PAPERLESS_FILENAME_FORMAT.

    :param doc: document whose metadata fills the format placeholders.
    :param counter: when non-zero, appended as ``_NN`` before the extension.
    :param append_gpg: append ``.gpg`` for documents stored as GPG.
    :param archive_filename: use ``.pdf`` instead of the document's own
        file type as extension.
    :return: the formatted name, or a name based on the zero-padded primary
        key when no format is configured, the format is invalid, or the
        formatted path is empty.
    """

    def sanitize(value):
        return pathvalidate.sanitize_filename(value, replacement_text="-")

    path = ""

    try:
        if settings.PAPERLESS_FILENAME_FORMAT is not None:
            tags = defaultdictNoStr(lambda: slugify(None),
                                    many_to_dictionary(doc.tags))

            tag_names = sorted([tag.name for tag in doc.tags.all()])
            tag_list = sanitize(",".join(tag_names))

            correspondent = (
                sanitize(doc.correspondent.name)
                if doc.correspondent else "none"
            )
            document_type = (
                sanitize(doc.document_type.name)
                if doc.document_type else "none"
            )

            path = settings.PAPERLESS_FILENAME_FORMAT.format(
                title=sanitize(doc.title),
                correspondent=correspondent,
                document_type=document_type,
                created=datetime.date.isoformat(doc.created),
                created_year=doc.created.year if doc.created else "none",
                created_month=(
                    f"{doc.created.month:02}" if doc.created else "none"
                ),
                created_day=(
                    f"{doc.created.day:02}" if doc.created else "none"
                ),
                added=datetime.date.isoformat(doc.added),
                added_year=doc.added.year if doc.added else "none",
                added_month=f"{doc.added.month:02}" if doc.added else "none",
                added_day=f"{doc.added.day:02}" if doc.added else "none",
                tags=tags,
                tag_list=tag_list
            ).strip()

            # No leading/trailing path separators in the stored name.
            path = path.strip(os.sep)
    except (ValueError, KeyError, IndexError):
        logger.warning(
            f"Invalid PAPERLESS_FILENAME_FORMAT: "
            f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")

    counter_suffix = f"_{counter:02}" if counter else ""
    extension = ".pdf" if archive_filename else doc.file_type

    # Fall back to the primary key when formatting produced nothing.
    stem = path if len(path) > 0 else f"{doc.pk:07}"
    filename = f"{stem}{counter_suffix}{extension}"

    # Append .gpg for encrypted files
    if append_gpg and doc.storage_type == STORAGE_TYPE_GPG:
        filename += ".gpg"

    return filename
###############################################################################
# This code performs bidirectional archive file transformation.
###############################################################################
def parse_wrapper(parser, path, mime_type, file_name):
    """Forward the arguments to ``parser.parse``.

    Exists as a separate function so that tests can mock it out.
    """
    parser.parse(path, mime_type, file_name)
def create_archive_version(doc, retry_count=3):
    """Re-parse ``doc`` and store a freshly generated archive version.

    On success the document's content, archive filename and archive checksum
    are updated and the archive file is copied to its new location. If the
    parser yields no archive file, the archive checksum is cleared instead.

    :param doc: the document to regenerate the archive version for.
    :param retry_count: number of parse attempts; after a ``ParseError`` the
        function waits 5 seconds before the next attempt. When the last
        attempt fails, the archive checksum is cleared and the failure is
        logged.
    """
    from documents.parsers import get_parser_class_for_mime_type, \
        DocumentParser, \
        ParseError

    logger.info(
        f"Regenerating archive document for document ID:{doc.id}"
    )
    parser_class = get_parser_class_for_mime_type(doc.mime_type)

    # Derive the file name from the source path rather than from
    # doc.filename: both give the same base name when a filename is stored,
    # but the source path is also defined for documents without one, where
    # os.path.basename(None) would raise a TypeError.
    original_path = source_path(doc)
    original_name = os.path.basename(original_path)

    for try_num in range(retry_count):
        parser: DocumentParser = parser_class(None, None)
        try:
            parse_wrapper(parser, original_path, doc.mime_type, original_name)
            doc.content = parser.get_text()

            generated_archive = parser.get_archive_path()
            if generated_archive and os.path.isfile(generated_archive):
                doc.archive_filename = generate_unique_filename(
                    doc, archive_filename=True)
                with open(generated_archive, "rb") as f:
                    doc.archive_checksum = hashlib.md5(f.read()).hexdigest()

                target_path = archive_path_new(doc)
                os.makedirs(os.path.dirname(target_path), exist_ok=True)
                shutil.copy2(generated_archive, target_path)
            else:
                doc.archive_checksum = None
                logger.error(
                    f"Parser did not return an archive document for document "
                    f"ID:{doc.id}. Removing archive document."
                )
            doc.save()
            return
        except ParseError:
            if try_num + 1 == retry_count:
                logger.exception(
                    f"Unable to regenerate archive document for ID:{doc.id}. You "
                    f"need to invoke the document_archiver management command "
                    f"manually for that document."
                )
                doc.archive_checksum = None
                doc.save()
                return
            else:
                # This is mostly here for the tika parser in docker
                # environments. The servers for parsing need to come up first,
                # and the docker setup doesn't ensure that tika is running
                # before attempting migrations.
                logger.error("Parse error, will try again in 5 seconds...")
                sleep(5)
        finally:
            # Always release the parser's temporary resources, whether the
            # attempt succeeded, failed or is about to be retried.
            parser.cleanup()
def move_old_to_new_locations(apps, schema_editor):
    """Forward migration: populate ``archive_filename`` and repair clashes.

    Documents whose old-style archive paths collide with each other are
    considered affected: their archive file is removed and regenerated under
    a unique name. All other documents simply get their existing archive
    name stored in the new field.

    :raises ValueError: if the archive file of an unaffected document is
        missing, or if no parser is available for an affected document.
    """
    Document = apps.get_model("documents", "Document")

    def archived_documents():
        # A fresh queryset per pass, so every pass reads from the database.
        return Document.objects.filter(archive_checksum__isnull=False)

    affected_document_ids = set()

    old_archive_path_to_id = {}

    # check for documents that have incorrect archive versions
    for doc in archived_documents():
        old_path = archive_path_old(doc)

        if old_path in old_archive_path_to_id:
            affected_document_ids.add(doc.id)
            affected_document_ids.add(old_archive_path_to_id[old_path])
        else:
            old_archive_path_to_id[old_path] = doc.id

    # check that archive files of all unaffected documents are in place
    for doc in archived_documents():
        old_path = archive_path_old(doc)
        if doc.id not in affected_document_ids and not os.path.isfile(old_path):
            raise ValueError(
                f"Archived document ID:{doc.id} does not exist at: "
                f"{old_path}")

    # check that we can regenerate affected archive versions
    if affected_document_ids:
        # Imported once, and only when there is something to regenerate.
        from documents.parsers import get_parser_class_for_mime_type

        for doc_id in affected_document_ids:
            doc = Document.objects.get(id=doc_id)
            parser_class = get_parser_class_for_mime_type(doc.mime_type)
            if not parser_class:
                raise ValueError(
                    f"Document ID:{doc.id} has an invalid archived document, "
                    f"but no parsers are available. Cannot migrate.")

    for doc in archived_documents():

        if doc.id in affected_document_ids:
            old_path = archive_path_old(doc)
            # remove affected archive versions
            if os.path.isfile(old_path):
                logger.debug(
                    f"Removing {old_path}"
                )
                os.unlink(old_path)
        else:
            # Set archive path for unaffected files. The name mirrors
            # archive_path_old(): documents without a stored filename keep
            # their primary-key based archive name instead of failing on
            # a missing filename.
            if doc.filename:
                doc.archive_filename = archive_name_from_filename(
                    doc.filename)
            else:
                doc.archive_filename = "{:07}.pdf".format(doc.pk)
            Document.objects.filter(id=doc.id).update(
                archive_filename=doc.archive_filename
            )

    # regenerate archive documents
    for doc_id in affected_document_ids:
        doc = Document.objects.get(id=doc_id)
        create_archive_version(doc)
def move_new_to_old_locations(apps, schema_editor):
    """Reverse migration: move archive files back to their old-style paths.

    A first pass validates that no two documents map to the same old path
    and that no move would overwrite an existing file; only then are the
    files moved in a second pass.

    :raises ValueError: if any of the validation checks fails.
    """
    Document = apps.get_model("documents", "Document")

    claimed_old_paths = set()

    # Validation pass: nothing is touched on disk here.
    for doc in Document.objects.filter(archive_checksum__isnull=False):
        new_archive_path = archive_path_new(doc)
        old_archive_path = archive_path_old(doc)

        if old_archive_path in claimed_old_paths:
            raise ValueError(
                f"Cannot migrate: Archive file name {old_archive_path} of "
                f"document {doc.filename} would clash with another archive "
                f"filename.")
        claimed_old_paths.add(old_archive_path)

        if new_archive_path != old_archive_path:
            if os.path.isfile(old_archive_path):
                raise ValueError(
                    f"Cannot migrate: Cannot move {new_archive_path} to "
                    f"{old_archive_path}: file already exists."
                )

    # Move pass: files already at their old location are left alone.
    for doc in Document.objects.filter(archive_checksum__isnull=False):
        new_archive_path = archive_path_new(doc)
        old_archive_path = archive_path_old(doc)

        if new_archive_path == old_archive_path:
            continue

        logger.debug(f"Moving {new_archive_path} to {old_archive_path}")
        shutil.move(new_archive_path, old_archive_path)
class Migration(migrations.Migration):
    """Add ``Document.archive_filename`` and relocate archive files.

    The schema operations run first; the data step then fills the new field
    (forward) or moves files back to their old-style paths (reverse).
    """

    dependencies = [
        ("documents", "1011_auto_20210101_2340"),
    ]

    operations = [
        migrations.AddField(
            model_name="document",
            name="archive_filename",
            field=models.FilePathField(
                default=None,
                editable=False,
                help_text="Current archive filename in storage",
                max_length=1024,
                null=True,
                unique=True,
                verbose_name="archive filename",
            ),
        ),
        migrations.AlterField(
            model_name="document",
            name="filename",
            field=models.FilePathField(
                default=None,
                editable=False,
                help_text="Current filename in storage",
                max_length=1024,
                null=True,
                unique=True,
                verbose_name="filename",
            ),
        ),
        migrations.RunPython(
            move_old_to_new_locations,
            move_new_to_old_locations,
        ),
    ]

View File

@@ -16,7 +16,6 @@ from django.utils.timezone import is_aware
from django.utils.translation import gettext_lazy as _
from documents.file_handling import archive_name_from_filename
from documents.parsers import get_default_file_extension
@@ -208,10 +207,21 @@ class Document(models.Model):
max_length=1024,
editable=False,
default=None,
unique=True,
null=True,
help_text=_("Current filename in storage")
)
archive_filename = models.FilePathField(
_("archive filename"),
max_length=1024,
editable=False,
default=None,
unique=True,
null=True,
help_text=_("Current archive filename in storage")
)
archive_serial_number = models.IntegerField(
_("archive serial number"),
blank=True,
@@ -256,16 +266,18 @@ class Document(models.Model):
return open(self.source_path, "rb")
@property
def archive_path(self):
if self.filename:
fname = archive_name_from_filename(self.filename)
else:
fname = "{:07}.pdf".format(self.pk)
def has_archive_version(self):
return self.archive_filename is not None
return os.path.join(
settings.ARCHIVE_DIR,
fname
)
@property
def archive_path(self):
if self.has_archive_version:
return os.path.join(
settings.ARCHIVE_DIR,
str(self.archive_filename)
)
else:
return None
@property
def archive_file(self):

View File

@@ -288,14 +288,17 @@ class DocumentParser(LoggingMixin):
def get_archive_path(self):
return self.archive_path
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
"""
Returns the path to a file we can use as a thumbnail for this document.
"""
raise NotImplementedError()
def get_optimised_thumbnail(self, document_path, mime_type):
thumbnail = self.get_thumbnail(document_path, mime_type)
def get_optimised_thumbnail(self,
document_path,
mime_type,
file_name=None):
thumbnail = self.get_thumbnail(document_path, mime_type, file_name)
if settings.OPTIMIZE_THUMBNAILS:
out_path = os.path.join(self.tempdir, "thumb_optipng.png")

View File

@@ -1,45 +1,55 @@
import hashlib
import logging
import os
from django.conf import settings
from tqdm import tqdm
from documents.models import Document
class SanityMessage:
message = None
class SanityCheckMessages:
    """Ordered collection of sanity checker findings with their log level.

    Each finding is stored as a dict with the keys ``"level"`` (a
    ``logging`` level constant) and ``"message"``. The collection supports
    ``len()`` and indexing.
    """

    def __init__(self):
        self._messages = []

    def _add(self, level, message):
        # Single place that defines the shape of a stored finding.
        self._messages.append({"level": level, "message": message})

    def _has_level(self, level):
        return any(msg["level"] == level for msg in self._messages)

    def error(self, message):
        """Record ``message`` with level ERROR."""
        self._add(logging.ERROR, message)

    def warning(self, message):
        """Record ``message`` with level WARNING."""
        self._add(logging.WARNING, message)

    def info(self, message):
        """Record ``message`` with level INFO."""
        self._add(logging.INFO, message)

    def log_messages(self):
        """Write all findings to the ``paperless.sanity_checker`` logger.

        Logs a single informational line when nothing was recorded.
        """
        logger = logging.getLogger("paperless.sanity_checker")

        if not self._messages:
            logger.info("Sanity checker detected no issues.")
            return

        for msg in self._messages:
            logger.log(msg["level"], msg["message"])

    def __len__(self):
        return len(self._messages)

    def __getitem__(self, item):
        return self._messages[item]

    def has_error(self):
        """Return True if at least one ERROR finding was recorded."""
        return self._has_level(logging.ERROR)

    def has_warning(self):
        """Return True if at least one WARNING finding was recorded."""
        return self._has_level(logging.WARNING)
class SanityWarning(SanityMessage):
def __init__(self, message):
self.message = message
def __str__(self):
return f"Warning: {self.message}"
class SanityCheckFailedException(Exception):
pass
class SanityError(SanityMessage):
def __init__(self, message):
self.message = message
def __str__(self):
return f"ERROR: {self.message}"
class SanityFailedError(Exception):
def __init__(self, messages):
self.messages = messages
def __str__(self):
message_string = "\n".join([str(m) for m in self.messages])
return (
f"The following issuse were found by the sanity checker:\n"
f"{message_string}\n\n===============\n\n")
def check_sanity():
messages = []
def check_sanity(progress=False):
messages = SanityCheckMessages()
present_files = []
for root, subdirs, files in os.walk(settings.MEDIA_ROOT):
@@ -50,72 +60,86 @@ def check_sanity():
if lockfile in present_files:
present_files.remove(lockfile)
for doc in Document.objects.all():
if progress:
docs = tqdm(Document.objects.all())
else:
docs = Document.objects.all()
for doc in docs:
# Check sanity of the thumbnail
if not os.path.isfile(doc.thumbnail_path):
messages.append(SanityError(
f"Thumbnail of document {doc.pk} does not exist."))
messages.error(f"Thumbnail of document {doc.pk} does not exist.")
else:
present_files.remove(os.path.normpath(doc.thumbnail_path))
if os.path.normpath(doc.thumbnail_path) in present_files:
present_files.remove(os.path.normpath(doc.thumbnail_path))
try:
with doc.thumbnail_file as f:
f.read()
except OSError as e:
messages.append(SanityError(
messages.error(
f"Cannot read thumbnail file of document {doc.pk}: {e}"
))
)
# Check sanity of the original file
# TODO: extract method
if not os.path.isfile(doc.source_path):
messages.append(SanityError(
f"Original of document {doc.pk} does not exist."))
messages.error(f"Original of document {doc.pk} does not exist.")
else:
present_files.remove(os.path.normpath(doc.source_path))
if os.path.normpath(doc.source_path) in present_files:
present_files.remove(os.path.normpath(doc.source_path))
try:
with doc.source_file as f:
checksum = hashlib.md5(f.read()).hexdigest()
except OSError as e:
messages.append(SanityError(
f"Cannot read original file of document {doc.pk}: {e}"))
messages.error(
f"Cannot read original file of document {doc.pk}: {e}")
else:
if not checksum == doc.checksum:
messages.append(SanityError(
messages.error(
f"Checksum mismatch of document {doc.pk}. "
f"Stored: {doc.checksum}, actual: {checksum}."
))
)
# Check sanity of the archive file.
if doc.archive_checksum:
if doc.archive_checksum and not doc.archive_filename:
messages.error(
f"Document {doc.pk} has an archive file checksum, but no "
f"archive filename."
)
elif not doc.archive_checksum and doc.archive_filename:
messages.error(
f"Document {doc.pk} has an archive file, but its checksum is "
f"missing."
)
elif doc.has_archive_version:
if not os.path.isfile(doc.archive_path):
messages.append(SanityError(
messages.error(
f"Archived version of document {doc.pk} does not exist."
))
)
else:
present_files.remove(os.path.normpath(doc.archive_path))
if os.path.normpath(doc.archive_path) in present_files:
present_files.remove(os.path.normpath(doc.archive_path))
try:
with doc.archive_file as f:
checksum = hashlib.md5(f.read()).hexdigest()
except OSError as e:
messages.append(SanityError(
messages.error(
f"Cannot read archive file of document {doc.pk}: {e}"
))
)
else:
if not checksum == doc.archive_checksum:
messages.append(SanityError(
f"Checksum mismatch of archive {doc.pk}. "
f"Stored: {doc.checksum}, actual: {checksum}."
))
messages.error(
f"Checksum mismatch of archived document "
f"{doc.pk}. "
f"Stored: {doc.archive_checksum}, "
f"actual: {checksum}."
)
# other document checks
if not doc.content:
messages.append(SanityWarning(
f"Document {doc.pk} has no content."
))
messages.info(f"Document {doc.pk} has no content.")
for extra_file in present_files:
messages.append(SanityWarning(
f"Orphaned file in media dir: {extra_file}"
))
messages.warning(f"Orphaned file in media dir: {extra_file}")
return messages

View File

@@ -129,7 +129,7 @@ class DocumentSerializer(DynamicFieldsModelSerializer):
return obj.get_public_filename()
def get_archived_file_name(self, obj):
if obj.archive_checksum:
if obj.has_archive_version:
return obj.get_public_filename(archive=True)
else:
return None

View File

@@ -1,6 +1,5 @@
import logging
import os
from subprocess import Popen
from django.conf import settings
from django.contrib.admin.models import ADDITION, LogEntry
@@ -14,7 +13,7 @@ from filelock import FileLock
from .. import index, matching
from ..file_handling import delete_empty_directories, \
create_source_path_directory, archive_name_from_filename, \
create_source_path_directory, \
generate_unique_filename
from ..models import Document, Tag
@@ -148,18 +147,18 @@ def set_tags(sender,
@receiver(models.signals.post_delete, sender=Document)
def cleanup_document_deletion(sender, instance, using, **kwargs):
with FileLock(settings.MEDIA_LOCK):
for f in (instance.source_path,
instance.archive_path,
instance.thumbnail_path):
if os.path.isfile(f):
for filename in (instance.source_path,
instance.archive_path,
instance.thumbnail_path):
if filename and os.path.isfile(filename):
try:
os.unlink(f)
os.unlink(filename)
logger.debug(
f"Deleted file {f}.")
f"Deleted file {filename}.")
except OSError as e:
logger.warning(
f"While deleting document {str(instance)}, the file "
f"{f} could not be deleted: {e}"
f"{filename} could not be deleted: {e}"
)
delete_empty_directories(
@@ -167,10 +166,15 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
root=settings.ORIGINALS_DIR
)
delete_empty_directories(
os.path.dirname(instance.archive_path),
root=settings.ARCHIVE_DIR
)
if instance.has_archive_version:
delete_empty_directories(
os.path.dirname(instance.archive_path),
root=settings.ARCHIVE_DIR
)
class CannotMoveFilesException(Exception):
    """Signal that a document's files cannot be moved to their new location.

    Raised by ``validate_move`` when the old file has gone or the target
    path already exists; the rename handler catches it and restores the
    previous file names.
    """
def validate_move(instance, old_path, new_path):
@@ -178,16 +182,14 @@ def validate_move(instance, old_path, new_path):
# Can't do anything if the old file does not exist anymore.
logger.fatal(
f"Document {str(instance)}: File {old_path} has gone.")
return False
raise CannotMoveFilesException()
if os.path.isfile(new_path):
# Can't do anything if the new file already exists. Skip updating file.
logger.warning(
f"Document {str(instance)}: Cannot rename file "
f"since target path {new_path} already exists.")
return False
return True
raise CannotMoveFilesException()
@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@@ -206,56 +208,61 @@ def update_filename_and_move_files(sender, instance, **kwargs):
return
with FileLock(settings.MEDIA_LOCK):
old_filename = instance.filename
new_filename = generate_unique_filename(
instance, settings.ORIGINALS_DIR)
try:
old_filename = instance.filename
old_source_path = instance.source_path
if new_filename == instance.filename:
# Don't do anything if its the same.
return
instance.filename = generate_unique_filename(instance)
move_original = old_filename != instance.filename
old_source_path = instance.source_path
new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename)
if not validate_move(instance, old_source_path, new_source_path):
return
# archive files are optional, archive checksum tells us if we have one,
# since this is None for documents without archived files.
if instance.archive_checksum:
new_archive_filename = archive_name_from_filename(new_filename)
old_archive_filename = instance.archive_filename
old_archive_path = instance.archive_path
new_archive_path = os.path.join(settings.ARCHIVE_DIR,
new_archive_filename)
if not validate_move(instance, old_archive_path, new_archive_path):
if instance.has_archive_version:
instance.archive_filename = generate_unique_filename(
instance, archive_filename=True
)
move_archive = old_archive_filename != instance.archive_filename # NOQA: E501
else:
move_archive = False
if not move_original and not move_archive:
# Don't do anything if filenames did not change.
return
create_source_path_directory(new_archive_path)
else:
old_archive_path = None
new_archive_path = None
if move_original:
validate_move(instance, old_source_path, instance.source_path)
create_source_path_directory(instance.source_path)
os.rename(old_source_path, instance.source_path)
create_source_path_directory(new_source_path)
try:
os.rename(old_source_path, new_source_path)
if instance.archive_checksum:
os.rename(old_archive_path, new_archive_path)
instance.filename = new_filename
if move_archive:
validate_move(
instance, old_archive_path, instance.archive_path)
create_source_path_directory(instance.archive_path)
os.rename(old_archive_path, instance.archive_path)
# Don't save() here to prevent infinite recursion.
Document.objects.filter(pk=instance.pk).update(
filename=new_filename)
filename=instance.filename,
archive_filename=instance.archive_filename,
)
except OSError as e:
instance.filename = old_filename
# this happens when we can't move a file. If that's the case for
# the archive file, we try our best to revert the changes.
# no need to save the instance, the update() has not happened yet.
except (OSError, DatabaseError, CannotMoveFilesException):
# This happens when either:
# - moving the files failed due to file system errors
# - saving to the database failed due to database errors
# In both cases, we need to revert to the original state.
# Try to move files to their original location.
try:
os.rename(new_source_path, old_source_path)
os.rename(new_archive_path, old_archive_path)
if move_original and os.path.isfile(instance.source_path):
os.rename(instance.source_path, old_source_path)
if move_archive and os.path.isfile(instance.archive_path):
os.rename(instance.archive_path, old_archive_path)
except Exception as e:
# This is fine, since:
# A: if we managed to move source from A to B, we will also
@@ -266,16 +273,10 @@ def update_filename_and_move_files(sender, instance, **kwargs):
# B: if moving the original file failed, nothing has changed
# anyway.
pass
except DatabaseError as e:
# this happens after moving files, so move them back into place.
# since moving them once succeeded, it's very likely going to
# succeed again.
os.rename(new_source_path, old_source_path)
if instance.archive_checksum:
os.rename(new_archive_path, old_archive_path)
# restore old values on the instance
instance.filename = old_filename
# again, no need to save the instance, since the actual update()
# operation failed.
instance.archive_filename = old_archive_filename
# finally, remove any empty sub folders. This will do nothing if
# something has failed above.
@@ -283,7 +284,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):
delete_empty_directories(os.path.dirname(old_source_path),
root=settings.ORIGINALS_DIR)
if old_archive_path and not os.path.isfile(old_archive_path):
if instance.has_archive_version and not os.path.isfile(old_archive_path): # NOQA: E501
delete_empty_directories(os.path.dirname(old_archive_path),
root=settings.ARCHIVE_DIR)

View File

@@ -9,8 +9,7 @@ from documents import index, sanity_checker
from documents.classifier import DocumentClassifier, load_classifier
from documents.consumer import Consumer, ConsumerError
from documents.models import Document, Tag, DocumentType, Correspondent
from documents.sanity_checker import SanityFailedError
from documents.sanity_checker import SanityCheckFailedException
logger = logging.getLogger("paperless.tasks")
@@ -94,8 +93,15 @@ def consume_file(path,
def sanity_check():
messages = sanity_checker.check_sanity()
if len(messages) > 0:
raise SanityFailedError(messages)
messages.log_messages()
if messages.has_error():
raise SanityCheckFailedException(
"Sanity check failed with errors. See log.")
elif messages.has_warning():
return "Sanity check exited with warnings. See log."
elif len(messages) > 0:
return "Sanity check exited with infos. See log."
else:
return "No issues detected."

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.7 KiB

View File

@@ -0,0 +1 @@
This is a test file.

View File

@@ -23,18 +23,6 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
self.assertEqual(Document.objects.get(id=doc.id).title, "new title")
m.assert_called_once()
def test_tags(self):
doc = Document.objects.create(title="test")
doc.tags.create(name="t1")
doc.tags.create(name="t2")
self.assertEqual(self.doc_admin.tags_(doc), "<span >t1, </span><span >t2, </span>")
def test_tags_empty(self):
doc = Document.objects.create(title="test")
self.assertEqual(self.doc_admin.tags_(doc), "")
@mock.patch("documents.admin.index.remove_document")
def test_delete_model(self, m):
doc = Document.objects.create(title="test")

View File

@@ -146,21 +146,19 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 200)
self.assertEqual(response.content, content_thumbnail)
@override_settings(PAPERLESS_FILENAME_FORMAT="")
def test_download_with_archive(self):
_, filename = tempfile.mkstemp(dir=self.dirs.originals_dir)
content = b"This is a test"
content_archive = b"This is the same test but archived"
with open(filename, "wb") as f:
f.write(content)
filename = os.path.basename(filename)
doc = Document.objects.create(title="none", filename=filename,
doc = Document.objects.create(title="none", filename="my_document.pdf",
archive_filename="archived.pdf",
mime_type="application/pdf")
with open(doc.source_path, "wb") as f:
f.write(content)
with open(doc.archive_path, "wb") as f:
f.write(content_archive)
@@ -577,10 +575,13 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
async_task.assert_not_called()
def test_get_metadata(self):
doc = Document.objects.create(title="test", filename="file.pdf", mime_type="image/png", archive_checksum="A")
doc = Document.objects.create(title="test", filename="file.pdf", mime_type="image/png", archive_checksum="A", archive_filename="archive.pdf")
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png"), doc.source_path)
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), doc.archive_path)
source_file = os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png")
archive_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
shutil.copy(source_file, doc.source_path)
shutil.copy(archive_file, doc.archive_path)
response = self.client.get(f"/api/documents/{doc.pk}/metadata/")
self.assertEqual(response.status_code, 200)
@@ -591,6 +592,10 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertTrue(meta['has_archive_version'])
self.assertEqual(len(meta['original_metadata']), 0)
self.assertGreater(len(meta['archive_metadata']), 0)
self.assertEqual(meta['media_filename'], "file.pdf")
self.assertEqual(meta['archive_media_filename'], "archive.pdf")
self.assertEqual(meta['original_size'], os.stat(source_file).st_size)
self.assertEqual(meta['archive_size'], os.stat(archive_file).st_size)
def test_get_metadata_invalid_doc(self):
response = self.client.get(f"/api/documents/34576/metadata/")
@@ -610,6 +615,22 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertFalse(meta['has_archive_version'])
self.assertGreater(len(meta['original_metadata']), 0)
self.assertIsNone(meta['archive_metadata'])
self.assertIsNone(meta['archive_media_filename'])
def test_get_metadata_missing_files(self):
    """The metadata endpoint must answer 200 even if no file exists on disk.

    The document row references an original and an archive file, but
    neither is created, so all file-derived fields are expected to be None.
    """
    doc = Document.objects.create(
        title="test",
        filename="file.pdf",
        mime_type="application/pdf",
        archive_filename="file.pdf",
        archive_checksum="B",
        checksum="A",
    )

    response = self.client.get(f"/api/documents/{doc.pk}/metadata/")
    self.assertEqual(response.status_code, 200)

    meta = response.data

    # The archive version is still reported, since the database says so.
    self.assertTrue(meta['has_archive_version'])
    # Everything that would have to be read from the files is absent.
    for key in ('original_metadata', 'original_size',
                'archive_metadata', 'archive_size'):
        self.assertIsNone(meta[key])
def test_get_empty_suggestions(self):
doc = Document.objects.create(title="test", mime_type="application/pdf")

View File

@@ -3,6 +3,7 @@ import tempfile
from pathlib import Path
from unittest import mock
import pytest
from django.conf import settings
from django.test import TestCase, override_settings
@@ -233,7 +234,6 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.assertFalse(os.path.exists(settings.MODEL_FILE))
self.assertIsNone(load_classifier())
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}})
@mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier(self, load):
Path(settings.MODEL_FILE).touch()
@@ -242,6 +242,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'}})
@override_settings(MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"))
@pytest.mark.skip(reason="Disabled caching due to high memory usage - need to investigate.")
def test_load_classifier_cached(self):
classifier = load_classifier()
self.assertIsNotNone(classifier)
@@ -250,7 +251,6 @@ class TestClassifier(DirectoriesMixin, TestCase):
classifier2 = load_classifier()
load.assert_not_called()
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}})
@mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_incompatible_version(self, load):
Path(settings.MODEL_FILE).touch()
@@ -260,7 +260,6 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.assertIsNone(load_classifier())
self.assertFalse(os.path.exists(settings.MODEL_FILE))
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}})
@mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_os_error(self, load):
Path(settings.MODEL_FILE).touch()

View File

@@ -5,12 +5,14 @@ import tempfile
from unittest import mock
from unittest.mock import MagicMock
from django.conf import settings
from django.test import TestCase, override_settings
from .utils import DirectoriesMixin
from ..consumer import Consumer, ConsumerError
from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
from ..parsers import DocumentParser, ParseError
from ..tasks import sanity_check
class TestAttributes(TestCase):
@@ -165,7 +167,7 @@ class TestFieldPermutations(TestCase):
class DummyParser(DocumentParser):
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
# not important during tests
raise NotImplementedError()
@@ -174,16 +176,34 @@ class DummyParser(DocumentParser):
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
self.archive_path = archive_path
def get_optimised_thumbnail(self, document_path, mime_type):
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def parse(self, document_path, mime_type, file_name=None):
self.text = "The Text"
class CopyParser(DocumentParser):
    """Test parser whose "archive version" is a plain copy of the input file.

    Both thumbnail getters return the same empty placeholder file that is
    created when the parser is constructed.
    """

    def __init__(self, logging_group, progress_callback=None):
        super().__init__(logging_group, progress_callback)
        # self.tempdir is presumably set up by DocumentParser.__init__, so
        # the base initializer has to run first.
        # mkstemp() returns an already-open OS-level file descriptor; only
        # the path is needed here, so close the descriptor right away
        # instead of leaking one per parser instance.
        thumb_fd, self.fake_thumb = tempfile.mkstemp(
            suffix=".png", dir=self.tempdir)
        os.close(thumb_fd)

    def get_thumbnail(self, document_path, mime_type, file_name=None):
        """Return the path of the placeholder thumbnail."""
        return self.fake_thumb

    def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
        """Return the path of the placeholder thumbnail."""
        return self.fake_thumb

    def parse(self, document_path, mime_type, file_name=None):
        """Set a fixed text and copy the input to ``archive.pdf`` in tempdir."""
        self.text = "The text"
        self.archive_path = os.path.join(self.tempdir, "archive.pdf")
        shutil.copy(document_path, self.archive_path)
class FaultyParser(DocumentParser):
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
# not important during tests
raise NotImplementedError()
@@ -191,7 +211,7 @@ class FaultyParser(DocumentParser):
super(FaultyParser, self).__init__(logging_group)
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
def get_optimised_thumbnail(self, document_path, mime_type):
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def parse(self, document_path, mime_type, file_name=None):
@@ -203,6 +223,8 @@ def fake_magic_from_file(file, mime=False):
if mime:
if os.path.splitext(file)[1] == ".pdf":
return "application/pdf"
elif os.path.splitext(file)[1] == ".png":
return "image/png"
else:
return "unknown"
else:
@@ -274,6 +296,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertIsNone(document.correspondent)
self.assertIsNone(document.document_type)
self.assertEqual(document.filename, "0000001.pdf")
self.assertEqual(document.archive_filename, "0000001.pdf")
self.assertTrue(os.path.isfile(
document.source_path
@@ -432,6 +455,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertEqual(document.title, "new docs")
self.assertEqual(document.filename, "none/new docs.pdf")
self.assertEqual(document.archive_filename, "none/new docs.pdf")
self._assert_first_last_send_progress()
@@ -446,7 +470,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
filenames.insert(0, f)
return f
m.side_effect = lambda f, root: get_filename()
m.side_effect = lambda f, archive_filename = False: get_filename()
filename = self.get_test_file()
@@ -457,6 +481,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertEqual(document.title, "new docs")
self.assertIsNotNone(os.path.isfile(document.title))
self.assertTrue(os.path.isfile(document.source_path))
self.assertTrue(os.path.isfile(document.archive_path))
self._assert_first_last_send_progress()
@@ -516,6 +541,30 @@ class TestConsumer(DirectoriesMixin, TestCase):
self._assert_first_last_send_progress(last_status="FAILED")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@mock.patch("documents.parsers.document_consumer_declaration.send")
def test_similar_filenames(self, m):
    """Consume three files whose names differ only in their extensions.

    Every document must keep its own original name, the archive names must
    not collide, and the sanity check is run at the end.
    """
    samples_dir = os.path.join(os.path.dirname(__file__), "samples")

    # (sample file, name inside the consumption directory)
    fixtures = (
        ("simple.pdf", "simple.pdf"),
        ("simple.png", "simple.png"),
        ("simple-noalpha.png", "simple.png.pdf"),
    )
    for sample_name, consumed_name in fixtures:
        shutil.copy(
            os.path.join(samples_dir, sample_name),
            os.path.join(settings.CONSUMPTION_DIR, consumed_name),
        )

    # Only CopyParser is offered to the consumer, for both mime types.
    parser_declaration = {
        "parser": CopyParser,
        "mime_types": {"application/pdf": ".pdf", "image/png": ".png"},
        "weight": 0
    }
    m.return_value = [(None, parser_declaration)]

    doc1 = self.consumer.try_consume_file(
        os.path.join(settings.CONSUMPTION_DIR, "simple.png"))
    doc2 = self.consumer.try_consume_file(
        os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"))
    doc3 = self.consumer.try_consume_file(
        os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"))

    self.assertEqual(doc1.filename, "simple.png")
    self.assertEqual(doc1.archive_filename, "simple.pdf")

    self.assertEqual(doc2.filename, "simple.pdf")
    # "simple.pdf" is already taken in the archive by doc1.
    self.assertEqual(doc2.archive_filename, "simple_01.pdf")

    self.assertEqual(doc3.filename, "simple.png.pdf")
    self.assertEqual(doc3.archive_filename, "simple.png.pdf")

    sanity_check()
class PreConsumeTestCase(TestCase):

View File

@@ -1,7 +1,6 @@
import datetime
import os
import shutil
from unittest import mock
from uuid import uuid4
from dateutil import tz
@@ -9,7 +8,6 @@ from django.conf import settings
from django.test import TestCase, override_settings
from documents.parsers import parse_date
from paperless_tesseract.parsers import RasterisedDocumentParser
class TestDate(TestCase):
@@ -152,4 +150,4 @@ class TestDate(TestCase):
2018, 2, 13, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
)
)

View File

@@ -201,6 +201,13 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(d), "my_doc_type - the_doc.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{asn} - {title}")
def test_asn(self):
    """The ``{asn}`` placeholder renders the serial number, or "none"."""
    with_asn = Document.objects.create(
        title="the_doc",
        mime_type="application/pdf",
        archive_serial_number=652,
        checksum="A",
    )
    without_asn = Document.objects.create(
        title="the_doc",
        mime_type="application/pdf",
        archive_serial_number=None,
        checksum="B",
    )

    self.assertEqual(generate_filename(with_asn), "652 - the_doc.pdf")
    self.assertEqual(generate_filename(without_asn), "none - the_doc.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_underscore(self):
document = Document()
@@ -439,6 +446,18 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(document2.filename, "qwe.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@mock.patch("documents.signals.handlers.Document.objects.filter")
def test_no_update_without_change(self, m):
    """Saving a document whose file names are already correct must not
    touch the (mocked) ``Document.objects.filter`` in the signal handlers.
    """
    doc = Document.objects.create(
        title="document",
        filename="document.pdf",
        archive_filename="document.pdf",
        checksum="A",
        archive_checksum="B",
        mime_type="application/pdf",
    )
    # Both files have to exist on disk before the document is saved again.
    for existing_path in (doc.source_path, doc.archive_path):
        Path(existing_path).touch()

    doc.save()

    m.assert_not_called()
class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
@@ -448,7 +467,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", filename="0000001.pdf", checksum="A", archive_filename="0000001.pdf", archive_checksum="B")
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
@@ -461,7 +480,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
self.assertFalse(os.path.isfile(original))
self.assertFalse(os.path.isfile(archive))
@@ -475,7 +494,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
self.assertTrue(os.path.isfile(original))
self.assertFalse(os.path.isfile(archive))
@@ -486,14 +505,49 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
def test_move_archive_exists(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
existing_archive_file = os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")
Path(original).touch()
Path(archive).touch()
os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none"))
Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
Path(existing_archive_file).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
self.assertFalse(os.path.isfile(original))
self.assertFalse(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
self.assertTrue(os.path.isfile(existing_archive_file))
self.assertEqual(doc.archive_filename, "none/my_doc_01.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_move_original_only(self):
original = os.path.join(settings.ORIGINALS_DIR, "document_01.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "document.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="document", filename="document_01.pdf", checksum="A",
archive_checksum="B", archive_filename="document.pdf")
self.assertEqual(doc.filename, "document.pdf")
self.assertEqual(doc.archive_filename, "document.pdf")
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_move_archive_only(self):
original = os.path.join(settings.ORIGINALS_DIR, "document.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "document_01.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="document", filename="document.pdf", checksum="A",
archive_checksum="B", archive_filename="document_01.pdf")
self.assertEqual(doc.filename, "document.pdf")
self.assertEqual(doc.archive_filename, "document.pdf")
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@@ -514,8 +568,9 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
m.assert_called()
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
@@ -527,7 +582,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
#Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", archive_filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertFalse(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
@@ -551,19 +606,21 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", archive_filename="0000001.pdf", checksum="A", archive_checksum="B")
m.assert_called()
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="")
def test_archive_deleted(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
@@ -577,6 +634,28 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertFalse(os.path.isfile(doc.source_path))
self.assertFalse(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_archive_deleted2(self):
    """Deleting one document must not remove files of another document.

    doc1's archive file and doc2's original file are both called
    "0000001.pdf" (in the archive and originals directory respectively),
    and doc2 has no archive version of its own.
    """
    png_original = os.path.join(settings.ORIGINALS_DIR, "document.png")
    pdf_original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
    archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
    for path in (png_original, pdf_original, archive):
        Path(path).touch()

    doc1 = Document.objects.create(
        mime_type="image/png",
        title="document",
        filename="document.png",
        checksum="A",
        archive_checksum="B",
        archive_filename="0000001.pdf",
    )
    doc2 = Document.objects.create(
        mime_type="application/pdf",
        title="0000001",
        filename="0000001.pdf",
        checksum="C",
    )

    # Precondition: every referenced file is in place.
    self.assertTrue(os.path.isfile(doc1.source_path))
    self.assertTrue(os.path.isfile(doc1.archive_path))
    self.assertTrue(os.path.isfile(doc2.source_path))

    doc2.delete()

    # Only doc2's original is gone; doc1 keeps both of its files.
    self.assertTrue(os.path.isfile(doc1.source_path))
    self.assertTrue(os.path.isfile(doc1.archive_path))
    self.assertFalse(os.path.isfile(doc2.source_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
def test_database_error(self):
@@ -584,7 +663,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_filename="0000001.pdf", archive_checksum="B")
with mock.patch("documents.signals.handlers.Document.objects.filter") as m:
m.side_effect = DatabaseError()
doc.save()
@@ -594,6 +673,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
class TestFilenameGeneration(TestCase):
@override_settings(
@@ -617,7 +697,7 @@ class TestFilenameGeneration(TestCase):
def run():
doc = Document.objects.create(checksum=str(uuid.uuid4()), title=str(uuid.uuid4()), content="wow")
doc.filename = generate_unique_filename(doc, settings.ORIGINALS_DIR)
doc.filename = generate_unique_filename(doc)
Path(doc.thumbnail_path).touch()
with open(doc.source_path, "w") as f:
f.write(str(uuid.uuid4()))

View File

@@ -20,6 +20,7 @@ from documents.tests.utils import DirectoriesMixin
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
class TestArchiver(DirectoriesMixin, TestCase):
def make_models(self):
@@ -42,9 +43,27 @@ class TestArchiver(DirectoriesMixin, TestCase):
doc = Document.objects.get(id=doc.id)
self.assertIsNotNone(doc.checksum)
self.assertIsNotNone(doc.archive_checksum)
self.assertTrue(os.path.isfile(doc.archive_path))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(filecmp.cmp(sample_file, doc.source_path))
self.assertEqual(doc.archive_filename, "none/A.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_naming_priorities(self):
    """Archive names must match the originals' names, whatever the
    order in which the documents are handled (doc2 goes first here).
    """
    doc1 = Document.objects.create(
        checksum="A",
        title="document",
        content="first document",
        mime_type="application/pdf",
        filename="document.pdf",
    )
    doc2 = Document.objects.create(
        checksum="B",
        title="document",
        content="second document",
        mime_type="application/pdf",
        filename="document_01.pdf",
    )
    for original_name in ("document.pdf", "document_01.pdf"):
        shutil.copy(
            sample_file,
            os.path.join(self.dirs.originals_dir, original_name),
        )

    handle_document(doc2.pk)
    handle_document(doc1.pk)

    # Reload both documents to see what was written to the database.
    doc1 = Document.objects.get(id=doc1.id)
    doc2 = Document.objects.get(id=doc2.id)

    self.assertEqual(doc1.archive_filename, "document.pdf")
    self.assertEqual(doc2.archive_filename, "document_01.pdf")
class TestDecryptDocuments(TestCase):
@@ -106,24 +125,27 @@ class TestMakeIndex(TestCase):
class TestRenamer(DirectoriesMixin, TestCase):
@override_settings(PAPERLESS_FILENAME_FORMAT="")
def test_rename(self):
doc = Document.objects.create(title="test", mime_type="application/pdf")
doc = Document.objects.create(title="test", mime_type="image/jpeg")
doc.filename = generate_filename(doc)
doc.archive_filename = generate_filename(doc, archive_filename=True)
doc.save()
Path(doc.source_path).touch()
Path(doc.archive_path).touch()
old_source_path = doc.source_path
with override_settings(PAPERLESS_FILENAME_FORMAT="{title}"):
with override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}"):
call_command("document_renamer")
doc2 = Document.objects.get(id=doc.id)
self.assertEqual(doc2.filename, "test.pdf")
self.assertFalse(os.path.isfile(old_source_path))
self.assertEqual(doc2.filename, "none/test.jpg")
self.assertEqual(doc2.archive_filename, "none/test.pdf")
self.assertFalse(os.path.isfile(doc.source_path))
self.assertFalse(os.path.isfile(doc.archive_path))
self.assertTrue(os.path.isfile(doc2.source_path))
self.assertTrue(os.path.isfile(doc2.archive_path))
class TestCreateClassifier(TestCase):
@@ -133,3 +155,24 @@ class TestCreateClassifier(TestCase):
call_command("document_create_classifier")
m.assert_called_once()
class TestSanityChecker(DirectoriesMixin, TestCase):
    """Tests for the ``document_sanity_checker`` management command."""

    def _run_sanity_checker(self):
        """Run the command and return the log lines captured meanwhile."""
        with self.assertLogs() as captured:
            call_command("document_sanity_checker")
        return captured.output

    def test_no_issues(self):
        """Without any documents, exactly one "no issues" line is logged."""
        output = self._run_sanity_checker()

        self.assertEqual(len(output), 1)
        self.assertIn("Sanity checker detected no issues.", output[0])

    def test_errors(self):
        """A stored checksum that does not match the file is reported."""
        doc = Document.objects.create(
            title="test",
            content="test",
            filename="test.pdf",
            checksum="abc",
        )
        # Empty files: "abc" cannot be their checksum.
        Path(doc.source_path).touch()
        Path(doc.thumbnail_path).touch()

        output = self._run_sanity_checker()

        self.assertEqual(len(output), 1)
        self.assertIn("Checksum mismatch of document", output[0])

View File

@@ -22,7 +22,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.target = tempfile.mkdtemp()
self.addCleanup(shutil.rmtree, self.target)
self.d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow1", filename="0000001.pdf", mime_type="application/pdf")
self.d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow1", filename="0000001.pdf", mime_type="application/pdf", archive_filename="0000001.pdf")
self.d2 = Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow2", filename="0000002.pdf", mime_type="application/pdf")
self.d3 = Document.objects.create(content="Content", checksum="d38d7ed02e988e072caf924e0f3fcb76", title="wow2", filename="0000003.pdf", mime_type="application/pdf")
self.d4 = Document.objects.create(content="Content", checksum="82186aaa94f0b98697d704b90fd1c072", title="wow_dec", filename="0000004.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)

View File

@@ -0,0 +1,325 @@
import hashlib
import os
import shutil
from pathlib import Path
from unittest import mock
from django.conf import settings
from django.test import override_settings
from documents.parsers import ParseError
from documents.tests.utils import DirectoriesMixin, TestMigrations
STORAGE_TYPE_GPG = "gpg"
def archive_name_from_filename(filename):
    """Return *filename* with its last extension replaced by ``.pdf``."""
    stem, _extension = os.path.splitext(filename)
    return stem + ".pdf"
def archive_path_old(self):
    """Return the archive path as derived before ``archive_filename`` existed.

    The name is taken from the original's filename (extension swapped for
    ``.pdf``); documents without a filename fall back to the zero-padded
    primary key.
    """
    fname = (
        archive_name_from_filename(self.filename)
        if self.filename
        else "{:07}.pdf".format(self.pk)
    )
    return os.path.join(settings.ARCHIVE_DIR, fname)
def archive_path_new(doc):
    """Return the archive path built from ``doc.archive_filename``.

    Returns ``None`` when the document has no archive filename set.
    """
    if doc.archive_filename is None:
        return None
    return os.path.join(settings.ARCHIVE_DIR, str(doc.archive_filename))
def source_path(doc):
    """Return the path of the original file of *doc* inside ORIGINALS_DIR.

    Uses ``doc.filename`` when set; otherwise the name is built from the
    zero-padded primary key plus ``doc.file_type``, with ``.gpg`` appended
    for GPG-stored documents.
    """
    if not doc.filename:
        fallback_name = "{:07}{}".format(doc.pk, doc.file_type)
        if doc.storage_type == STORAGE_TYPE_GPG:
            fallback_name += ".gpg"  # pragma: no cover
        return os.path.join(settings.ORIGINALS_DIR, fallback_name)

    return os.path.join(settings.ORIGINALS_DIR, str(doc.filename))
def thumbnail_path(doc):
    """Return the thumbnail path of *doc* inside THUMBNAIL_DIR.

    The name is the zero-padded primary key with a ``.png`` extension;
    GPG-stored documents get an additional ``.gpg`` suffix.
    """
    png_name = "{:07}.png".format(doc.pk)
    is_encrypted = doc.storage_type == STORAGE_TYPE_GPG
    return os.path.join(
        settings.THUMBNAIL_DIR,
        (png_name + ".gpg") if is_encrypted else png_name,
    )
def make_test_document(document_class, title: str, mime_type: str, original: str, original_filename: str, archive: str = None, archive_filename: str = None):
    """Create and save a document plus its files on disk for migration tests.

    :param document_class: model class to instantiate (e.g. a historical model).
    :param title: title stored on the document.
    :param mime_type: mime type stored on the document.
    :param original: path of the sample file copied in as the original.
    :param original_filename: value stored in ``doc.filename``.
    :param archive: optional path of a sample file copied in as archive version.
    :param archive_filename: when given together with *archive*, it is stored
        on the document and the archive is placed at the new-style path;
        otherwise the archive goes to the old-style path.
    :return: the saved document instance.
    """

    def md5_hex(path):
        # Checksums are stored as the hex MD5 digest of the file content.
        with open(path, "rb") as handle:
            return hashlib.md5(handle.read()).hexdigest()

    doc = document_class()
    doc.filename = original_filename
    doc.title = title
    doc.mime_type = mime_type
    doc.content = "the content, does not matter for this test"
    # Saved before any file is placed; the path helpers may fall back to doc.pk.
    doc.save()

    shutil.copy2(original, source_path(doc))
    doc.checksum = md5_hex(original)

    if archive:
        if archive_filename:
            doc.archive_filename = archive_filename
            archive_target = archive_path_new(doc)
        else:
            archive_target = archive_path_old(doc)
        shutil.copy2(archive, archive_target)
        doc.archive_checksum = md5_hex(archive)

    doc.save()

    # An empty thumbnail file is created for every document.
    Path(thumbnail_path(doc)).touch()

    return doc
# Sample files used as originals/archives by the migration tests below.
_TESTS_DIR = os.path.dirname(__file__)
_SAMPLES_DIR = os.path.join(_TESTS_DIR, "samples")
_SAMPLE_ORIGINALS_DIR = os.path.join(_SAMPLES_DIR, "documents", "originals")

simple_jpg = os.path.join(_SAMPLES_DIR, "simple.jpg")
simple_pdf = os.path.join(_SAMPLES_DIR, "simple.pdf")
simple_pdf2 = os.path.join(_SAMPLE_ORIGINALS_DIR, "0000002.pdf")
simple_pdf3 = os.path.join(_SAMPLE_ORIGINALS_DIR, "0000003.pdf")
simple_txt = os.path.join(_SAMPLES_DIR, "simple.txt")
simple_png = os.path.join(_SAMPLES_DIR, "simple-noalpha.png")
simple_png2 = os.path.join(_TESTS_DIR, "examples", "no-text.png")
@override_settings(PAPERLESS_FILENAME_FORMAT="")
class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
    """Forward migration 1011 -> 1012 with an empty filename format."""

    migrate_from = '1011_auto_20210101_2340'
    migrate_to = '1012_fix_archive_files'

    def setUpBeforeMigration(self, apps):
        """Create documents in the pre-migration state, some of which clash."""
        Document = apps.get_model("documents", "Document")

        self.unrelated = make_test_document(Document, "unrelated", "application/pdf", simple_pdf3, "unrelated.pdf", simple_pdf)
        self.no_text = make_test_document(Document, "no-text", "image/png", simple_png2, "no-text.png", simple_pdf)
        self.doc_no_archive = make_test_document(Document, "no_archive", "text/plain", simple_txt, "no_archive.txt")
        self.clash1 = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf)
        self.clash2 = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf)
        self.clash3 = make_test_document(Document, "clash", "image/png", simple_png, "clash.png", simple_pdf)
        self.clash4 = make_test_document(Document, "clash.png", "application/pdf", simple_pdf2, "clash.png.pdf", simple_pdf2)

        # clash1..clash3 map to the same old-style archive path, clash4 does not.
        shared_old_path = archive_path_old(self.clash1)
        self.assertEqual(shared_old_path, archive_path_old(self.clash2))
        self.assertEqual(shared_old_path, archive_path_old(self.clash3))
        self.assertNotEqual(shared_old_path, archive_path_old(self.clash4))

    def testArchiveFilesMigrated(self):
        """Every document keeps valid files and checksums after migrating."""
        Document = self.apps.get_model('documents', 'Document')

        for doc in Document.objects.all():
            has_archive = bool(doc.archive_checksum)

            if has_archive:
                self.assertIsNotNone(doc.archive_filename)
                self.assertTrue(os.path.isfile(archive_path_new(doc)))
            else:
                self.assertIsNone(doc.archive_filename)

            with open(source_path(doc), "rb") as original_file:
                original_checksum = hashlib.md5(original_file.read()).hexdigest()
            self.assertEqual(original_checksum, doc.checksum)

            if has_archive:
                self.assertTrue(os.path.isfile(archive_path_new(doc)))
                with open(archive_path_new(doc), "rb") as archive_file:
                    archive_checksum = hashlib.md5(archive_file.read()).hexdigest()
                self.assertEqual(archive_checksum, doc.archive_checksum)

        self.assertEqual(Document.objects.filter(archive_checksum__isnull=False).count(), 6)

    def test_filenames(self):
        """Archive filenames after migration; clashing documents get pk-based names."""
        Document = self.apps.get_model('documents', 'Document')

        expectations = [
            (self.unrelated, "unrelated.pdf"),
            (self.no_text, "no-text.pdf"),
            (self.doc_no_archive, None),
            (self.clash1, f"{self.clash1.id:07}.pdf"),
            (self.clash2, f"{self.clash2.id:07}.pdf"),
            (self.clash3, f"{self.clash3.id:07}.pdf"),
            (self.clash4, "clash.png.pdf"),
        ]
        for created, expected_name in expectations:
            self.assertEqual(Document.objects.get(id=created.id).archive_filename, expected_name)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
    """Same forward migration, run with a ``{correspondent}/{title}`` format."""

    def test_filenames(self):
        """Clashing documents are expected to get format-based, numbered names."""
        Document = self.apps.get_model('documents', 'Document')

        expectations = [
            (self.unrelated, "unrelated.pdf"),
            (self.no_text, "no-text.pdf"),
            (self.doc_no_archive, None),
            (self.clash1, "none/clash.pdf"),
            (self.clash2, "none/clash_01.pdf"),
            (self.clash3, "none/clash_02.pdf"),
            (self.clash4, "clash.png.pdf"),
        ]
        for created, expected_name in expectations:
            self.assertEqual(Document.objects.get(id=created.id).archive_filename, expected_name)
def fake_parse_wrapper(parser, path, mime_type, file_name):
    """Stand-in for ``parse_wrapper``: yields text but no archive file.

    Only *parser* is touched; the remaining arguments are accepted and ignored.
    """
    parser.text = "the text"
    parser.archive_path = None
@override_settings(PAPERLESS_FILENAME_FORMAT="")
class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
    """Failure modes of the forward migration 1011 -> 1012.

    ``auto_migrate`` is disabled so each test can prepare its documents and
    then trigger the migration itself via ``performMigration``.
    """

    migrate_from = '1011_auto_20210101_2340'
    migrate_to = '1012_fix_archive_files'
    auto_migrate = False

    @staticmethod
    def _count_logs(capture, needle):
        """Return how many captured log lines contain *needle*."""
        return sum(1 for line in capture.output if needle in line)

    def test_archive_missing(self):
        """The migration aborts when an expected archive file is gone."""
        Document = self.apps.get_model("documents", "Document")

        doc = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf)
        os.unlink(archive_path_old(doc))

        with self.assertRaisesMessage(ValueError, "does not exist at: "):
            self.performMigration()

    def test_parser_missing(self):
        """The migration aborts when no parser handles the mime type."""
        Document = self.apps.get_model("documents", "Document")

        # Two documents sharing an old archive path, with an unknown mime type.
        make_test_document(Document, "document", "invalid/typesss768", simple_png, "document.png", simple_pdf)
        make_test_document(Document, "document", "invalid/typesss768", simple_jpg, "document.jpg", simple_pdf)

        with self.assertRaisesMessage(ValueError, "no parsers are available"):
            self.performMigration()

    @mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
    def test_parser_error(self, m):
        """Persistent parse errors are logged and the archive info is dropped."""
        m.side_effect = ParseError()
        Document = self.apps.get_model("documents", "Document")

        doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
        doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)

        for created in (doc1, doc2):
            self.assertIsNotNone(created.archive_checksum)

        with self.assertLogs() as capture:
            self.performMigration()

        self.assertEqual(m.call_count, 6)
        self.assertEqual(self._count_logs(capture, "Parse error, will try again in 5 seconds"), 4)
        self.assertEqual(self._count_logs(capture, "Unable to regenerate archive document for ID:"), 2)

        # Re-fetch through the post-migration model state.
        Document = self.apps.get_model("documents", "Document")
        for created in (doc1, doc2):
            migrated = Document.objects.get(id=created.id)
            self.assertIsNone(migrated.archive_checksum)
            self.assertIsNone(migrated.archive_filename)

    @mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
    def test_parser_no_archive(self, m):
        """A parser that produces no archive file leaves no archive info behind."""
        m.side_effect = fake_parse_wrapper
        Document = self.apps.get_model("documents", "Document")

        doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
        doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)

        with self.assertLogs() as capture:
            self.performMigration()

        self.assertEqual(self._count_logs(capture, "Parser did not return an archive document for document"), 2)

        # Re-fetch through the post-migration model state.
        Document = self.apps.get_model("documents", "Document")
        for created in (doc1, doc2):
            migrated = Document.objects.get(id=created.id)
            self.assertIsNone(migrated.archive_checksum)
            self.assertIsNone(migrated.archive_filename)
@override_settings(PAPERLESS_FILENAME_FORMAT="")
class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
    """Reverse migration 1012 -> 1011 with an empty filename format."""

    migrate_from = '1012_fix_archive_files'
    migrate_to = '1011_auto_20210101_2340'

    def setUpBeforeMigration(self, apps):
        """Create documents in the post-1012 state (explicit archive filenames)."""
        Document = apps.get_model("documents", "Document")

        make_test_document(Document, "unrelated", "application/pdf", simple_pdf2, "unrelated.txt", simple_pdf2, "unrelated.pdf")
        make_test_document(Document, "no_archive", "text/plain", simple_txt, "no_archive.txt")
        make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_02.pdf")

    def testArchiveFilesReverted(self):
        """After reverting, archives live at the old-style path with valid checksums."""
        Document = self.apps.get_model('documents', 'Document')

        for doc in Document.objects.all():
            has_archive = bool(doc.archive_checksum)

            if has_archive:
                self.assertTrue(os.path.isfile(archive_path_old(doc)))

            with open(source_path(doc), "rb") as original_file:
                original_checksum = hashlib.md5(original_file.read()).hexdigest()
            self.assertEqual(original_checksum, doc.checksum)

            if has_archive:
                self.assertTrue(os.path.isfile(archive_path_old(doc)))
                with open(archive_path_old(doc), "rb") as archive_file:
                    archive_checksum = hashlib.md5(archive_file.read()).hexdigest()
                self.assertEqual(archive_checksum, doc.archive_checksum)

        self.assertEqual(Document.objects.filter(archive_checksum__isnull=False).count(), 2)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
class TestMigrateArchiveFilesBackwardsWithFilenameFormat(TestMigrateArchiveFilesBackwards):
    """Re-runs the inherited reverse-migration tests with a filename format set."""
@override_settings(PAPERLESS_FILENAME_FORMAT="")
class TestMigrateArchiveFilesBackwardsErrors(DirectoriesMixin, TestMigrations):
    """Failure modes of the reverse migration 1012 -> 1011.

    ``auto_migrate`` is disabled so each test sets up its documents first and
    then runs the migration explicitly.
    """

    migrate_from = '1012_fix_archive_files'
    migrate_to = '1011_auto_20210101_2340'
    auto_migrate = False

    def test_filename_clash(self):
        """Two archives that would revert to the same old-style name abort the migration."""
        Document = self.apps.get_model("documents", "Document")

        self.clashA = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf, "clash_02.pdf")
        self.clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_01.pdf")

        with self.assertRaisesMessage(ValueError, "would clash with another archive filename"):
            self.performMigration()

    def test_filename_exists(self):
        """Reverting aborts when the old-style target file already exists."""
        Document = self.apps.get_model("documents", "Document")

        self.clashA = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf, "clash.pdf")
        self.clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_01.pdf")

        with self.assertRaisesMessage(ValueError, "file already exists."):
            self.performMigration()

View File

@@ -1,52 +1,11 @@
import os
import shutil
from pathlib import Path
from django.apps import apps
from django.conf import settings
from django.db import connection
from django.db.migrations.executor import MigrationExecutor
from django.test import TestCase, TransactionTestCase, override_settings
from django.test import override_settings
from documents.models import Document
from documents.parsers import get_default_file_extension
from documents.tests.utils import DirectoriesMixin
class TestMigrations(TransactionTestCase):
@property
def app(self):
return apps.get_containing_app_config(type(self).__module__).name
migrate_from = None
migrate_to = None
def setUp(self):
super(TestMigrations, self).setUp()
assert self.migrate_from and self.migrate_to, \
"TestCase '{}' must define migrate_from and migrate_to properties".format(type(self).__name__)
self.migrate_from = [(self.app, self.migrate_from)]
self.migrate_to = [(self.app, self.migrate_to)]
executor = MigrationExecutor(connection)
old_apps = executor.loader.project_state(self.migrate_from).apps
# Reverse to the original migration
executor.migrate(self.migrate_from)
self.setUpBeforeMigration(old_apps)
# Run the migration to test
executor = MigrationExecutor(connection)
executor.loader.build_graph() # reload.
executor.migrate(self.migrate_to)
self.apps = executor.loader.project_state(self.migrate_to).apps
def setUpBeforeMigration(self, apps):
pass
from documents.tests.utils import DirectoriesMixin, TestMigrations
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
STORAGE_TYPE_GPG = "gpg"

View File

@@ -68,7 +68,7 @@ class TestParserDiscovery(TestCase):
)
def fake_get_thumbnail(self, path, mimetype):
def fake_get_thumbnail(self, path, mimetype, file_name):
return os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
@@ -89,15 +89,15 @@ class TestBaseParser(TestCase):
def test_get_optimised_thumbnail(self):
parser = DocumentParser(None)
parser.get_optimised_thumbnail("any", "not important")
parser.get_optimised_thumbnail("any", "not important", "document.pdf")
@mock.patch("documents.parsers.DocumentParser.get_thumbnail", fake_get_thumbnail)
@override_settings(OPTIMIZE_THUMBNAILS=False)
def test_get_optimised_thumb_disabled(self):
parser = DocumentParser(None)
path = parser.get_optimised_thumbnail("any", "not important")
self.assertEqual(path, fake_get_thumbnail(None, None, None))
path = parser.get_optimised_thumbnail("any", "not important", "document.pdf")
self.assertEqual(path, fake_get_thumbnail(None, None, None, None))
class TestParserAvailability(TestCase):

View File

@@ -1,3 +1,4 @@
import logging
import os
import shutil
from pathlib import Path
@@ -7,10 +8,59 @@ from django.conf import settings
from django.test import TestCase
from documents.models import Document
from documents.sanity_checker import check_sanity, SanityFailedError
from documents.sanity_checker import check_sanity, SanityCheckMessages
from documents.tests.utils import DirectoriesMixin
class TestSanityCheckMessages(TestCase):
    """Covers counting, severity flags and logging of ``SanityCheckMessages``."""

    def _assert_single_log(self, messages, expected_level, expected_text):
        """Log *messages* and check that exactly one matching record was emitted."""
        with self.assertLogs() as captured:
            messages.log_messages()

        self.assertEqual(len(captured.output), 1)
        record = captured.records[0]
        self.assertEqual(record.levelno, expected_level)
        self.assertEqual(record.message, expected_text)

    def test_no_messages(self):
        """An empty collection logs a single INFO 'no issues' line."""
        messages = SanityCheckMessages()

        self.assertEqual(len(messages), 0)
        self.assertFalse(messages.has_error())
        self.assertFalse(messages.has_warning())
        self._assert_single_log(messages, logging.INFO, "Sanity checker detected no issues.")

    def test_info(self):
        """An info message counts as neither warning nor error."""
        messages = SanityCheckMessages()
        messages.info("Something might be wrong")

        self.assertEqual(len(messages), 1)
        self.assertFalse(messages.has_error())
        self.assertFalse(messages.has_warning())
        self._assert_single_log(messages, logging.INFO, "Something might be wrong")

    def test_warning(self):
        """A warning sets the warning flag only and is logged at WARNING."""
        messages = SanityCheckMessages()
        messages.warning("Something is wrong")

        self.assertEqual(len(messages), 1)
        self.assertFalse(messages.has_error())
        self.assertTrue(messages.has_warning())
        self._assert_single_log(messages, logging.WARNING, "Something is wrong")

    def test_error(self):
        """An error sets the error flag only and is logged at ERROR."""
        messages = SanityCheckMessages()
        messages.error("Something is seriously wrong")

        self.assertEqual(len(messages), 1)
        self.assertTrue(messages.has_error())
        self.assertFalse(messages.has_warning())
        self._assert_single_log(messages, logging.ERROR, "Something is seriously wrong")
class TestSanityCheck(DirectoriesMixin, TestCase):
def make_test_data(self):
@@ -21,7 +71,12 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf"), os.path.join(self.dirs.archive_dir, "0000001.pdf"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png"), os.path.join(self.dirs.thumbnail_dir, "0000001.png"))
return Document.objects.create(title="test", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", content="test", pk=1, filename="0000001.pdf", mime_type="application/pdf")
return Document.objects.create(title="test", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", content="test", pk=1, filename="0000001.pdf", mime_type="application/pdf", archive_filename="0000001.pdf")
def assertSanityError(self, messageRegex):
messages = check_sanity()
self.assertTrue(messages.has_error())
self.assertRegex(messages[0]['message'], messageRegex)
def test_no_docs(self):
self.assertEqual(len(check_sanity()), 0)
@@ -33,59 +88,75 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
def test_no_thumbnail(self):
doc = self.make_test_data()
os.remove(doc.thumbnail_path)
self.assertEqual(len(check_sanity()), 1)
self.assertSanityError("Thumbnail of document .* does not exist")
def test_thumbnail_no_access(self):
doc = self.make_test_data()
os.chmod(doc.thumbnail_path, 0o000)
self.assertEqual(len(check_sanity()), 1)
self.assertSanityError("Cannot read thumbnail file of document")
os.chmod(doc.thumbnail_path, 0o777)
def test_no_original(self):
doc = self.make_test_data()
os.remove(doc.source_path)
self.assertEqual(len(check_sanity()), 1)
self.assertSanityError("Original of document .* does not exist.")
def test_original_no_access(self):
doc = self.make_test_data()
os.chmod(doc.source_path, 0o000)
self.assertEqual(len(check_sanity()), 1)
self.assertSanityError("Cannot read original file of document")
os.chmod(doc.source_path, 0o777)
def test_original_checksum_mismatch(self):
doc = self.make_test_data()
doc.checksum = "WOW"
doc.save()
self.assertEqual(len(check_sanity()), 1)
self.assertSanityError("Checksum mismatch of document")
def test_no_archive(self):
doc = self.make_test_data()
os.remove(doc.archive_path)
self.assertEqual(len(check_sanity()), 1)
self.assertSanityError("Archived version of document .* does not exist.")
def test_archive_no_access(self):
doc = self.make_test_data()
os.chmod(doc.archive_path, 0o000)
self.assertEqual(len(check_sanity()), 1)
self.assertSanityError("Cannot read archive file of document")
os.chmod(doc.archive_path, 0o777)
def test_archive_checksum_mismatch(self):
doc = self.make_test_data()
doc.archive_checksum = "WOW"
doc.save()
self.assertEqual(len(check_sanity()), 1)
self.assertSanityError("Checksum mismatch of archived document")
def test_empty_content(self):
doc = self.make_test_data()
doc.content = ""
doc.save()
self.assertEqual(len(check_sanity()), 1)
messages = check_sanity()
self.assertFalse(messages.has_error())
self.assertFalse(messages.has_warning())
self.assertEqual(len(messages), 1)
self.assertRegex(messages[0]['message'], "Document .* has no content.")
def test_orphaned_file(self):
doc = self.make_test_data()
Path(self.dirs.originals_dir, "orphaned").touch()
self.assertEqual(len(check_sanity()), 1)
messages = check_sanity()
self.assertFalse(messages.has_error())
self.assertTrue(messages.has_warning())
self.assertEqual(len(messages), 1)
self.assertRegex(messages[0]['message'], "Orphaned file in media dir")
def test_all(self):
Document.objects.create(title="test", checksum="dgfhj", archive_checksum="dfhg", content="", pk=1, filename="0000001.pdf")
string = str(SanityFailedError(check_sanity()))
def test_archive_filename_no_checksum(self):
doc = self.make_test_data()
doc.archive_checksum = None
doc.save()
self.assertSanityError("has an archive file, but its checksum is missing.")
def test_archive_checksum_no_filename(self):
doc = self.make_test_data()
doc.archive_filename = None
doc.save()
self.assertSanityError("has an archive file checksum, but no archive filename.")

View File

@@ -2,12 +2,12 @@ import os
from unittest import mock
from django.conf import settings
from django.test import TestCase, override_settings
from django.test import TestCase
from django.utils import timezone
from documents import tasks
from documents.models import Document, Tag, Correspondent, DocumentType
from documents.sanity_checker import SanityError, SanityFailedError
from documents.sanity_checker import SanityCheckMessages, SanityCheckFailedException
from documents.tests.utils import DirectoriesMixin
@@ -52,7 +52,6 @@ class TestTasks(DirectoriesMixin, TestCase):
load_classifier.assert_called_once()
self.assertFalse(os.path.isfile(settings.MODEL_FILE))
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}})
def test_train_classifier(self):
c = Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
doc = Document.objects.create(correspondent=c, content="test", title="test")
@@ -75,13 +74,33 @@ class TestTasks(DirectoriesMixin, TestCase):
self.assertNotEqual(mtime2, mtime3)
@mock.patch("documents.tasks.sanity_checker.check_sanity")
def test_sanity_check(self, m):
m.return_value = []
tasks.sanity_check()
def test_sanity_check_success(self, m):
m.return_value = SanityCheckMessages()
self.assertEqual(tasks.sanity_check(), "No issues detected.")
m.assert_called_once()
m.reset_mock()
m.return_value = [SanityError("")]
self.assertRaises(SanityFailedError, tasks.sanity_check)
@mock.patch("documents.tasks.sanity_checker.check_sanity")
def test_sanity_check_error(self, m):
messages = SanityCheckMessages()
messages.error("Some error")
m.return_value = messages
self.assertRaises(SanityCheckFailedException, tasks.sanity_check)
m.assert_called_once()
@mock.patch("documents.tasks.sanity_checker.check_sanity")
def test_sanity_check_warning(self, m):
messages = SanityCheckMessages()
messages.warning("Some warning")
m.return_value = messages
self.assertEqual(tasks.sanity_check(), "Sanity check exited with warnings. See log.")
m.assert_called_once()
@mock.patch("documents.tasks.sanity_checker.check_sanity")
def test_sanity_check_info(self, m):
messages = SanityCheckMessages()
messages.info("Some info")
m.return_value = messages
self.assertEqual(tasks.sanity_check(), "Sanity check exited with infos. See log.")
m.assert_called_once()
def test_bulk_update_documents(self):

View File

@@ -4,7 +4,10 @@ import tempfile
from collections import namedtuple
from contextlib import contextmanager
from django.test import override_settings
from django.apps import apps
from django.db import connection
from django.db.migrations.executor import MigrationExecutor
from django.test import override_settings, TransactionTestCase
def setup_directories():
@@ -79,3 +82,45 @@ class DirectoriesMixin:
def tearDown(self) -> None:
super(DirectoriesMixin, self).tearDown()
remove_dirs(self.dirs)
class TestMigrations(TransactionTestCase):
    """Base class for tests that run a single migration step.

    Subclasses set ``migrate_from`` and ``migrate_to`` to migration names.
    ``setUp`` migrates to ``migrate_from``, calls ``setUpBeforeMigration``
    with that state's app registry and then, unless ``auto_migrate`` is
    False, migrates to ``migrate_to``. ``self.apps`` always holds the app
    registry of the most recently reached state.
    """

    migrate_from = None
    migrate_to = None
    auto_migrate = True

    @property
    def app(self):
        """Name of the app config that contains the test's module."""
        test_module = type(self).__module__
        return apps.get_containing_app_config(test_module).name

    def setUp(self):
        super().setUp()

        assert self.migrate_from and self.migrate_to, \
            "TestCase '{}' must define migrate_from and migrate_to properties".format(type(self).__name__)

        # Turn the plain migration names into (app, name) migration targets.
        app_name = self.app
        self.migrate_from = [(app_name, self.migrate_from)]
        self.migrate_to = [(app_name, self.migrate_to)]

        executor = MigrationExecutor(connection)
        state_before = executor.loader.project_state(self.migrate_from).apps

        # Reverse to the original migration
        executor.migrate(self.migrate_from)

        self.setUpBeforeMigration(state_before)

        self.apps = state_before

        if self.auto_migrate:
            self.performMigration()

    def performMigration(self):
        """Run the migration under test and switch ``self.apps`` to its state."""
        # Run the migration to test
        executor = MigrationExecutor(connection)
        executor.loader.build_graph()  # reload.
        executor.migrate(self.migrate_to)

        self.apps = executor.loader.project_state(self.migrate_to).apps

    def setUpBeforeMigration(self, apps):
        """Hook for subclasses to create data in the ``migrate_from`` state."""

View File

@@ -192,7 +192,7 @@ class DocumentViewSet(RetrieveModelMixin,
def file_response(self, pk, request, disposition):
doc = Document.objects.get(id=pk)
if not self.original_requested(request) and os.path.isfile(doc.archive_path): # NOQA: E501
if not self.original_requested(request) and doc.has_archive_version: # NOQA: E501
file_handle = doc.archive_file
filename = doc.get_public_filename(archive=True)
mime_type = 'application/pdf'
@@ -225,6 +225,12 @@ class DocumentViewSet(RetrieveModelMixin,
else:
return []
def get_filesize(self, filename):
    """Return the size of *filename* in bytes, or ``None`` if unavailable.

    ``None`` is returned when the path is not a regular file, and also when
    the file vanishes or becomes inaccessible between the existence check
    and the ``stat`` call, so callers never see an ``OSError``.
    """
    if not os.path.isfile(filename):
        return None
    try:
        return os.stat(filename).st_size
    except OSError:
        # The file was removed or became unreadable after the isfile() check.
        return None
@action(methods=['get'], detail=True)
def metadata(self, request, pk=None):
try:
@@ -234,21 +240,21 @@ class DocumentViewSet(RetrieveModelMixin,
meta = {
"original_checksum": doc.checksum,
"original_size": os.stat(doc.source_path).st_size,
"original_size": self.get_filesize(doc.source_path),
"original_mime_type": doc.mime_type,
"media_filename": doc.filename,
"has_archive_version": os.path.isfile(doc.archive_path),
"has_archive_version": doc.has_archive_version,
"original_metadata": self.get_metadata(
doc.source_path, doc.mime_type)
doc.source_path, doc.mime_type),
"archive_checksum": doc.archive_checksum,
"archive_media_filename": doc.archive_filename
}
if doc.archive_checksum and os.path.isfile(doc.archive_path):
meta['archive_checksum'] = doc.archive_checksum
meta['archive_size'] = os.stat(doc.archive_path).st_size,
if doc.has_archive_version:
meta['archive_size'] = self.get_filesize(doc.archive_path)
meta['archive_metadata'] = self.get_metadata(
doc.archive_path, "application/pdf")
else:
meta['archive_checksum'] = None
meta['archive_size'] = None
meta['archive_metadata'] = None
@@ -291,6 +297,8 @@ class DocumentViewSet(RetrieveModelMixin,
handle = GnuPG.decrypted(doc.thumbnail_file)
else:
handle = doc.thumbnail_file
# TODO: Send ETag information and use that to send new thumbnails
# if available
return HttpResponse(handle,
content_type='image/png')
except (FileNotFoundError, Document.DoesNotExist):

View File

@@ -0,0 +1,650 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the PACKAGE package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
# Translators:
# Štěpán Šebestian <mys.orangeorange0123@gmail.com>, 2021
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2021-01-28 22:02+0100\n"
"PO-Revision-Date: 2020-12-30 19:27+0000\n"
"Last-Translator: Štěpán Šebestian <mys.orangeorange0123@gmail.com>, 2021\n"
"Language-Team: Czech (https://www.transifex.com/paperless/teams/115905/cs/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Language: cs\n"
"Plural-Forms: nplurals=4; plural=(n == 1 && n % 1 == 0) ? 0 : (n >= 2 && n <= 4 && n % 1 == 0) ? 1: (n % 1 != 0 ) ? 2 : 3;\n"
#: documents/apps.py:10
msgid "Documents"
msgstr "Dokumenty"
#: documents/models.py:33
msgid "Any word"
msgstr "Jakékoliv slovo"
#: documents/models.py:34
msgid "All words"
msgstr "Všechna slova"
#: documents/models.py:35
msgid "Exact match"
msgstr "Přesná shoda"
#: documents/models.py:36
msgid "Regular expression"
msgstr "Regulární výraz"
#: documents/models.py:37
msgid "Fuzzy word"
msgstr "Fuzzy slovo"
#: documents/models.py:38
msgid "Automatic"
msgstr "Automatický"
#: documents/models.py:42 documents/models.py:352 paperless_mail/models.py:25
#: paperless_mail/models.py:109
msgid "name"
msgstr "název"
#: documents/models.py:46
msgid "match"
msgstr "shoda"
#: documents/models.py:50
msgid "matching algorithm"
msgstr "algoritmus pro shodu"
#: documents/models.py:56
msgid "is insensitive"
msgstr "je ignorováno"
#: documents/models.py:75 documents/models.py:135
msgid "correspondent"
msgstr "korespondent"
#: documents/models.py:76
msgid "correspondents"
msgstr "korespondenti"
#: documents/models.py:98
msgid "color"
msgstr "barva"
#: documents/models.py:102
msgid "is inbox tag"
msgstr "tag příchozí"
#: documents/models.py:104
msgid ""
"Marks this tag as an inbox tag: All newly consumed documents will be tagged "
"with inbox tags."
msgstr ""
"Označí tento tag jako tag pro příchozí: Všechny nově zkonzumované dokumenty "
"budou označeny tagem pro příchozí."
#: documents/models.py:109
msgid "tag"
msgstr "tag"
#: documents/models.py:110 documents/models.py:166
msgid "tags"
msgstr "tagy"
#: documents/models.py:116 documents/models.py:148
msgid "document type"
msgstr "typ dokumentu"
#: documents/models.py:117
msgid "document types"
msgstr "typy dokumentu"
#: documents/models.py:125
msgid "Unencrypted"
msgstr "Nešifrované"
#: documents/models.py:126
msgid "Encrypted with GNU Privacy Guard"
msgstr "Šifrované pomocí GNU Privacy Guard"
#: documents/models.py:139
msgid "title"
msgstr "titulek"
#: documents/models.py:152
msgid "content"
msgstr "obsah"
#: documents/models.py:154
msgid ""
"The raw, text-only data of the document. This field is primarily used for "
"searching."
msgstr ""
"Nezpracovaná, pouze textová data dokumentu. Toto pole je používáno především"
" pro vyhledávání."
#: documents/models.py:159
msgid "mime type"
msgstr "mime typ"
#: documents/models.py:170
msgid "checksum"
msgstr "kontrolní součet"
#: documents/models.py:174
msgid "The checksum of the original document."
msgstr "Kontrolní součet původního dokumentu."
#: documents/models.py:178
msgid "archive checksum"
msgstr "kontrolní součet archivu"
#: documents/models.py:183
msgid "The checksum of the archived document."
msgstr "Kontrolní součet archivovaného dokumentu."
#: documents/models.py:187 documents/models.py:330
msgid "created"
msgstr "vytvořeno"
#: documents/models.py:191
msgid "modified"
msgstr "upraveno"
#: documents/models.py:195
msgid "storage type"
msgstr "typ úložiště"
#: documents/models.py:203
msgid "added"
msgstr "přidáno"
#: documents/models.py:207
msgid "filename"
msgstr "název souboru"
#: documents/models.py:212
msgid "Current filename in storage"
msgstr "Aktuální název souboru v úložišti"
#: documents/models.py:216
msgid "archive serial number"
msgstr "sériové číslo archivu"
#: documents/models.py:221
msgid "The position of this document in your physical document archive."
msgstr "Pozice dokumentu ve vašem archivu fyzických dokumentů."
#: documents/models.py:227
msgid "document"
msgstr "dokument"
#: documents/models.py:228
msgid "documents"
msgstr "dokumenty"
#: documents/models.py:313
msgid "debug"
msgstr "debug"
#: documents/models.py:314
msgid "information"
msgstr "informace"
#: documents/models.py:315
msgid "warning"
msgstr "varování"
#: documents/models.py:316
msgid "error"
msgstr "chyba"
#: documents/models.py:317
msgid "critical"
msgstr "kritická"
#: documents/models.py:321
msgid "group"
msgstr "skupina"
#: documents/models.py:324
msgid "message"
msgstr "zpráva"
#: documents/models.py:327
msgid "level"
msgstr "úroveň"
#: documents/models.py:334
msgid "log"
msgstr "záznam"
#: documents/models.py:335
msgid "logs"
msgstr "záznamy"
#: documents/models.py:346 documents/models.py:396
msgid "saved view"
msgstr "uložený pohled"
#: documents/models.py:347
msgid "saved views"
msgstr "uložené pohledy"
#: documents/models.py:350
msgid "user"
msgstr "uživatel"
#: documents/models.py:356
msgid "show on dashboard"
msgstr "zobrazit v dashboardu"
#: documents/models.py:359
msgid "show in sidebar"
msgstr "zobrazit v postranním menu"
#: documents/models.py:363
msgid "sort field"
msgstr "pole na řazení"
#: documents/models.py:366
msgid "sort reverse"
msgstr "třídit opačně"
#: documents/models.py:372
msgid "title contains"
msgstr "titulek obsahuje"
#: documents/models.py:373
msgid "content contains"
msgstr "obsah obsahuje"
#: documents/models.py:374
msgid "ASN is"
msgstr "ASN je"
#: documents/models.py:375
msgid "correspondent is"
msgstr "korespondent je"
#: documents/models.py:376
msgid "document type is"
msgstr "typ dokumentu je"
#: documents/models.py:377
msgid "is in inbox"
msgstr "je v příchozích"
#: documents/models.py:378
msgid "has tag"
msgstr "má tag"
#: documents/models.py:379
msgid "has any tag"
msgstr "má jakýkoliv tag"
#: documents/models.py:380
msgid "created before"
msgstr "vytvořeno před"
#: documents/models.py:381
msgid "created after"
msgstr "vytvořeno po"
#: documents/models.py:382
msgid "created year is"
msgstr "rok vytvoření je"
#: documents/models.py:383
msgid "created month is"
msgstr "měsíc vytvoření je"
#: documents/models.py:384
msgid "created day is"
msgstr "den vytvoření je"
#: documents/models.py:385
msgid "added before"
msgstr "přidáno před"
#: documents/models.py:386
msgid "added after"
msgstr "přidáno po"
#: documents/models.py:387
msgid "modified before"
msgstr "upraveno před"
#: documents/models.py:388
msgid "modified after"
msgstr "upraveno po"
#: documents/models.py:389
msgid "does not have tag"
msgstr "nemá tag"
#: documents/models.py:400
msgid "rule type"
msgstr "typ pravidla"
#: documents/models.py:404
msgid "value"
msgstr "hodnota"
#: documents/models.py:410
msgid "filter rule"
msgstr "filtrovací pravidlo"
#: documents/models.py:411
msgid "filter rules"
msgstr "filtrovací pravidla"
#: documents/serialisers.py:383
#, python-format
msgid "File type %(type)s not supported"
msgstr "Typ souboru %(type)s není podporován"
#: documents/templates/index.html:20
msgid "Paperless-ng is loading..."
msgstr "Paperless-ng se načítá..."
#: documents/templates/registration/logged_out.html:13
msgid "Paperless-ng signed out"
msgstr "Odhlášeno od Paperless-ng"
#: documents/templates/registration/logged_out.html:41
msgid "You have been successfully logged out. Bye!"
msgstr "Byli jste úspěšně odhlášeni. Nashledanou!"
#: documents/templates/registration/logged_out.html:42
msgid "Sign in again"
msgstr "Přihlaste se znovu"
#: documents/templates/registration/login.html:13
msgid "Paperless-ng sign in"
msgstr "Paperless-ng přihlášení"
#: documents/templates/registration/login.html:42
msgid "Please sign in."
msgstr "Prosím přihlaste se."
#: documents/templates/registration/login.html:45
msgid "Your username and password didn't match. Please try again."
msgstr "Vaše uživatelské jméno a heslo se neshodují. Prosím, zkuste to znovu."
#: documents/templates/registration/login.html:48
msgid "Username"
msgstr "Uživatelské jméno"
#: documents/templates/registration/login.html:49
msgid "Password"
msgstr "Heslo"
#: documents/templates/registration/login.html:54
msgid "Sign in"
msgstr "Přihlásit se"
#: paperless/settings.py:286
msgid "English"
msgstr "Angličtina"
#: paperless/settings.py:287
msgid "German"
msgstr "Němčina"
#: paperless/settings.py:288
msgid "Dutch"
msgstr "Holandština"
#: paperless/settings.py:289
msgid "French"
msgstr "Francouzština"
#: paperless/urls.py:114
msgid "Paperless-ng administration"
msgstr "Správa Paperless-ng"
#: paperless_mail/admin.py:25
msgid "Filter"
msgstr "Filtr"
#: paperless_mail/admin.py:27
msgid ""
"Paperless will only process mails that match ALL of the filters given below."
msgstr ""
"Paperless zpracuje pouze emaily, které odpovídají VŠEM níže zadaným filtrům."
#: paperless_mail/admin.py:37
msgid "Actions"
msgstr "Akce"
#: paperless_mail/admin.py:39
msgid ""
"The action applied to the mail. This action is only performed when documents"
" were consumed from the mail. Mails without attachments will remain entirely"
" untouched."
msgstr ""
"Akce provedena na emailu. Tato akce je provedena jen pokud byly dokumenty "
"zkonzumovány z emailu. Emaily bez příloh zůstanou nedotčeny."
#: paperless_mail/admin.py:46
msgid "Metadata"
msgstr "Metadata"
#: paperless_mail/admin.py:48
msgid ""
"Assign metadata to documents consumed from this rule automatically. If you "
"do not assign tags, types or correspondents here, paperless will still "
"process all matching rules that you have defined."
msgstr ""
"Automaticky přiřadit metadata dokumentům zkonzumovaným z tohoto pravidla. "
"Pokud zde nepřiřadíte tagy, typy nebo korespondenty, paperless stále "
"zpracuje všechna shodující se pravidla, která jste definovali."
#: paperless_mail/apps.py:9
msgid "Paperless mail"
msgstr "Paperless pošta"
#: paperless_mail/models.py:11
msgid "mail account"
msgstr "emailový účet"
#: paperless_mail/models.py:12
msgid "mail accounts"
msgstr "emailové účty"
#: paperless_mail/models.py:19
msgid "No encryption"
msgstr "Žádné šifrování"
#: paperless_mail/models.py:20
msgid "Use SSL"
msgstr "Používat SSL"
#: paperless_mail/models.py:21
msgid "Use STARTTLS"
msgstr "Používat STARTTLS"
#: paperless_mail/models.py:29
msgid "IMAP server"
msgstr "IMAP server"
#: paperless_mail/models.py:33
msgid "IMAP port"
msgstr "IMAP port"
#: paperless_mail/models.py:36
msgid ""
"This is usually 143 for unencrypted and STARTTLS connections, and 993 for "
"SSL connections."
msgstr ""
"Toto je většinou 143 pro nešifrovaná připojení/připojení používající "
"STARTTLS a 993 pro SSL připojení."
#: paperless_mail/models.py:40
msgid "IMAP security"
msgstr "IMAP bezpečnost"
#: paperless_mail/models.py:46
msgid "username"
msgstr "uživatelské jméno"
#: paperless_mail/models.py:50
msgid "password"
msgstr "heslo"
#: paperless_mail/models.py:60
msgid "mail rule"
msgstr "mailové pravidlo"
#: paperless_mail/models.py:61
msgid "mail rules"
msgstr "mailová pravidla"
#: paperless_mail/models.py:67
msgid "Only process attachments."
msgstr "Zpracovávat jen přílohy."
#: paperless_mail/models.py:68
msgid "Process all files, including 'inline' attachments."
msgstr "Zpracovat všechny soubory, včetně vložených příloh."
#: paperless_mail/models.py:78
msgid "Mark as read, don't process read mails"
msgstr "Označit jako přečtené, nezpracovávat přečtené emaily"
#: paperless_mail/models.py:79
msgid "Flag the mail, don't process flagged mails"
msgstr "Označit email, nezpracovávat označené emaily"
#: paperless_mail/models.py:80
msgid "Move to specified folder"
msgstr "Přesunout do specifikované složky"
#: paperless_mail/models.py:81
msgid "Delete"
msgstr "Odstranit"
#: paperless_mail/models.py:88
msgid "Use subject as title"
msgstr "Použít předmět jako titulek"
#: paperless_mail/models.py:89
msgid "Use attachment filename as title"
msgstr "Použít název souboru u přílohy jako titulek"
#: paperless_mail/models.py:99
msgid "Do not assign a correspondent"
msgstr "Nepřiřazovat korespondenta"
#: paperless_mail/models.py:101
msgid "Use mail address"
msgstr "Použít emailovou adresu"
#: paperless_mail/models.py:103
msgid "Use name (or mail address if not available)"
msgstr "Použít jméno (nebo emailovou adresu pokud jméno není dostupné)"
#: paperless_mail/models.py:105
msgid "Use correspondent selected below"
msgstr "Použít korespondenta vybraného níže"
#: paperless_mail/models.py:113
msgid "order"
msgstr "pořadí"
#: paperless_mail/models.py:120
msgid "account"
msgstr "účet"
#: paperless_mail/models.py:124
msgid "folder"
msgstr "složka"
#: paperless_mail/models.py:128
msgid "filter from"
msgstr "filtrovat z"
#: paperless_mail/models.py:131
msgid "filter subject"
msgstr "filtrovat předmět"
#: paperless_mail/models.py:134
msgid "filter body"
msgstr "filtrovat tělo"
#: paperless_mail/models.py:138
msgid "filter attachment filename"
msgstr "název souboru u přílohy filtru"
#: paperless_mail/models.py:140
msgid ""
"Only consume documents which entirely match this filename if specified. "
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
"Konzumovat jen dokumenty které přesně odpovídají tomuto názvu souboru pokud "
"specifikováno. Zástupné znaky jako *.pdf nebo *invoice* jsou povoleny. "
"Nezáleží na velikosti písmen."
#: paperless_mail/models.py:146
msgid "maximum age"
msgstr "maximální stáří"
#: paperless_mail/models.py:148
msgid "Specified in days."
msgstr "Specifikováno ve dnech."
#: paperless_mail/models.py:151
msgid "attachment type"
msgstr "typ přílohy"
#: paperless_mail/models.py:154
msgid ""
"Inline attachments include embedded images, so it's best to combine this "
"option with a filename filter."
msgstr ""
"Vložené přílohy zahrnují vložené obrázky, takže je nejlepší tuto možnost "
"kombinovat s filtrem na název souboru."
#: paperless_mail/models.py:159
msgid "action"
msgstr "akce"
#: paperless_mail/models.py:165
msgid "action parameter"
msgstr "parametr akce"
#: paperless_mail/models.py:167
msgid ""
"Additional parameter for the action selected above, i.e., the target folder "
"of the move to folder action."
msgstr ""
"Další parametr pro výše vybranou akci, například cílová složka akce přesunutí"
" do složky."
#: paperless_mail/models.py:173
msgid "assign title from"
msgstr "nastavit titulek z"
#: paperless_mail/models.py:183
msgid "assign this tag"
msgstr "přiřadit tento tag"
#: paperless_mail/models.py:191
msgid "assign this document type"
msgstr "přiřadit tento typ dokumentu"
#: paperless_mail/models.py:195
msgid "assign correspondent from"
msgstr "přiřadit korespondenta z"
#: paperless_mail/models.py:205
msgid "assign this correspondent"
msgstr "přiřadit tohoto korespondenta"

View File

@@ -102,10 +102,11 @@ INSTALLED_APPS = [
"django_q",
"channels",
] + env_apps
if DEBUG:
INSTALLED_APPS.append("channels")
REST_FRAMEWORK = {
'DEFAULT_AUTHENTICATION_CLASSES': [
'rest_framework.authentication.BasicAuthentication',
@@ -169,16 +170,6 @@ CHANNEL_LAYERS = {
},
}
CACHES = {
"default": {
"BACKEND": "django_redis.cache.RedisCache",
"LOCATION": os.getenv("PAPERLESS_REDIS", "redis://localhost:6379"),
"OPTIONS": {
"CLIENT_CLASS": "django_redis.client.DefaultClient",
}
}
}
###############################################################################
# Security #
###############################################################################
@@ -407,8 +398,9 @@ TASK_WORKERS = int(os.getenv("PAPERLESS_TASK_WORKERS", default_task_workers()))
Q_CLUSTER = {
'name': 'paperless',
'catch_up': False,
'recycle': 1,
'workers': TASK_WORKERS,
'django_redis': 'default'
'redis': os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")
}

View File

@@ -1 +1 @@
__version__ = (1, 1, 0)
__version__ = (1, 1, 2)

View File

@@ -13,11 +13,8 @@ def process_mail_accounts():
try:
total_new_documents += MailAccountHandler().handle_mail_account(
account)
except MailError as e:
logger.error(
f"Error while processing mail account {account}: {e}",
exc_info=True
)
except MailError:
logger.exception(f"Error while processing mail account {account}")
if total_new_documents > 0:
return f"Added {total_new_documents} document(s)."

View File

@@ -48,7 +48,7 @@ class RasterisedDocumentParser(DocumentParser):
)
return result
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
return make_thumbnail_from_pdf(
document_path, self.tempdir, self.logging_group)

View File

@@ -13,7 +13,7 @@ class TextDocumentParser(DocumentParser):
logging_name = "paperless.parsing.text"
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
def read_text():
with open(document_path, 'r') as src:

View File

@@ -16,9 +16,9 @@ class TikaDocumentParser(DocumentParser):
logging_name = "paperless.parsing.tika"
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
if not self.archive_path:
self.archive_path = self.convert_to_pdf(document_path)
self.archive_path = self.convert_to_pdf(document_path, file_name)
return make_thumbnail_from_pdf(
self.archive_path, self.tempdir, self.logging_group)