changelog

documentation and changelog
reprganized docker file, less layers, new shortcuts for management commands
2026-03-31 13:22:43 +00:00 · 2021-02-12 18:04:15 +01:00 · 2021-02-12 16:54:00 +01:00 · 2021-02-12 16:53:51 +01:00 · 2021-02-12 01:31:50 +01:00 · 2021-02-11 22:16:41 +01:00
50 changed files with 1295 additions and 338 deletions
--- a/59
+++ b/59
@@ -10,10 +10,6 @@ RUN ./configure && make

 FROM python:3.7-slim

-WORKDIR /usr/src/paperless/
-
-COPY requirements.txt ./
-
 # Binary dependencies
 RUN apt-get update \
  && apt-get -y --no-install-recommends install \
@@ -49,16 +45,24 @@ RUN apt-get update \
 		tesseract-ocr-spa \
 		unpaper \
 		zlib1g \
-		&& rm -rf /var/lib/apt/lists/*

 # This pulls in updated dependencies from bullseye to fix some issues with file type detection.
 # TODO: Remove this once bullseye releases.
-RUN echo "deb http://deb.debian.org/debian bullseye main" > /etc/apt/sources.list.d/bullseye.list \
+  && echo "deb http://deb.debian.org/debian bullseye main" > /etc/apt/sources.list.d/bullseye.list \
  && apt-get update \
  && apt-get install --no-install-recommends -y file libmagic-dev \
  && rm -rf /var/lib/apt/lists/* \
  && rm /etc/apt/sources.list.d/bullseye.list

+# copy jbig2enc
+COPY --from=jbig2enc /usr/src/jbig2enc/src/.libs/libjbig2enc* /usr/local/lib/
+COPY --from=jbig2enc /usr/src/jbig2enc/src/jbig2 /usr/local/bin/
+COPY --from=jbig2enc /usr/src/jbig2enc/src/*.h /usr/local/include/
+
+WORKDIR /usr/src/paperless/src/
+
+COPY requirements.txt ../
+
 # Python dependencies
 RUN apt-get update \
  && apt-get -y --no-install-recommends install \
@@ -67,41 +71,36 @@ RUN apt-get update \
 		libpq-dev \
 		libqpdf-dev \
 	&& python3 -m pip install --upgrade --no-cache-dir supervisor \
-  && python3 -m pip install --no-cache-dir -r requirements.txt \
+  && python3 -m pip install --no-cache-dir -r ../requirements.txt \
 	&& apt-get -y purge build-essential libqpdf-dev \
 	&& apt-get -y autoremove --purge \
-	&& rm -rf /var/lib/apt/lists/* \
-	&& mkdir /var/log/supervisord /var/run/supervisord
+	&& rm -rf /var/lib/apt/lists/*

+# setup docker-specific things
+COPY docker/ ./docker/

-# copy scripts
-# this fixes issues with imagemagick and PDF
-COPY docker/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml
-
-COPY gunicorn.conf.py ./
-COPY docker/supervisord.conf /etc/supervisord.conf
-COPY docker/docker-entrypoint.sh /sbin/docker-entrypoint.sh
-
-# copy jbig2enc
-COPY --from=jbig2enc /usr/src/jbig2enc/src/.libs/libjbig2enc* /usr/local/lib/
-COPY --from=jbig2enc /usr/src/jbig2enc/src/jbig2 /usr/local/bin/
-COPY --from=jbig2enc /usr/src/jbig2enc/src/*.h /usr/local/include/
+RUN cd docker \
+  && cp imagemagick-policy.xml /etc/ImageMagick-6/policy.xml \
+	&& mkdir /var/log/supervisord /var/run/supervisord \
+	&& cp supervisord.conf /etc/supervisord.conf \
+	&& cp docker-entrypoint.sh /sbin/docker-entrypoint.sh \
+	&& chmod 755 /sbin/docker-entrypoint.sh \
+	&& chmod +x install_management_commands.sh \
+	&& ./install_management_commands.sh \
+	&& cd .. \
+	&& rm docker -rf

+COPY gunicorn.conf.py ../

 # copy app
-COPY src/ ./src/
+COPY src/ ./

 # add users, setup scripts
 RUN addgroup --gid 1000 paperless \
 	&& useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \
-	&& chown -R paperless:paperless . \
-	&& chmod 755 /sbin/docker-entrypoint.sh
-
-WORKDIR /usr/src/paperless/src/
-
-RUN sudo -HEu paperless python3 manage.py collectstatic --clear --no-input
-
-RUN sudo -HEu paperless python3 manage.py compilemessages
+	&& chown -R paperless:paperless ../ \
+	&& sudo -HEu paperless python3 manage.py collectstatic --clear --no-input \
+	&& sudo -HEu paperless python3 manage.py compilemessages

 VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/media", "/usr/src/paperless/consume", "/usr/src/paperless/export"]
 ENTRYPOINT ["/sbin/docker-entrypoint.sh"]
--- a/4
+++ b/4
@@ -52,6 +52,10 @@ channels-redis = "*"
 uvicorn = {extras = ["standard"], version = "*"}
 concurrent-log-handler = "*"
 django-redis = "*"
+# uvloop 0.15+ incompatible with python 3.6
+uvloop = "~=0.14.0"
+# TODO: keep an eye on piwheel builds and update this once available (https://www.piwheels.org/project/cryptography/)
+cryptography = "~=3.3.2"

 [dev-packages]
 coveralls = "*"
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
    "_meta": {
        "hash": {
-            "sha256": "0c2003b9d3d95d1af594f749a2740b55079551ea0ae512177ee9524bb327281e"
+            "sha256": "b3bed0a6b8981e8fffc1b6aa3bc35a0b1472f28e6f745c62469eb8045740e57b"
        },
        "pipfile-spec": 6,
        "requires": {},
@@ -190,24 +190,24 @@
        },
        "cryptography": {
            "hashes": [
-                "sha256:0003a52a123602e1acee177dc90dd201f9bb1e73f24a070db7d36c588e8f5c7d",
-                "sha256:0e85aaae861d0485eb5a79d33226dd6248d2a9f133b81532c8f5aae37de10ff7",
-                "sha256:594a1db4511bc4d960571536abe21b4e5c3003e8750ab8365fafce71c5d86901",
-                "sha256:69e836c9e5ff4373ce6d3ab311c1a2eed274793083858d3cd4c7d12ce20d5f9c",
-                "sha256:788a3c9942df5e4371c199d10383f44a105d67d401fb4304178020142f020244",
-                "sha256:7e177e4bea2de937a584b13645cab32f25e3d96fc0bc4a4cf99c27dc77682be6",
-                "sha256:83d9d2dfec70364a74f4e7c70ad04d3ca2e6a08b703606993407bf46b97868c5",
-                "sha256:84ef7a0c10c24a7773163f917f1cb6b4444597efd505a8aed0a22e8c4780f27e",
-                "sha256:982f661bffc7a24b6d4f8ebe3291f17cf3833a0941c6f4d9d55c790b9aa2cdb3",
-                "sha256:9e21301f7a1e7c03dbea73e8602905a4ebba641547a462b26dd03451e5769e7c",
-                "sha256:9f6b0492d111b43de5f70052e24c1f0951cb9e6022188ebcb1cc3a3d301469b0",
-                "sha256:a69bd3c68b98298f490e84519b954335154917eaab52cf582fa2c5c7efc6e812",
-                "sha256:b4890d5fb9b7a23e3bf8abf5a8a7da8e228f1e97dc96b30b95685df840b6914a",
-                "sha256:c366df0401d1ec4e548bebe8f91d55ebcc0ec3137900d214dd7aac8427ef3030",
-                "sha256:dc42f645f8f3a489c3dd416730a514e7a91a59510ddaadc09d04224c098d3302"
+                "sha256:0d7b69674b738068fa6ffade5c962ecd14969690585aaca0a1b1fc9058938a72",
+                "sha256:1bd0ccb0a1ed775cd7e2144fe46df9dc03eefd722bbcf587b3e0616ea4a81eff",
+                "sha256:3c284fc1e504e88e51c428db9c9274f2da9f73fdf5d7e13a36b8ecb039af6e6c",
+                "sha256:49570438e60f19243e7e0d504527dd5fe9b4b967b5a1ff21cc12b57602dd85d3",
+                "sha256:541dd758ad49b45920dda3b5b48c968f8b2533d8981bcdb43002798d8f7a89ed",
+                "sha256:5a60d3780149e13b7a6ff7ad6526b38846354d11a15e21068e57073e29e19bed",
+                "sha256:7951a966613c4211b6612b0352f5bf29989955ee592c4a885d8c7d0f830d0433",
+                "sha256:922f9602d67c15ade470c11d616f2b2364950602e370c76f0c94c94ae672742e",
+                "sha256:a0f0b96c572fc9f25c3f4ddbf4688b9b38c69836713fb255f4a2715d93cbaf44",
+                "sha256:a777c096a49d80f9d2979695b835b0f9c9edab73b59e4ceb51f19724dda887ed",
+                "sha256:a9a4ac9648d39ce71c2f63fe7dc6db144b9fa567ddfc48b9fde1b54483d26042",
+                "sha256:aa4969f24d536ae2268c902b2c3d62ab464b5a66bcb247630d208a79a8098e9b",
+                "sha256:c7390f9b2119b2b43160abb34f63277a638504ef8df99f11cb52c1fda66a2e6f",
+                "sha256:ddd06e71c449a4fe10d0c60846280ee35d69ce49e3e413ce46d5f129e1468083",
+                "sha256:e18e6ab84dfb0ab997faf8cca25a86ff15dfea4027b986322026cc99e0a892da"
            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
-            "version": "==3.3.1"
+            "index": "pypi",
+            "version": "==3.3.2"
        },
        "daphne": {
            "hashes": [
@@ -243,11 +243,11 @@
        },
        "django-extensions": {
            "hashes": [
-                "sha256:7cd002495ff0a0e5eb6cdd6be759600905b4e4079232ea27618fc46bdd853651",
-                "sha256:c7f88625a53f631745d4f2bef9ec4dcb999ed59476393bdbbe99db8596778846"
+                "sha256:674ad4c3b1587a884881824f40212d51829e662e52f85b012cd83d83fe1271d9",
+                "sha256:9507f8761ee760748938fd8af766d0608fb2738cf368adfa1b2451f61c15ae35"
            ],
            "index": "pypi",
-            "version": "==3.1.0"
+            "version": "==3.1.1"
        },
        "django-filter": {
            "hashes": [
@@ -462,11 +462,11 @@
        },
        "joblib": {
            "hashes": [
-                "sha256:75ead23f13484a2a414874779d69ade40d4fa1abe62b222a23cd50d4bc822f6f",
-                "sha256:7ad866067ac1fdec27d51c8678ea760601b70e32ff1881d4dc8e1171f2b64b24"
+                "sha256:9c17567692206d2f3fb9ecf5e991084254fe631665c450b443761c4186a613f7",
+                "sha256:feeb1ec69c4d45129954f1b7034954241eedfd6ba39b5e9e4b6883be3332d5e5"
            ],
            "markers": "python_version >= '3.6'",
-            "version": "==1.0.0"
+            "version": "==1.0.1"
        },
        "langdetect": {
            "hashes": [
@@ -1113,11 +1113,11 @@
        },
        "tqdm": {
            "hashes": [
-                "sha256:4621f6823bab46a9cc33d48105753ccbea671b68bab2c50a9f0be23d4065cb5a",
-                "sha256:fe3d08dd00a526850568d542ff9de9bbc2a09a791da3c334f3213d8d0bbbca65"
+                "sha256:2874fa525c051177583ec59c0fb4583e91f28ccd3f217ffad2acdb32d2c789ac",
+                "sha256:ab9b659241d82b8b51b2269ee243ec95286046bf06015c4e15a947cc15914211"
            ],
            "index": "pypi",
-            "version": "==4.56.0"
+            "version": "==4.56.1"
        },
        "twisted": {
            "extras": [
@@ -1201,6 +1201,7 @@
                "sha256:e7514d7a48c063226b7d06617cbb12a14278d4323a065a8d46a7962686ce2e95",
                "sha256:f07909cd9fc08c52d294b1570bba92186181ca01fe3dc9ffba68955273dd7362"
            ],
+            "index": "pypi",
            "version": "==0.14.0"
        },
        "watchdog": {
@@ -1506,11 +1507,11 @@
        },
        "faker": {
            "hashes": [
-                "sha256:190f0d3ce037866b5d230f0b9fd0f513f07c25dc326dcad6ee019849c68d441c",
-                "sha256:db7adc3b4755005fc960cf96fb4ed46b54b6eb21413741ab3f31a9595f379905"
+                "sha256:bf2a9b3f8d00a5dada61fc4a3f80fe0d6795c7f02a138a7d2ef2db5817c7d017",
+                "sha256:d4aecdb877519d06c2fdc01ffc5ecf70658981acf5e13cd07ded9892994ef5c6"
            ],
            "markers": "python_version >= '3.6'",
-            "version": "==6.0.0"
+            "version": "==6.1.1"
        },
        "filelock": {
            "hashes": [
@@ -1713,11 +1714,11 @@
        },
        "pytest-xdist": {
            "hashes": [
-                "sha256:1d8edbb1a45e8e1f8e44b1260583107fc23f8bc8da6d18cb331ff61d41258ecf",
-                "sha256:f127e11e84ad37cc1de1088cb2990f3c354630d428af3f71282de589c5bb779b"
+                "sha256:2447a1592ab41745955fb870ac7023026f20a5f0bfccf1b52a879bd193d46450",
+                "sha256:718887296892f92683f6a51f25a3ae584993b06f7076ce1e1fd482e59a8220a2"
            ],
            "index": "pypi",
-            "version": "==2.2.0"
+            "version": "==2.2.1"
        },
        "python-dateutil": {
            "hashes": [
--- a/ansible/molecule/default/verify.yml
+++ b/ansible/molecule/default/verify.yml
@@ -38,7 +38,7 @@

    - name: verify uploaded document has been accepted
      uri:
-        url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/"
+        url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/paperless/"
        headers:
          Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}'
        return_content: yes
@@ -51,7 +51,7 @@

    - name: verify uploaded document has been consumed
      uri:
-        url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/"
+        url: "http://{{ paperlessng_listen_address }}:{{ paperlessng_listen_port }}/api/logs/paperless/"
        headers:
          Authorization: 'Basic {{ (paperlessng_superuser_name + ":" + paperlessng_superuser_password) | b64encode }}'
        return_content: yes
--- a/docker/install_management_commands.sh
+++ b/docker/install_management_commands.sh
@@ -0,0 +1,6 @@
+for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails;
+do
+	echo "installing $command..."
+	sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command
+	chmod +x /usr/local/bin/$command
+done
--- a/docker/management_script.sh
+++ b/docker/management_script.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+set -e
+
+cd /usr/src/paperless/src/
+
+if [[ $(id -u) == 0 ]] ;
+then
+  sudo -HEu paperless python3 manage.py management_command "$@"
+elif [[ $(id -un) == "paperless" ]] ;
+then
+  python3 manage.py management_command "$@"
+else
+  echo "Unknown user."
+fi
--- a/docs/administration.rst
+++ b/docs/administration.rst
@@ -23,6 +23,12 @@ Options available to any installation of paperless:
 *   The document exporter is also able to update an already existing export.
    Therefore, incremental backups with ``rsync`` are entirely possible.

+.. caution::
+
+    You cannot import the export generated with one version of paperless in a
+    different version of paperless. The export contains an exact image of the
+    database, and migrations may change the database layout.
+
 Options available to docker installations:

 *   Backup the docker volumes. These usually reside within
@@ -101,17 +107,17 @@ Then you can start paperless-ng with ``-d`` to have it run in the background.
        update to newer versions. In order to enable updates as described above, either
        get the new ``docker-compose.yml`` file from `here <https://github.com/jonaswinkler/paperless-ng/tree/master/docker/compose>`_
        or edit the ``docker-compose.yml`` file, find the line that says
-        
+
            .. code::

                image: jonaswinkler/paperless-ng:0.9.x
-        
+
        and replace the version with ``latest``:

            .. code::

                image: jonaswinkler/paperless-ng:latest
-        
+
 Bare Metal Route
 ================

@@ -171,26 +177,63 @@ Most of the update process is automated when using the ansible role.
        $ ansible-playbook playbook.yml


+Downgrading Paperless
+#####################
+
+Downgrades are possible. However, some updates also contain database migrations (these change the layout of the database and may move data).
+In order to move back from a version that applied database migrations, you'll have to revert the database migration *before* downgrading,
+and then downgrade paperless.
+
+This table lists the most recent database migrations for each versions:
+
+---------+-------------------------+
+| Version | Latest migration number |
+---------+-------------------------+
+| 1.0.0   | 1011                    |
+---------+-------------------------+
+| 1.1.0   | 1011                    |
+---------+-------------------------+
+| 1.1.1   | 1012                    |
+---------+-------------------------+
+
+Execute the following management command to migrate your database:
+
+.. code:: shell-session
+
+    $ python3 manage.py migrate documents <migration number>
+
+.. note::
+
+    Some migrations cannot be undone. The command will issue errors if that happens.
+
+.. _utilities-management-commands:
+
 Management utilities
 ####################

 Paperless comes with some management commands that perform various maintenance
-tasks on your paperless instance. You can invoke these commands either by
+tasks on your paperless instance. You can invoke these commands in the following way:
+
+With docker-compose, while paperless is running:

 .. code:: shell-session

    $ cd /path/to/paperless
-    $ docker-compose run --rm webserver <command> <arguments>
+    $ docker-compose exec webserver <command> <arguments>

-or
+With docker, while paperless is running:
+
+.. code:: shell-session
+
+    $ docker exec -it <container-name> <command> <arguments>
+
+Bare metal:

 .. code:: shell-session

    $ cd /path/to/paperless/src
    $ python3 manage.py <command> <arguments>

-depending on whether you use docker or not.
-
 All commands have built-in help, which can be accessed by executing them with
 the argument ``--help``.

@@ -210,7 +253,7 @@ backup or migration to another DMS.
    -c, --compare-checksums
    -f, --use-filename-format
    -d, --delete
-    
+
 ``target`` is a folder to which the data gets written. This includes documents,
 thumbnails and a ``manifest.json`` file. The manifest contains all metadata from
 the database (correspondents, tags, etc).
--- a/docs/advanced_usage.rst
+++ b/docs/advanced_usage.rst
@@ -217,6 +217,7 @@ will create a directory structure as follows:

 Paperless provides the following placeholders withing filenames:

+* ``{asn}``: The archive serial number of the document, or "none".
 * ``{correspondent}``: The name of the correspondent, or "none".
 * ``{document_type}``: The name of the document type, or "none".
 * ``{tag_list}``: A comma separated list of all tags assigned to the document.
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -5,6 +5,27 @@
 Changelog
 *********

+paperless-ng 1.1.1
+##################
+
+This release contains new database migrations.
+
+* Fixed a bug in the sanity checker that would cause it to display "x not in list" errors instead of actual issues.
+
+* Fixed a bug with filename generation for archive filenames that would cause the archive files of two documents to overlap.
+
+  * This happened when ``PAPERLESS_FILENAME_FORMAT`` is used and the filenames of two or more documents are the same, except for the file extension.
+  * Paperless will now store the archive filename in the database as well instead of deriving it from the original filename, and use the
+    same logic for detecting and avoiding filename clashes that's also used for original filenames.
+  * The migrations will repair any missing archive files. If you're using tika, ensure that tika is running while performing the migration. Docker-compose will take care of that.
+
+* Fixed a bug with thumbnail regeneration when TIKA integration was used.
+
+* Added ASN as a placeholder field to the filename format.
+
+* The docker image now comes with built-in shortcuts for most management commands. These are now the recommended way to execute management commands, since these
+  also ensure that they're always executed as the paperless user and you're less likely to run into permission issues. See :ref:`utilities-management-commands`.
+
 paperless-ng 1.1.0
 ##################

@@ -17,7 +38,7 @@ paperless-ng 1.1.0
      or added with one of the mobile apps.
    * Documents are successfully added to paperless.
    * Document consumption failed (with error messages)
-  
+
  * Configuration options to enable/disable individual notifications.

 * Live updates to document lists and saved views when new documents are added.
--- a/requirements.txt
+++ b/requirements.txt
@@ -24,11 +24,11 @@ click==7.1.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2,
 coloredlogs==15.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
 concurrent-log-handler==0.9.19
 constantly==15.1.0
-cryptography==3.3.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
+cryptography==3.3.2
 daphne==3.0.1; python_version >= '3.6'
 dateparser==0.7.6
 django-cors-headers==3.7.0
-django-extensions==3.1.0
+django-extensions==3.1.1
 django-filter==2.4.0
 django-picklefield==3.0.1; python_version >= '3'
 django-q==1.3.4
@@ -49,7 +49,7 @@ img2pdf==0.4.0
 incremental==17.5.0
 inotify-simple==1.3.5; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
 inotifyrecursive==0.3.5
-joblib==1.0.0; python_version >= '3.6'
+joblib==1.0.1; python_version >= '3.6'
 langdetect==1.0.8
 lxml==4.6.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
 msgpack==1.0.2
@@ -87,7 +87,7 @@ sortedcontainers==2.3.0
 sqlparse==0.4.1; python_version >= '3.5'
 threadpoolctl==2.1.0; python_version >= '3.5'
 tika==1.24
-tqdm==4.56.0
+tqdm==4.56.1
 twisted[tls]==20.3.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
 txaio==20.12.1; python_version >= '3.6'
 tzlocal==2.1
--- a/src-ui/src/environments/environment.prod.ts
+++ b/src-ui/src/environments/environment.prod.ts
@@ -2,7 +2,7 @@ export const environment = {
  production: true,
  apiBaseUrl: "/api/",
  appTitle: "Paperless-ng",
-  version: "1.1.0",
+  version: "1.1.1",
  webSocketHost: window.location.host,
  webSocketProtocol: (window.location.protocol == "https:" ? "wss:" : "ws:")
 };
--- a/src/clash.pdf
+++ b/src/clash.pdf
--- a/src/documents/admin.py
+++ b/src/documents/admin.py
@@ -50,26 +50,31 @@ class DocumentAdmin(admin.ModelAdmin):
        "modified",
        "mime_type",
        "storage_type",
-        "filename")
+        "filename",
+        "checksum",
+        "archive_filename",
+        "archive_checksum"
+    )

    list_display_links = ("title",)

    list_display = (
-        "correspondent",
+        "id",
        "title",
-        "tags_",
-        "created",
+        "mime_type",
+        "filename",
+        "archive_filename"
    )

    list_filter = (
-        "document_type",
-        "tags",
-        "correspondent"
+        ("mime_type"),
+        ("archive_serial_number", admin.EmptyFieldListFilter),
+        ("archive_filename", admin.EmptyFieldListFilter),
    )

    filter_horizontal = ("tags",)

-    ordering = ["-created"]
+    ordering = ["-id"]

    date_hierarchy = "created"

@@ -95,26 +100,6 @@ class DocumentAdmin(admin.ModelAdmin):
        index.add_or_update_document(obj)
        super(DocumentAdmin, self).save_model(request, obj, form, change)

-    @mark_safe
-    def tags_(self, obj):
-        r = ""
-        for tag in obj.tags.all():
-            r += self._html_tag(
-                "span",
-                tag.name + ", "
-            )
-        return r
-
-    @staticmethod
-    def _html_tag(kind, inside=None, **kwargs):
-        attributes = format_html_join(' ', '{}="{}"', kwargs.items())
-
-        if inside is not None:
-            return format_html("<{kind} {attributes}>{inside}</{kind}>",
-                               kind=kind, attributes=attributes, inside=inside)
-
-        return format_html("<{} {}/>", kind, attributes)
-

 class RuleInline(admin.TabularInline):
    model = SavedViewFilterRule
--- a/src/documents/classifier.py
+++ b/src/documents/classifier.py
@@ -43,9 +43,9 @@ def load_classifier():
                      version=version, timeout=86400)
        except (EOFError, IncompatibleClassifierVersionError) as e:
            # there's something wrong with the model file.
-            logger.error(
+            logger.exception(
                f"Unrecoverable error while loading document "
-                f"classification model: {str(e)}, deleting model file."
+                f"classification model, deleting model file."
            )
            os.unlink(settings.MODEL_FILE)
            classifier = None
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -241,7 +241,7 @@ class Consumer(LoggingMixin):
            self._send_progress(70, 100, 'WORKING',
                                MESSAGE_GENERATING_THUMBNAIL)
            thumbnail = document_parser.get_optimised_thumbnail(
-                self.path, mime_type)
+                self.path, mime_type, self.filename)

            text = document_parser.get_text()
            date = document_parser.get_date()
@@ -292,8 +292,7 @@ class Consumer(LoggingMixin):
                # After everything is in the database, copy the files into
                # place. If this fails, we'll also rollback the transaction.
                with FileLock(settings.MEDIA_LOCK):
-                    document.filename = generate_unique_filename(
-                        document, settings.ORIGINALS_DIR)
+                    document.filename = generate_unique_filename(document)
                    create_source_path_directory(document.source_path)

                    self._write(document.storage_type,
@@ -303,6 +302,10 @@ class Consumer(LoggingMixin):
                                thumbnail, document.thumbnail_path)

                    if archive_path and os.path.isfile(archive_path):
+                        document.archive_filename = generate_unique_filename(
+                            document,
+                            archive_filename=True
+                        )
                        create_source_path_directory(document.archive_path)
                        self._write(document.storage_type,
                                    archive_path, document.archive_path)
--- a/src/documents/file_handling.py
+++ b/src/documents/file_handling.py
@@ -79,12 +79,40 @@ def many_to_dictionary(field):
    return mydictionary


-def generate_unique_filename(doc, root):
+def generate_unique_filename(doc,
+                             archive_filename=False):
+    """
+    Generates a unique filename for doc in settings.ORIGINALS_DIR.
+
+    The returned filename is guaranteed to be either the current filename
+    of the document if unchanged, or a new filename that does not correspondent
+    to any existing files. The function will append _01, _02, etc to the
+    filename before the extension to avoid conflicts.
+
+    If archive_filename is True, return a unique archive filename instead.
+
+    """
+    if archive_filename:
+        old_filename = doc.archive_filename
+        root = settings.ARCHIVE_DIR
+    else:
+        old_filename = doc.filename
+        root = settings.ORIGINALS_DIR
+
+    # If generating archive filenames, try to make a name that is similar to
+    # the original filename first.
+
+    if archive_filename and doc.filename:
+        new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
+        if new_filename == old_filename or not os.path.exists(os.path.join(root, new_filename)):  # NOQA: E501
+            return new_filename
+
    counter = 0

    while True:
-        new_filename = generate_filename(doc, counter)
-        if new_filename == doc.filename:
+        new_filename = generate_filename(
+            doc, counter, archive_filename=archive_filename)
+        if new_filename == old_filename:
            # still the same as before.
            return new_filename

@@ -94,7 +122,7 @@ def generate_unique_filename(doc, root):
            return new_filename


-def generate_filename(doc, counter=0, append_gpg=True):
+def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
    path = ""

    try:
@@ -123,6 +151,11 @@ def generate_filename(doc, counter=0, append_gpg=True):
            else:
                document_type = "none"

+            if doc.archive_serial_number:
+                asn = str(doc.archive_serial_number)
+            else:
+                asn = "none"
+
            path = settings.PAPERLESS_FILENAME_FORMAT.format(
                title=pathvalidate.sanitize_filename(
                    doc.title, replacement_text="-"),
@@ -136,6 +169,7 @@ def generate_filename(doc, counter=0, append_gpg=True):
                added_year=doc.added.year if doc.added else "none",
                added_month=f"{doc.added.month:02}" if doc.added else "none",
                added_day=f"{doc.added.day:02}" if doc.added else "none",
+                asn=asn,
                tags=tags,
                tag_list=tag_list
            ).strip()
@@ -148,18 +182,16 @@ def generate_filename(doc, counter=0, append_gpg=True):
            f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")

    counter_str = f"_{counter:02}" if counter else ""
+
+    filetype_str = ".pdf" if archive_filename else doc.file_type
+
    if len(path) > 0:
-        filename = f"{path}{counter_str}{doc.file_type}"
+        filename = f"{path}{counter_str}{filetype_str}"
    else:
-        filename = f"{doc.pk:07}{counter_str}{doc.file_type}"
+        filename = f"{doc.pk:07}{counter_str}{filetype_str}"

    # Append .gpg for encrypted files
    if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
        filename += ".gpg"

    return filename
-
-
-def archive_name_from_filename(filename):
-
-    return os.path.splitext(filename)[0] + ".pdf"
--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -78,8 +78,8 @@ def open_index(recreate=False):
    try:
        if exists_in(settings.INDEX_DIR) and not recreate:
            return open_dir(settings.INDEX_DIR, schema=get_schema())
-    except Exception as e:
-        logger.error(f"Error while opening the index: {e}, recreating.")
+    except Exception:
+        logger.exception(f"Error while opening the index, recreating.")

    if not os.path.isdir(settings.INDEX_DIR):
        os.makedirs(settings.INDEX_DIR, exist_ok=True)
--- a/src/documents/management/commands/document_archiver.py
+++ b/src/documents/management/commands/document_archiver.py
@@ -16,7 +16,8 @@ from whoosh.writing import AsyncWriter

 from documents.models import Document
 from ... import index
-from ...file_handling import create_source_path_directory
+from ...file_handling import create_source_path_directory, \
+    generate_unique_filename
 from ...parsers import get_parser_class_for_mime_type


@@ -39,13 +40,16 @@ def handle_document(document_id):
            with transaction.atomic():
                with open(parser.get_archive_path(), 'rb') as f:
                    checksum = hashlib.md5(f.read()).hexdigest()
-                # i'm going to save first so that in case the file move
+                # I'm going to save first so that in case the file move
                # fails, the database is rolled back.
-                # we also don't use save() since that triggers the filehandling
+                # We also don't use save() since that triggers the filehandling
                # logic, and we don't want that yet (file not yet in place)
+                document.archive_filename = generate_unique_filename(
+                    document, archive_filename=True)
                Document.objects.filter(pk=document.pk).update(
                    archive_checksum=checksum,
-                    content=parser.get_text()
+                    content=parser.get_text(),
+                    archive_filename=document.archive_filename
                )
                with FileLock(settings.MEDIA_LOCK):
                    create_source_path_directory(document.archive_path)
@@ -56,7 +60,7 @@ def handle_document(document_id):
            index.update_document(writer, document)

    except Exception as e:
-        logger.error(f"Error while parsing document {document}: {str(e)}")
+        logger.exception(f"Error while parsing document {document}")
    finally:
        parser.cleanup()

@@ -101,7 +105,7 @@ class Command(BaseCommand):
        document_ids = list(map(
            lambda doc: doc.id,
            filter(
-                lambda d: overwrite or not d.archive_checksum,
+                lambda d: overwrite or not d.has_archive_version,
                documents
            )
        ))
--- a/src/documents/management/commands/document_consumer.py
+++ b/src/documents/management/commands/document_consumer.py
@@ -54,8 +54,7 @@ def _consume(filepath):
        if settings.CONSUMER_SUBDIRS_AS_TAGS:
            tag_ids = _tags_from_path(filepath)
    except Exception as e:
-        logger.error(
-            "Error creating tags from path: {}".format(e))
+        logger.exception("Error creating tags from path")

    try:
        async_task("documents.tasks.consume_file",
@@ -66,8 +65,7 @@ def _consume(filepath):
        # Catch all so that the consumer won't crash.
        # This is also what the test case is listening for to check for
        # errors.
-        logger.error(
-            "Error while consuming document: {}".format(e))
+        logger.exception("Error while consuming document")


 def _consume_wait_unmodified(file, num_tries=20, wait_time=1):
--- a/src/documents/management/commands/document_exporter.py
+++ b/src/documents/management/commands/document_exporter.py
@@ -139,7 +139,7 @@ class Command(BaseCommand):
            thumbnail_target = os.path.join(self.target, thumbnail_name)
            document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name

-            if os.path.exists(document.archive_path):
+            if document.has_archive_version:
                archive_name = base_name + "-archive.pdf"
                archive_target = os.path.join(self.target, archive_name)
                document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
--- a/src/documents/management/commands/document_importer.py
+++ b/src/documents/management/commands/document_importer.py
@@ -151,6 +151,9 @@ class Command(BaseCommand):
                shutil.copy2(thumbnail_path, document.thumbnail_path)
                if archive_path:
                    create_source_path_directory(document.archive_path)
+                    # TODO: this assumes that the export is valid and
+                    #  archive_filename is present on all documents with
+                    #  archived files
                    shutil.copy2(archive_path, document.archive_path)

            document.save()
--- a/src/documents/management/commands/document_thumbnails.py
+++ b/src/documents/management/commands/document_thumbnails.py
@@ -22,7 +22,10 @@ def _process_document(doc_in):

    try:
        thumb = parser.get_optimised_thumbnail(
-            document.source_path, document.mime_type)
+            document.source_path,
+            document.mime_type,
+            document.get_public_filename()
+        )

        shutil.move(thumb, document.thumbnail_path)
    finally:
--- a/src/documents/migrations/1012_fix_archive_files.py
+++ b/src/documents/migrations/1012_fix_archive_files.py
@@ -0,0 +1,330 @@
+# Generated by Django 3.1.6 on 2021-02-07 22:26
+import datetime
+import hashlib
+import logging
+import os
+import shutil
+from time import sleep
+
+import pathvalidate
+from django.conf import settings
+from django.db import migrations, models
+from django.template.defaultfilters import slugify
+
+from documents.file_handling import defaultdictNoStr, many_to_dictionary
+
+
+logger = logging.getLogger("paperless.migrations")
+
+###############################################################################
+# This is code copied straight paperless before the change.
+###############################################################################
+
+def archive_name_from_filename(filename):
+    return os.path.splitext(filename)[0] + ".pdf"
+
+
+def archive_path_old(doc):
+    if doc.filename:
+        fname = archive_name_from_filename(doc.filename)
+    else:
+        fname = "{:07}.pdf".format(doc.pk)
+
+    return os.path.join(
+        settings.ARCHIVE_DIR,
+        fname
+    )
+
+
+STORAGE_TYPE_GPG = "gpg"
+
+
+def archive_path_new(doc):
+    if doc.archive_filename is not None:
+        return os.path.join(
+            settings.ARCHIVE_DIR,
+            str(doc.archive_filename)
+        )
+    else:
+        return None
+
+
+def source_path(doc):
+    if doc.filename:
+        fname = str(doc.filename)
+    else:
+        fname = "{:07}{}".format(doc.pk, doc.file_type)
+        if doc.storage_type == STORAGE_TYPE_GPG:
+            fname += ".gpg"  # pragma: no cover
+
+    return os.path.join(
+        settings.ORIGINALS_DIR,
+        fname
+    )
+
+
+def generate_unique_filename(doc, archive_filename=False):
+    if archive_filename:
+        old_filename = doc.archive_filename
+        root = settings.ARCHIVE_DIR
+    else:
+        old_filename = doc.filename
+        root = settings.ORIGINALS_DIR
+
+    counter = 0
+
+    while True:
+        new_filename = generate_filename(
+            doc, counter, archive_filename=archive_filename)
+        if new_filename == old_filename:
+            # still the same as before.
+            return new_filename
+
+        if os.path.exists(os.path.join(root, new_filename)):
+            counter += 1
+        else:
+            return new_filename
+
+
+def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
+    path = ""
+
+    try:
+        if settings.PAPERLESS_FILENAME_FORMAT is not None:
+            tags = defaultdictNoStr(lambda: slugify(None),
+                                    many_to_dictionary(doc.tags))
+
+            tag_list = pathvalidate.sanitize_filename(
+                ",".join(sorted(
+                    [tag.name for tag in doc.tags.all()]
+                )),
+                replacement_text="-"
+            )
+
+            if doc.correspondent:
+                correspondent = pathvalidate.sanitize_filename(
+                    doc.correspondent.name, replacement_text="-"
+                )
+            else:
+                correspondent = "none"
+
+            if doc.document_type:
+                document_type = pathvalidate.sanitize_filename(
+                    doc.document_type.name, replacement_text="-"
+                )
+            else:
+                document_type = "none"
+
+            path = settings.PAPERLESS_FILENAME_FORMAT.format(
+                title=pathvalidate.sanitize_filename(
+                    doc.title, replacement_text="-"),
+                correspondent=correspondent,
+                document_type=document_type,
+                created=datetime.date.isoformat(doc.created),
+                created_year=doc.created.year if doc.created else "none",
+                created_month=f"{doc.created.month:02}" if doc.created else "none",  # NOQA: E501
+                created_day=f"{doc.created.day:02}" if doc.created else "none",
+                added=datetime.date.isoformat(doc.added),
+                added_year=doc.added.year if doc.added else "none",
+                added_month=f"{doc.added.month:02}" if doc.added else "none",
+                added_day=f"{doc.added.day:02}" if doc.added else "none",
+                tags=tags,
+                tag_list=tag_list
+            ).strip()
+
+            path = path.strip(os.sep)
+
+    except (ValueError, KeyError, IndexError):
+        logger.warning(
+            f"Invalid PAPERLESS_FILENAME_FORMAT: "
+            f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
+
+    counter_str = f"_{counter:02}" if counter else ""
+
+    filetype_str = ".pdf" if archive_filename else doc.file_type
+
+    if len(path) > 0:
+        filename = f"{path}{counter_str}{filetype_str}"
+    else:
+        filename = f"{doc.pk:07}{counter_str}{filetype_str}"
+
+    # Append .gpg for encrypted files
+    if append_gpg and doc.storage_type == STORAGE_TYPE_GPG:
+        filename += ".gpg"
+
+    return filename
+
+
+###############################################################################
+# This code performs bidirection archive file transformation.
+###############################################################################
+
+
+def parse_wrapper(parser, path, mime_type, file_name):
+    # this is here so that I can mock this out for testing.
+    parser.parse(path, mime_type, file_name)
+
+
+def create_archive_version(doc, retry_count=3):
+    from documents.parsers import get_parser_class_for_mime_type, \
+        DocumentParser, \
+        ParseError
+
+    logger.info(
+        f"Regenerating archive document for document ID:{doc.id}"
+    )
+    parser_class = get_parser_class_for_mime_type(doc.mime_type)
+    for try_num in range(retry_count):
+        parser: DocumentParser = parser_class(None, None)
+        try:
+            parse_wrapper(parser, source_path(doc), doc.mime_type,
+                          os.path.basename(doc.filename))
+            doc.content = parser.get_text()
+
+            if parser.get_archive_path() and os.path.isfile(
+                parser.get_archive_path()):
+                doc.archive_filename = generate_unique_filename(
+                    doc, archive_filename=True)
+                with open(parser.get_archive_path(), "rb") as f:
+                    doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
+                os.makedirs(os.path.dirname(archive_path_new(doc)),
+                            exist_ok=True)
+                shutil.copy2(parser.get_archive_path(), archive_path_new(doc))
+            else:
+                doc.archive_checksum = None
+                logger.error(
+                    f"Parser did not return an archive document for document "
+                    f"ID:{doc.id}. Removing archive document."
+                )
+            doc.save()
+            return
+        except ParseError:
+            if try_num + 1 == retry_count:
+                logger.exception(
+                    f"Unable to regenerate archive document for ID:{doc.id}. You "
+                    f"need to invoke the document_archiver management command "
+                    f"manually for that document."
+                )
+                doc.archive_checksum = None
+                doc.save()
+                return
+            else:
+                # This is mostly here for the tika parser in docker
+                # environemnts. The servers for parsing need to come up first,
+                # and the docker setup doesn't ensure that tika is running
+                # before attempting migrations.
+                logger.error("Parse error, will try again in 5 seconds...")
+                sleep(5)
+        finally:
+            parser.cleanup()
+
+
+def move_old_to_new_locations(apps, schema_editor):
+    Document = apps.get_model("documents", "Document")
+
+    affected_document_ids = set()
+
+    old_archive_path_to_id = {}
+
+    # check for documents that have incorrect archive versions
+    for doc in Document.objects.filter(archive_checksum__isnull=False):
+        old_path = archive_path_old(doc)
+
+        if old_path in old_archive_path_to_id:
+            affected_document_ids.add(doc.id)
+            affected_document_ids.add(old_archive_path_to_id[old_path])
+        else:
+            old_archive_path_to_id[old_path] = doc.id
+
+    # check that archive files of all unaffected documents are in place
+    for doc in Document.objects.filter(archive_checksum__isnull=False):
+        old_path = archive_path_old(doc)
+        if doc.id not in affected_document_ids and not os.path.isfile(old_path):
+            raise ValueError(
+                f"Archived document ID:{doc.id} does not exist at: "
+                f"{old_path}")
+
+    # check that we can regenerate affected archive versions
+    for doc_id in affected_document_ids:
+        from documents.parsers import get_parser_class_for_mime_type
+
+        doc = Document.objects.get(id=doc_id)
+        parser_class = get_parser_class_for_mime_type(doc.mime_type)
+        if not parser_class:
+            raise ValueError(
+                f"Document ID:{doc.id} has an invalid archived document, "
+                f"but no parsers are available. Cannot migrate.")
+
+    for doc in Document.objects.filter(archive_checksum__isnull=False):
+
+        if doc.id in affected_document_ids:
+            old_path = archive_path_old(doc)
+            # remove affected archive versions
+            if os.path.isfile(old_path):
+                logger.debug(
+                    f"Removing {old_path}"
+                )
+                os.unlink(old_path)
+        else:
+            # Set archive path for unaffected files
+            doc.archive_filename = archive_name_from_filename(doc.filename)
+            Document.objects.filter(id=doc.id).update(
+                archive_filename=doc.archive_filename
+            )
+
+    # regenerate archive documents
+    for doc_id in affected_document_ids:
+        doc = Document.objects.get(id=doc_id)
+        create_archive_version(doc)
+
+
+def move_new_to_old_locations(apps, schema_editor):
+    Document = apps.get_model("documents", "Document")
+
+    old_archive_paths = set()
+
+    for doc in Document.objects.filter(archive_checksum__isnull=False):
+        new_archive_path = archive_path_new(doc)
+        old_archive_path = archive_path_old(doc)
+        if old_archive_path in old_archive_paths:
+            raise ValueError(
+                f"Cannot migrate: Archive file name {old_archive_path} of "
+                f"document {doc.filename} would clash with another archive "
+                f"filename.")
+        old_archive_paths.add(old_archive_path)
+        if new_archive_path != old_archive_path and os.path.isfile(old_archive_path):
+            raise ValueError(
+                f"Cannot migrate: Cannot move {new_archive_path} to "
+                f"{old_archive_path}: file already exists."
+            )
+
+    for doc in Document.objects.filter(archive_checksum__isnull=False):
+        new_archive_path = archive_path_new(doc)
+        old_archive_path = archive_path_old(doc)
+        if new_archive_path != old_archive_path:
+            logger.debug(f"Moving {new_archive_path} to {old_archive_path}")
+            shutil.move(new_archive_path, old_archive_path)
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('documents', '1011_auto_20210101_2340'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='document',
+            name='archive_filename',
+            field=models.FilePathField(default=None, editable=False, help_text='Current archive filename in storage', max_length=1024, null=True, unique=True, verbose_name='archive filename'),
+        ),
+        migrations.AlterField(
+            model_name='document',
+            name='filename',
+            field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True, unique=True, verbose_name='filename'),
+        ),
+        migrations.RunPython(
+            move_old_to_new_locations,
+            move_new_to_old_locations
+        ),
+    ]
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -16,7 +16,6 @@ from django.utils.timezone import is_aware

 from django.utils.translation import gettext_lazy as _

-from documents.file_handling import archive_name_from_filename
 from documents.parsers import get_default_file_extension


@@ -208,10 +207,21 @@ class Document(models.Model):
        max_length=1024,
        editable=False,
        default=None,
+        unique=True,
        null=True,
        help_text=_("Current filename in storage")
    )

+    archive_filename = models.FilePathField(
+        _("archive filename"),
+        max_length=1024,
+        editable=False,
+        default=None,
+        unique=True,
+        null=True,
+        help_text=_("Current archive filename in storage")
+    )
+
    archive_serial_number = models.IntegerField(
        _("archive serial number"),
        blank=True,
@@ -256,16 +266,18 @@ class Document(models.Model):
        return open(self.source_path, "rb")

    @property
-    def archive_path(self):
-        if self.filename:
-            fname = archive_name_from_filename(self.filename)
-        else:
-            fname = "{:07}.pdf".format(self.pk)
+    def has_archive_version(self):
+        return self.archive_filename is not None

-        return os.path.join(
-            settings.ARCHIVE_DIR,
-            fname
-        )
+    @property
+    def archive_path(self):
+        if self.has_archive_version:
+            return os.path.join(
+                settings.ARCHIVE_DIR,
+                str(self.archive_filename)
+            )
+        else:
+            return None

    @property
    def archive_file(self):
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -288,14 +288,17 @@ class DocumentParser(LoggingMixin):
    def get_archive_path(self):
        return self.archive_path

-    def get_thumbnail(self, document_path, mime_type):
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
        """
        Returns the path to a file we can use as a thumbnail for this document.
        """
        raise NotImplementedError()

-    def get_optimised_thumbnail(self, document_path, mime_type):
-        thumbnail = self.get_thumbnail(document_path, mime_type)
+    def get_optimised_thumbnail(self,
+                                document_path,
+                                mime_type,
+                                file_name=None):
+        thumbnail = self.get_thumbnail(document_path, mime_type, file_name)
        if settings.OPTIMIZE_THUMBNAILS:
            out_path = os.path.join(self.tempdir, "thumb_optipng.png")

--- a/src/documents/sanity_checker.py
+++ b/src/documents/sanity_checker.py
@@ -56,7 +56,8 @@ def check_sanity():
            messages.append(SanityError(
                f"Thumbnail of document {doc.pk} does not exist."))
        else:
-            present_files.remove(os.path.normpath(doc.thumbnail_path))
+            if os.path.normpath(doc.thumbnail_path) in present_files:
+                present_files.remove(os.path.normpath(doc.thumbnail_path))
            try:
                with doc.thumbnail_file as f:
                    f.read()
@@ -71,7 +72,8 @@ def check_sanity():
            messages.append(SanityError(
                f"Original of document {doc.pk} does not exist."))
        else:
-            present_files.remove(os.path.normpath(doc.source_path))
+            if os.path.normpath(doc.source_path) in present_files:
+                present_files.remove(os.path.normpath(doc.source_path))
            try:
                with doc.source_file as f:
                    checksum = hashlib.md5(f.read()).hexdigest()
@@ -86,13 +88,24 @@ def check_sanity():
                    ))

        # Check sanity of the archive file.
-        if doc.archive_checksum:
+        if doc.archive_checksum and not doc.archive_filename:
+            messages.append(SanityError(
+                f"Document {doc.pk} has an archive file checksum, but no "
+                f"archive filename."
+            ))
+        elif not doc.archive_checksum and doc.archive_filename:
+            messages.append(SanityError(
+                f"Document {doc.pk} has an archive file, but its checksum is "
+                f"missing."
+            ))
+        elif doc.has_archive_version:
            if not os.path.isfile(doc.archive_path):
                messages.append(SanityError(
                    f"Archived version of document {doc.pk} does not exist."
                ))
            else:
-                present_files.remove(os.path.normpath(doc.archive_path))
+                if os.path.normpath(doc.archive_path) in present_files:
+                    present_files.remove(os.path.normpath(doc.archive_path))
                try:
                    with doc.archive_file as f:
                        checksum = hashlib.md5(f.read()).hexdigest()
@@ -103,7 +116,8 @@ def check_sanity():
                else:
                    if not checksum == doc.archive_checksum:
                        messages.append(SanityError(
-                            f"Checksum mismatch of archive {doc.pk}. "
+                            f"Checksum mismatch of archived document "
+                            f"{doc.pk}. "
                            f"Stored: {doc.checksum}, actual: {checksum}."
                        ))

--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -129,7 +129,7 @@ class DocumentSerializer(DynamicFieldsModelSerializer):
        return obj.get_public_filename()

    def get_archived_file_name(self, obj):
-        if obj.archive_checksum:
+        if obj.has_archive_version:
            return obj.get_public_filename(archive=True)
        else:
            return None
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -1,6 +1,5 @@
 import logging
 import os
-from subprocess import Popen

 from django.conf import settings
 from django.contrib.admin.models import ADDITION, LogEntry
@@ -14,7 +13,7 @@ from filelock import FileLock

 from .. import index, matching
 from ..file_handling import delete_empty_directories, \
-    create_source_path_directory, archive_name_from_filename, \
+    create_source_path_directory, \
    generate_unique_filename
 from ..models import Document, Tag

@@ -148,18 +147,18 @@ def set_tags(sender,
@receiver(models.signals.post_delete, sender=Document)
 def cleanup_document_deletion(sender, instance, using, **kwargs):
    with FileLock(settings.MEDIA_LOCK):
-        for f in (instance.source_path,
-                  instance.archive_path,
-                  instance.thumbnail_path):
-            if os.path.isfile(f):
+        for filename in (instance.source_path,
+                         instance.archive_path,
+                         instance.thumbnail_path):
+            if filename and os.path.isfile(filename):
                try:
-                    os.unlink(f)
+                    os.unlink(filename)
                    logger.debug(
-                        f"Deleted file {f}.")
+                        f"Deleted file {filename}.")
                except OSError as e:
                    logger.warning(
                        f"While deleting document {str(instance)}, the file "
-                        f"{f} could not be deleted: {e}"
+                        f"{filename} could not be deleted: {e}"
                    )

        delete_empty_directories(
@@ -167,10 +166,15 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
            root=settings.ORIGINALS_DIR
        )

-        delete_empty_directories(
-            os.path.dirname(instance.archive_path),
-            root=settings.ARCHIVE_DIR
-        )
+        if instance.has_archive_version:
+            delete_empty_directories(
+                os.path.dirname(instance.archive_path),
+                root=settings.ARCHIVE_DIR
+            )
+
+
+class CannotMoveFilesException(Exception):
+    pass


 def validate_move(instance, old_path, new_path):
@@ -178,16 +182,14 @@ def validate_move(instance, old_path, new_path):
        # Can't do anything if the old file does not exist anymore.
        logger.fatal(
            f"Document {str(instance)}: File {old_path} has gone.")
-        return False
+        raise CannotMoveFilesException()

    if os.path.isfile(new_path):
        # Can't do anything if the new file already exists. Skip updating file.
        logger.warning(
            f"Document {str(instance)}: Cannot rename file "
            f"since target path {new_path} already exists.")
-        return False
-
-    return True
+        raise CannotMoveFilesException()


@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@@ -206,56 +208,61 @@ def update_filename_and_move_files(sender, instance, **kwargs):
        return

    with FileLock(settings.MEDIA_LOCK):
-        old_filename = instance.filename
-        new_filename = generate_unique_filename(
-            instance, settings.ORIGINALS_DIR)
+        try:
+            old_filename = instance.filename
+            old_source_path = instance.source_path

-        if new_filename == instance.filename:
-            # Don't do anything if its the same.
-            return
+            instance.filename = generate_unique_filename(instance)
+            move_original = old_filename != instance.filename

-        old_source_path = instance.source_path
-        new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename)
-
-        if not validate_move(instance, old_source_path, new_source_path):
-            return
-
-        # archive files are optional, archive checksum tells us if we have one,
-        # since this is None for documents without archived files.
-        if instance.archive_checksum:
-            new_archive_filename = archive_name_from_filename(new_filename)
+            old_archive_filename = instance.archive_filename
            old_archive_path = instance.archive_path
-            new_archive_path = os.path.join(settings.ARCHIVE_DIR,
-                                            new_archive_filename)

-            if not validate_move(instance, old_archive_path, new_archive_path):
+            if instance.has_archive_version:
+
+                instance.archive_filename = generate_unique_filename(
+                    instance, archive_filename=True
+                )
+
+                move_archive = old_archive_filename != instance.archive_filename  # NOQA: E501
+            else:
+                move_archive = False
+
+            if not move_original and not move_archive:
+                # Don't do anything if filenames did not change.
                return

-            create_source_path_directory(new_archive_path)
-        else:
-            old_archive_path = None
-            new_archive_path = None
+            if move_original:
+                validate_move(instance, old_source_path, instance.source_path)
+                create_source_path_directory(instance.source_path)
+                os.rename(old_source_path, instance.source_path)

-        create_source_path_directory(new_source_path)
-
-        try:
-            os.rename(old_source_path, new_source_path)
-            if instance.archive_checksum:
-                os.rename(old_archive_path, new_archive_path)
-            instance.filename = new_filename
+            if move_archive:
+                validate_move(
+                    instance, old_archive_path, instance.archive_path)
+                create_source_path_directory(instance.archive_path)
+                os.rename(old_archive_path, instance.archive_path)

            # Don't save() here to prevent infinite recursion.
            Document.objects.filter(pk=instance.pk).update(
-                filename=new_filename)
+                filename=instance.filename,
+                archive_filename=instance.archive_filename,
+            )

-        except OSError as e:
-            instance.filename = old_filename
-            # this happens when we can't move a file. If that's the case for
-            # the archive file, we try our best to revert the changes.
-            # no need to save the instance, the update() has not happened yet.
+        except (OSError, DatabaseError, CannotMoveFilesException):
+            # This happens when either:
+            #  - moving the files failed due to file system errors
+            #  - saving to the database failed due to database errors
+            # In both cases, we need to revert to the original state.
+
+            # Try to move files to their original location.
            try:
-                os.rename(new_source_path, old_source_path)
-                os.rename(new_archive_path, old_archive_path)
+                if move_original and os.path.isfile(instance.source_path):
+                    os.rename(instance.source_path, old_source_path)
+
+                if move_archive and os.path.isfile(instance.archive_path):
+                    os.rename(instance.archive_path, old_archive_path)
+
            except Exception as e:
                # This is fine, since:
                # A: if we managed to move source from A to B, we will also
@@ -266,16 +273,10 @@ def update_filename_and_move_files(sender, instance, **kwargs):
                # B: if moving the orignal file failed, nothing has changed
                #  anyway.
                pass
-        except DatabaseError as e:
-            # this happens after moving files, so move them back into place.
-            # since moving them once succeeded, it's very likely going to
-            # succeed again.
-            os.rename(new_source_path, old_source_path)
-            if instance.archive_checksum:
-                os.rename(new_archive_path, old_archive_path)
+
+            # restore old values on the instance
            instance.filename = old_filename
-            # again, no need to save the instance, since the actual update()
-            # operation failed.
+            instance.archive_filename = old_archive_filename

        # finally, remove any empty sub folders. This will do nothing if
        # something has failed above.
@@ -283,7 +284,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):
            delete_empty_directories(os.path.dirname(old_source_path),
                                     root=settings.ORIGINALS_DIR)

-        if old_archive_path and not os.path.isfile(old_archive_path):
+        if instance.has_archive_version and not os.path.isfile(old_archive_path):  # NOQA: E501
            delete_empty_directories(os.path.dirname(old_archive_path),
                                     root=settings.ARCHIVE_DIR)

--- a/src/documents/tests/samples/simple-noalpha.png
+++ b/src/documents/tests/samples/simple-noalpha.png
--- a/src/documents/tests/samples/simple.jpg
+++ b/src/documents/tests/samples/simple.jpg
--- a/src/documents/tests/samples/simple.png
+++ b/src/documents/tests/samples/simple.png
--- a/src/documents/tests/samples/simple.txt
+++ b/src/documents/tests/samples/simple.txt
@@ -0,0 +1 @@
+This is a test file.
--- a/src/documents/tests/test_admin.py
+++ b/src/documents/tests/test_admin.py
@@ -23,18 +23,6 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
        self.assertEqual(Document.objects.get(id=doc.id).title, "new title")
        m.assert_called_once()

-    def test_tags(self):
-        doc = Document.objects.create(title="test")
-        doc.tags.create(name="t1")
-        doc.tags.create(name="t2")
-
-        self.assertEqual(self.doc_admin.tags_(doc), "<span >t1, </span><span >t2, </span>")
-
-    def test_tags_empty(self):
-        doc = Document.objects.create(title="test")
-
-        self.assertEqual(self.doc_admin.tags_(doc), "")
-
    @mock.patch("documents.admin.index.remove_document")
    def test_delete_model(self, m):
        doc = Document.objects.create(title="test")
--- a/src/documents/tests/test_api.py
+++ b/src/documents/tests/test_api.py
@@ -146,21 +146,19 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, content_thumbnail)

+    @override_settings(PAPERLESS_FILENAME_FORMAT="")
    def test_download_with_archive(self):

-        _, filename = tempfile.mkstemp(dir=self.dirs.originals_dir)
-
        content = b"This is a test"
        content_archive = b"This is the same test but archived"

-        with open(filename, "wb") as f:
-            f.write(content)
-
-        filename = os.path.basename(filename)
-
-        doc = Document.objects.create(title="none", filename=filename,
+        doc = Document.objects.create(title="none", filename="my_document.pdf",
+                                      archive_filename="archived.pdf",
                                      mime_type="application/pdf")

+        with open(doc.source_path, "wb") as f:
+            f.write(content)
+
        with open(doc.archive_path, "wb") as f:
            f.write(content_archive)

@@ -577,7 +575,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        async_task.assert_not_called()

    def test_get_metadata(self):
-        doc = Document.objects.create(title="test", filename="file.pdf", mime_type="image/png", archive_checksum="A")
+        doc = Document.objects.create(title="test", filename="file.pdf", mime_type="image/png", archive_checksum="A", archive_filename="archive.pdf")

        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png"), doc.source_path)
        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), doc.archive_path)
@@ -591,6 +589,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertTrue(meta['has_archive_version'])
        self.assertEqual(len(meta['original_metadata']), 0)
        self.assertGreater(len(meta['archive_metadata']), 0)
+        self.assertEqual(meta['media_filename'], "file.pdf")
+        self.assertEqual(meta['archive_media_filename'], "archive.pdf")

    def test_get_metadata_invalid_doc(self):
        response = self.client.get(f"/api/documents/34576/metadata/")
@@ -610,6 +610,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertFalse(meta['has_archive_version'])
        self.assertGreater(len(meta['original_metadata']), 0)
        self.assertIsNone(meta['archive_metadata'])
+        self.assertIsNone(meta['archive_media_filename'])

    def test_get_empty_suggestions(self):
        doc = Document.objects.create(title="test", mime_type="application/pdf")
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -5,12 +5,14 @@ import tempfile
 from unittest import mock
 from unittest.mock import MagicMock

+from django.conf import settings
 from django.test import TestCase, override_settings

 from .utils import DirectoriesMixin
 from ..consumer import Consumer, ConsumerError
 from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
 from ..parsers import DocumentParser, ParseError
+from ..tasks import sanity_check


 class TestAttributes(TestCase):
@@ -165,7 +167,7 @@ class TestFieldPermutations(TestCase):

 class DummyParser(DocumentParser):

-    def get_thumbnail(self, document_path, mime_type):
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
        # not important during tests
        raise NotImplementedError()

@@ -174,16 +176,34 @@ class DummyParser(DocumentParser):
        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
        self.archive_path = archive_path

-    def get_optimised_thumbnail(self, document_path, mime_type):
+    def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
        return self.fake_thumb

    def parse(self, document_path, mime_type, file_name=None):
        self.text = "The Text"


+class CopyParser(DocumentParser):
+
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
+        return self.fake_thumb
+
+    def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
+        return self.fake_thumb
+
+    def __init__(self, logging_group, progress_callback=None):
+        super(CopyParser, self).__init__(logging_group, progress_callback)
+        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=self.tempdir)
+
+    def parse(self, document_path, mime_type, file_name=None):
+        self.text = "The text"
+        self.archive_path = os.path.join(self.tempdir, "archive.pdf")
+        shutil.copy(document_path, self.archive_path)
+
+
 class FaultyParser(DocumentParser):

-    def get_thumbnail(self, document_path, mime_type):
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
        # not important during tests
        raise NotImplementedError()

@@ -191,7 +211,7 @@ class FaultyParser(DocumentParser):
        super(FaultyParser, self).__init__(logging_group)
        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)

-    def get_optimised_thumbnail(self, document_path, mime_type):
+    def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
        return self.fake_thumb

    def parse(self, document_path, mime_type, file_name=None):
@@ -203,6 +223,8 @@ def fake_magic_from_file(file, mime=False):
    if mime:
        if os.path.splitext(file)[1] == ".pdf":
            return "application/pdf"
+        elif os.path.splitext(file)[1] == ".png":
+            return "image/png"
        else:
            return "unknown"
    else:
@@ -274,6 +296,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
        self.assertIsNone(document.correspondent)
        self.assertIsNone(document.document_type)
        self.assertEqual(document.filename, "0000001.pdf")
+        self.assertEqual(document.archive_filename, "0000001.pdf")

        self.assertTrue(os.path.isfile(
            document.source_path
@@ -432,6 +455,7 @@ class TestConsumer(DirectoriesMixin, TestCase):

        self.assertEqual(document.title, "new docs")
        self.assertEqual(document.filename, "none/new docs.pdf")
+        self.assertEqual(document.archive_filename, "none/new docs.pdf")

        self._assert_first_last_send_progress()

@@ -446,7 +470,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
            filenames.insert(0, f)
            return f

-        m.side_effect = lambda f, root: get_filename()
+        m.side_effect = lambda f, archive_filename = False: get_filename()

        filename = self.get_test_file()

@@ -457,6 +481,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
        self.assertEqual(document.title, "new docs")
        self.assertIsNotNone(os.path.isfile(document.title))
        self.assertTrue(os.path.isfile(document.source_path))
+        self.assertTrue(os.path.isfile(document.archive_path))

        self._assert_first_last_send_progress()

@@ -516,6 +541,30 @@ class TestConsumer(DirectoriesMixin, TestCase):

        self._assert_first_last_send_progress(last_status="FAILED")

+    @override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
+    @mock.patch("documents.parsers.document_consumer_declaration.send")
+    def test_similar_filenames(self, m):
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"))
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.png"), os.path.join(settings.CONSUMPTION_DIR, "simple.png"))
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png"), os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"))
+        m.return_value = [(None, {
+            "parser": CopyParser,
+            "mime_types": {"application/pdf": ".pdf", "image/png": ".png"},
+            "weight": 0
+        })]
+        doc1 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.png"))
+        doc2 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"))
+        doc3 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"))
+
+        self.assertEqual(doc1.filename, "simple.png")
+        self.assertEqual(doc1.archive_filename, "simple.pdf")
+        self.assertEqual(doc2.filename, "simple.pdf")
+        self.assertEqual(doc2.archive_filename, "simple_01.pdf")
+        self.assertEqual(doc3.filename, "simple.png.pdf")
+        self.assertEqual(doc3.archive_filename, "simple.png.pdf")
+
+        sanity_check()
+

 class PreConsumeTestCase(TestCase):

--- a/src/documents/tests/test_date_parsing.py
+++ b/src/documents/tests/test_date_parsing.py
@@ -1,7 +1,6 @@
 import datetime
 import os
 import shutil
-from unittest import mock
 from uuid import uuid4

 from dateutil import tz
@@ -9,7 +8,6 @@ from django.conf import settings
 from django.test import TestCase, override_settings

 from documents.parsers import parse_date
-from paperless_tesseract.parsers import RasterisedDocumentParser


 class TestDate(TestCase):
@@ -152,4 +150,4 @@ class TestDate(TestCase):
                2018, 2, 13, 0, 0,
                tzinfo=tz.gettz(settings.TIME_ZONE)
            )
-        )
+        )
--- a/src/documents/tests/test_file_handling.py
+++ b/src/documents/tests/test_file_handling.py
@@ -201,6 +201,13 @@ class TestFileHandling(DirectoriesMixin, TestCase):

        self.assertEqual(generate_filename(d), "my_doc_type - the_doc.pdf")

+    @override_settings(PAPERLESS_FILENAME_FORMAT="{asn} - {title}")
+    def test_asn(self):
+        d1 = Document.objects.create(title="the_doc", mime_type="application/pdf", archive_serial_number=652, checksum="A")
+        d2 = Document.objects.create(title="the_doc", mime_type="application/pdf", archive_serial_number=None, checksum="B")
+        self.assertEqual(generate_filename(d1), "652 - the_doc.pdf")
+        self.assertEqual(generate_filename(d2), "none - the_doc.pdf")
+
    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
    def test_tags_with_underscore(self):
        document = Document()
@@ -439,6 +446,18 @@ class TestFileHandling(DirectoriesMixin, TestCase):
        self.assertEqual(document2.filename, "qwe.pdf")


+    @override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
+    @mock.patch("documents.signals.handlers.Document.objects.filter")
+    def test_no_update_without_change(self, m):
+        doc = Document.objects.create(title="document", filename="document.pdf", archive_filename="document.pdf", checksum="A", archive_checksum="B", mime_type="application/pdf")
+        Path(doc.source_path).touch()
+        Path(doc.archive_path).touch()
+
+        doc.save()
+
+        m.assert_not_called()
+
+

 class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):

@@ -448,7 +467,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
        archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
        Path(original).touch()
        Path(archive).touch()
-        doc = Document.objects.create(mime_type="application/pdf", filename="0000001.pdf", checksum="A", archive_checksum="B")
+        doc = Document.objects.create(mime_type="application/pdf", filename="0000001.pdf", checksum="A", archive_filename="0000001.pdf", archive_checksum="B")

        self.assertTrue(os.path.isfile(original))
        self.assertTrue(os.path.isfile(archive))
@@ -461,7 +480,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
        archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
        Path(original).touch()
        Path(archive).touch()
-        doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
+        doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")

        self.assertFalse(os.path.isfile(original))
        self.assertFalse(os.path.isfile(archive))
@@ -475,7 +494,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
        original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
        archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
        Path(original).touch()
-        doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
+        doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")

        self.assertTrue(os.path.isfile(original))
        self.assertFalse(os.path.isfile(archive))
@@ -486,14 +505,49 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
    def test_move_archive_exists(self):
        original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
        archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
+        existing_archive_file = os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")
        Path(original).touch()
        Path(archive).touch()
        os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none"))
-        Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")).touch()
-        doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
+        Path(existing_archive_file).touch()
+        doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
+
+        self.assertFalse(os.path.isfile(original))
+        self.assertFalse(os.path.isfile(archive))
+        self.assertTrue(os.path.isfile(doc.source_path))
+        self.assertTrue(os.path.isfile(doc.archive_path))
+        self.assertTrue(os.path.isfile(existing_archive_file))
+        self.assertEqual(doc.archive_filename, "none/my_doc_01.pdf")
+
+    @override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
+    def test_move_original_only(self):
+        original = os.path.join(settings.ORIGINALS_DIR, "document_01.pdf")
+        archive = os.path.join(settings.ARCHIVE_DIR, "document.pdf")
+        Path(original).touch()
+        Path(archive).touch()
+
+        doc = Document.objects.create(mime_type="application/pdf", title="document", filename="document_01.pdf", checksum="A",
+                                      archive_checksum="B", archive_filename="document.pdf")
+
+        self.assertEqual(doc.filename, "document.pdf")
+        self.assertEqual(doc.archive_filename, "document.pdf")
+
+        self.assertTrue(os.path.isfile(doc.source_path))
+        self.assertTrue(os.path.isfile(doc.archive_path))
+
+    @override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
+    def test_move_archive_only(self):
+        original = os.path.join(settings.ORIGINALS_DIR, "document.pdf")
+        archive = os.path.join(settings.ARCHIVE_DIR, "document_01.pdf")
+        Path(original).touch()
+        Path(archive).touch()
+
+        doc = Document.objects.create(mime_type="application/pdf", title="document", filename="document.pdf", checksum="A",
+                                      archive_checksum="B", archive_filename="document_01.pdf")
+
+        self.assertEqual(doc.filename, "document.pdf")
+        self.assertEqual(doc.archive_filename, "document.pdf")

-        self.assertTrue(os.path.isfile(original))
-        self.assertTrue(os.path.isfile(archive))
        self.assertTrue(os.path.isfile(doc.source_path))
        self.assertTrue(os.path.isfile(doc.archive_path))

@@ -514,8 +568,9 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
        archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
        Path(original).touch()
        Path(archive).touch()
-        doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
+        doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")

+        m.assert_called()
        self.assertTrue(os.path.isfile(original))
        self.assertTrue(os.path.isfile(archive))
        self.assertTrue(os.path.isfile(doc.source_path))
@@ -527,7 +582,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
        archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
        #Path(original).touch()
        Path(archive).touch()
-        doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
+        doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", archive_filename="0000001.pdf", checksum="A", archive_checksum="B")

        self.assertFalse(os.path.isfile(original))
        self.assertTrue(os.path.isfile(archive))
@@ -551,19 +606,21 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
        archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
        Path(original).touch()
        Path(archive).touch()
-        doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
+        doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", archive_filename="0000001.pdf", checksum="A", archive_checksum="B")

+        m.assert_called()
        self.assertTrue(os.path.isfile(original))
        self.assertTrue(os.path.isfile(archive))
        self.assertTrue(os.path.isfile(doc.source_path))
        self.assertTrue(os.path.isfile(doc.archive_path))

+    @override_settings(PAPERLESS_FILENAME_FORMAT="")
    def test_archive_deleted(self):
        original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
        archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
        Path(original).touch()
        Path(archive).touch()
-        doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
+        doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")

        self.assertTrue(os.path.isfile(original))
        self.assertTrue(os.path.isfile(archive))
@@ -577,6 +634,28 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
        self.assertFalse(os.path.isfile(doc.source_path))
        self.assertFalse(os.path.isfile(doc.archive_path))

+    @override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
+    def test_archive_deleted2(self):
+        original = os.path.join(settings.ORIGINALS_DIR, "document.png")
+        original2 = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
+        archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
+        Path(original).touch()
+        Path(original2).touch()
+        Path(archive).touch()
+
+        doc1 = Document.objects.create(mime_type="image/png", title="document", filename="document.png", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
+        doc2 = Document.objects.create(mime_type="application/pdf", title="0000001", filename="0000001.pdf", checksum="C")
+
+        self.assertTrue(os.path.isfile(doc1.source_path))
+        self.assertTrue(os.path.isfile(doc1.archive_path))
+        self.assertTrue(os.path.isfile(doc2.source_path))
+
+        doc2.delete()
+
+        self.assertTrue(os.path.isfile(doc1.source_path))
+        self.assertTrue(os.path.isfile(doc1.archive_path))
+        self.assertFalse(os.path.isfile(doc2.source_path))
+
    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
    def test_database_error(self):

@@ -584,7 +663,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
        archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
        Path(original).touch()
        Path(archive).touch()
-        doc = Document(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
+        doc = Document(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_filename="0000001.pdf", archive_checksum="B")
        with mock.patch("documents.signals.handlers.Document.objects.filter") as m:
            m.side_effect = DatabaseError()
            doc.save()
@@ -594,6 +673,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
        self.assertTrue(os.path.isfile(doc.source_path))
        self.assertTrue(os.path.isfile(doc.archive_path))

+
 class TestFilenameGeneration(TestCase):

    @override_settings(
@@ -617,7 +697,7 @@ class TestFilenameGeneration(TestCase):

 def run():
    doc = Document.objects.create(checksum=str(uuid.uuid4()), title=str(uuid.uuid4()), content="wow")
-    doc.filename = generate_unique_filename(doc, settings.ORIGINALS_DIR)
+    doc.filename = generate_unique_filename(doc)
    Path(doc.thumbnail_path).touch()
    with open(doc.source_path, "w") as f:
        f.write(str(uuid.uuid4()))
--- a/src/documents/tests/test_management.py
+++ b/src/documents/tests/test_management.py
@@ -20,6 +20,7 @@ from documents.tests.utils import DirectoriesMixin
 sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")


+@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
 class TestArchiver(DirectoriesMixin, TestCase):

    def make_models(self):
@@ -42,10 +43,27 @@ class TestArchiver(DirectoriesMixin, TestCase):
        doc = Document.objects.get(id=doc.id)

        self.assertIsNotNone(doc.checksum)
+        self.assertIsNotNone(doc.archive_checksum)
        self.assertTrue(os.path.isfile(doc.archive_path))
        self.assertTrue(os.path.isfile(doc.source_path))
        self.assertTrue(filecmp.cmp(sample_file, doc.source_path))
+        self.assertEqual(doc.archive_filename, "none/A.pdf")

+    @override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
+    def test_naming_priorities(self):
+        doc1 = Document.objects.create(checksum="A", title="document", content="first document", mime_type="application/pdf", filename="document.pdf")
+        doc2 = Document.objects.create(checksum="B", title="document", content="second document", mime_type="application/pdf", filename="document_01.pdf")
+        shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document.pdf"))
+        shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document_01.pdf"))
+
+        handle_document(doc2.pk)
+        handle_document(doc1.pk)
+
+        doc1 = Document.objects.get(id=doc1.id)
+        doc2 = Document.objects.get(id=doc2.id)
+
+        self.assertEqual(doc1.archive_filename, "document.pdf")
+        self.assertEqual(doc2.archive_filename, "document_01.pdf")

 class TestDecryptDocuments(TestCase):

@@ -106,24 +124,27 @@ class TestMakeIndex(TestCase):

 class TestRenamer(DirectoriesMixin, TestCase):

+    @override_settings(PAPERLESS_FILENAME_FORMAT="")
    def test_rename(self):
-        doc = Document.objects.create(title="test", mime_type="application/pdf")
+        doc = Document.objects.create(title="test", mime_type="image/jpeg")
        doc.filename = generate_filename(doc)
+        doc.archive_filename = generate_filename(doc, archive_filename=True)
        doc.save()

        Path(doc.source_path).touch()
+        Path(doc.archive_path).touch()

-        old_source_path = doc.source_path
-
-        with override_settings(PAPERLESS_FILENAME_FORMAT="{title}"):
+        with override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}"):
            call_command("document_renamer")

        doc2 = Document.objects.get(id=doc.id)

-        self.assertEqual(doc2.filename, "test.pdf")
-        self.assertFalse(os.path.isfile(old_source_path))
+        self.assertEqual(doc2.filename, "none/test.jpg")
+        self.assertEqual(doc2.archive_filename, "none/test.pdf")
        self.assertFalse(os.path.isfile(doc.source_path))
+        self.assertFalse(os.path.isfile(doc.archive_path))
        self.assertTrue(os.path.isfile(doc2.source_path))
+        self.assertTrue(os.path.isfile(doc2.archive_path))


 class TestCreateClassifier(TestCase):
--- a/src/documents/tests/test_management_exporter.py
+++ b/src/documents/tests/test_management_exporter.py
@@ -22,7 +22,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
        self.target = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.target)

-        self.d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow1", filename="0000001.pdf", mime_type="application/pdf")
+        self.d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow1", filename="0000001.pdf", mime_type="application/pdf", archive_filename="0000001.pdf")
        self.d2 = Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow2", filename="0000002.pdf", mime_type="application/pdf")
        self.d3 = Document.objects.create(content="Content", checksum="d38d7ed02e988e072caf924e0f3fcb76", title="wow2", filename="0000003.pdf", mime_type="application/pdf")
        self.d4 = Document.objects.create(content="Content", checksum="82186aaa94f0b98697d704b90fd1c072", title="wow_dec", filename="0000004.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
--- a/src/documents/tests/test_migration_archive_files.py
+++ b/src/documents/tests/test_migration_archive_files.py
@@ -0,0 +1,325 @@
+import hashlib
+import os
+import shutil
+from pathlib import Path
+from unittest import mock
+
+from django.conf import settings
+from django.test import override_settings
+
+from documents.parsers import ParseError
+from documents.tests.utils import DirectoriesMixin, TestMigrations
+
+
+STORAGE_TYPE_GPG = "gpg"
+
+
+def archive_name_from_filename(filename):
+    return os.path.splitext(filename)[0] + ".pdf"
+
+
+def archive_path_old(self):
+    if self.filename:
+        fname = archive_name_from_filename(self.filename)
+    else:
+        fname = "{:07}.pdf".format(self.pk)
+
+    return os.path.join(
+        settings.ARCHIVE_DIR,
+        fname
+    )
+
+
+def archive_path_new(doc):
+        if doc.archive_filename is not None:
+            return os.path.join(
+                settings.ARCHIVE_DIR,
+                str(doc.archive_filename)
+            )
+        else:
+            return None
+
+
+def source_path(doc):
+    if doc.filename:
+        fname = str(doc.filename)
+    else:
+        fname = "{:07}{}".format(doc.pk, doc.file_type)
+        if doc.storage_type == STORAGE_TYPE_GPG:
+            fname += ".gpg"  # pragma: no cover
+
+    return os.path.join(
+        settings.ORIGINALS_DIR,
+        fname
+    )
+
+
+def thumbnail_path(doc):
+    file_name = "{:07}.png".format(doc.pk)
+    if doc.storage_type == STORAGE_TYPE_GPG:
+        file_name += ".gpg"
+
+    return os.path.join(
+        settings.THUMBNAIL_DIR,
+        file_name
+    )
+
+
+def make_test_document(document_class, title: str, mime_type: str, original: str, original_filename: str, archive: str = None, archive_filename: str = None):
+    doc = document_class()
+    doc.filename = original_filename
+    doc.title = title
+    doc.mime_type = mime_type
+    doc.content = "the content, does not matter for this test"
+    doc.save()
+
+    shutil.copy2(original, source_path(doc))
+    with open(original, "rb") as f:
+        doc.checksum = hashlib.md5(f.read()).hexdigest()
+
+    if archive:
+        if archive_filename:
+            doc.archive_filename = archive_filename
+            shutil.copy2(archive, archive_path_new(doc))
+        else:
+            shutil.copy2(archive, archive_path_old(doc))
+
+        with open(archive, "rb") as f:
+            doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
+
+    doc.save()
+
+    Path(thumbnail_path(doc)).touch()
+
+    return doc
+
+
+simple_jpg = os.path.join(os.path.dirname(__file__), "samples", "simple.jpg")
+simple_pdf = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
+simple_pdf2 = os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf")
+simple_pdf3 = os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000003.pdf")
+simple_txt = os.path.join(os.path.dirname(__file__), "samples", "simple.txt")
+simple_png = os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png")
+simple_png2 = os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
+
+
+@override_settings(PAPERLESS_FILENAME_FORMAT="")
+class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
+
+    migrate_from = '1011_auto_20210101_2340'
+    migrate_to = '1012_fix_archive_files'
+
+    def setUpBeforeMigration(self, apps):
+        Document = apps.get_model("documents", "Document")
+
+        self.unrelated = make_test_document(Document, "unrelated", "application/pdf", simple_pdf3, "unrelated.pdf", simple_pdf)
+        self.no_text = make_test_document(Document, "no-text", "image/png", simple_png2, "no-text.png", simple_pdf)
+        self.doc_no_archive = make_test_document(Document, "no_archive", "text/plain", simple_txt, "no_archive.txt")
+        self.clash1 = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf)
+        self.clash2 = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf)
+        self.clash3 = make_test_document(Document, "clash", "image/png", simple_png, "clash.png", simple_pdf)
+        self.clash4 = make_test_document(Document, "clash.png", "application/pdf", simple_pdf2, "clash.png.pdf", simple_pdf2)
+
+        self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash2))
+        self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash3))
+        self.assertNotEqual(archive_path_old(self.clash1), archive_path_old(self.clash4))
+
+    def testArchiveFilesMigrated(self):
+        Document = self.apps.get_model('documents', 'Document')
+
+        for doc in Document.objects.all():
+            if doc.archive_checksum:
+                self.assertIsNotNone(doc.archive_filename)
+                self.assertTrue(os.path.isfile(archive_path_new(doc)))
+            else:
+                self.assertIsNone(doc.archive_filename)
+
+            with open(source_path(doc), "rb") as f:
+                original_checksum = hashlib.md5(f.read()).hexdigest()
+            self.assertEqual(original_checksum, doc.checksum)
+
+            if doc.archive_checksum:
+                self.assertTrue(os.path.isfile(archive_path_new(doc)))
+                with open(archive_path_new(doc), "rb") as f:
+                    archive_checksum = hashlib.md5(f.read()).hexdigest()
+                self.assertEqual(archive_checksum, doc.archive_checksum)
+
+        self.assertEqual(Document.objects.filter(archive_checksum__isnull=False).count(), 6)
+
+    def test_filenames(self):
+        Document = self.apps.get_model('documents', 'Document')
+        self.assertEqual(Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf")
+        self.assertEqual(Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf")
+        self.assertEqual(Document.objects.get(id=self.doc_no_archive.id).archive_filename, None)
+        self.assertEqual(Document.objects.get(id=self.clash1.id).archive_filename, f"{self.clash1.id:07}.pdf")
+        self.assertEqual(Document.objects.get(id=self.clash2.id).archive_filename, f"{self.clash2.id:07}.pdf")
+        self.assertEqual(Document.objects.get(id=self.clash3.id).archive_filename, f"{self.clash3.id:07}.pdf")
+        self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf")
+
+
+@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
+class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
+
+    def test_filenames(self):
+        Document = self.apps.get_model('documents', 'Document')
+        self.assertEqual(Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf")
+        self.assertEqual(Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf")
+        self.assertEqual(Document.objects.get(id=self.doc_no_archive.id).archive_filename, None)
+        self.assertEqual(Document.objects.get(id=self.clash1.id).archive_filename, "none/clash.pdf")
+        self.assertEqual(Document.objects.get(id=self.clash2.id).archive_filename, "none/clash_01.pdf")
+        self.assertEqual(Document.objects.get(id=self.clash3.id).archive_filename, "none/clash_02.pdf")
+        self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf")
+
+
+def fake_parse_wrapper(parser, path, mime_type, file_name):
+    parser.archive_path = None
+    parser.text = "the text"
+
+
+@override_settings(PAPERLESS_FILENAME_FORMAT="")
+class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
+
+    migrate_from = '1011_auto_20210101_2340'
+    migrate_to = '1012_fix_archive_files'
+    auto_migrate = False
+
+    def test_archive_missing(self):
+
+        Document = self.apps.get_model("documents", "Document")
+
+        doc = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf)
+        os.unlink(archive_path_old(doc))
+
+        self.assertRaisesMessage(ValueError, "does not exist at: ", self.performMigration)
+
+    def test_parser_missing(self):
+        Document = self.apps.get_model("documents", "Document")
+
+        doc1 = make_test_document(Document, "document", "invalid/typesss768", simple_png, "document.png", simple_pdf)
+        doc2 = make_test_document(Document, "document", "invalid/typesss768", simple_jpg, "document.jpg", simple_pdf)
+
+        self.assertRaisesMessage(ValueError, "no parsers are available", self.performMigration)
+
+    @mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
+    def test_parser_error(self, m):
+        m.side_effect = ParseError()
+        Document = self.apps.get_model("documents", "Document")
+
+        doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
+        doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)
+
+        self.assertIsNotNone(doc1.archive_checksum)
+        self.assertIsNotNone(doc2.archive_checksum)
+
+        with self.assertLogs() as capture:
+            self.performMigration()
+
+        self.assertEqual(m.call_count, 6)
+
+        self.assertEqual(
+            len(list(filter(lambda log: "Parse error, will try again in 5 seconds" in log, capture.output))),
+            4)
+
+        self.assertEqual(
+            len(list(filter(lambda log: "Unable to regenerate archive document for ID:" in log, capture.output))),
+            2)
+
+        Document = self.apps.get_model("documents", "Document")
+
+        doc1 = Document.objects.get(id=doc1.id)
+        doc2 = Document.objects.get(id=doc2.id)
+
+        self.assertIsNone(doc1.archive_checksum)
+        self.assertIsNone(doc2.archive_checksum)
+        self.assertIsNone(doc1.archive_filename)
+        self.assertIsNone(doc2.archive_filename)
+
+    @mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
+    def test_parser_no_archive(self, m):
+        m.side_effect = fake_parse_wrapper
+
+        Document = self.apps.get_model("documents", "Document")
+
+        doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
+        doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)
+
+        with self.assertLogs() as capture:
+            self.performMigration()
+
+        self.assertEqual(
+            len(list(filter(lambda log: "Parser did not return an archive document for document" in log, capture.output))),
+            2)
+
+        Document = self.apps.get_model("documents", "Document")
+
+        doc1 = Document.objects.get(id=doc1.id)
+        doc2 = Document.objects.get(id=doc2.id)
+
+        self.assertIsNone(doc1.archive_checksum)
+        self.assertIsNone(doc2.archive_checksum)
+        self.assertIsNone(doc1.archive_filename)
+        self.assertIsNone(doc2.archive_filename)
+
+
+@override_settings(PAPERLESS_FILENAME_FORMAT="")
+class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
+
+    migrate_from = '1012_fix_archive_files'
+    migrate_to = '1011_auto_20210101_2340'
+
+    def setUpBeforeMigration(self, apps):
+
+        Document = apps.get_model("documents", "Document")
+
+        doc_unrelated = make_test_document(Document, "unrelated", "application/pdf", simple_pdf2, "unrelated.txt", simple_pdf2, "unrelated.pdf")
+        doc_no_archive = make_test_document(Document, "no_archive", "text/plain", simple_txt, "no_archive.txt")
+        clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_02.pdf")
+
+    def testArchiveFilesReverted(self):
+        Document = self.apps.get_model('documents', 'Document')
+
+        for doc in Document.objects.all():
+            if doc.archive_checksum:
+                self.assertTrue(os.path.isfile(archive_path_old(doc)))
+            with open(source_path(doc), "rb") as f:
+                original_checksum = hashlib.md5(f.read()).hexdigest()
+            self.assertEqual(original_checksum, doc.checksum)
+
+            if doc.archive_checksum:
+                self.assertTrue(os.path.isfile(archive_path_old(doc)))
+                with open(archive_path_old(doc), "rb") as f:
+                    archive_checksum = hashlib.md5(f.read()).hexdigest()
+                self.assertEqual(archive_checksum, doc.archive_checksum)
+
+        self.assertEqual(Document.objects.filter(archive_checksum__isnull=False).count(), 2)
+
+
+@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
+class TestMigrateArchiveFilesBackwardsWithFilenameFormat(TestMigrateArchiveFilesBackwards):
+    pass
+
+
+@override_settings(PAPERLESS_FILENAME_FORMAT="")
+class TestMigrateArchiveFilesBackwardsErrors(DirectoriesMixin, TestMigrations):
+
+    migrate_from = '1012_fix_archive_files'
+    migrate_to = '1011_auto_20210101_2340'
+    auto_migrate = False
+
+    def test_filename_clash(self):
+
+        Document = self.apps.get_model("documents", "Document")
+
+        self.clashA = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf, "clash_02.pdf")
+        self.clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_01.pdf")
+
+        self.assertRaisesMessage(ValueError, "would clash with another archive filename", self.performMigration)
+
+    def test_filename_exists(self):
+
+        Document = self.apps.get_model("documents", "Document")
+
+        self.clashA = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf, "clash.pdf")
+        self.clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_01.pdf")
+
+        self.assertRaisesMessage(ValueError, "file already exists.", self.performMigration)
--- a/src/documents/tests/test_migration_mime_type.py
+++ b/src/documents/tests/test_migration_mime_type.py
@@ -1,52 +1,11 @@
 import os
 import shutil
-from pathlib import Path

-from django.apps import apps
 from django.conf import settings
-from django.db import connection
-from django.db.migrations.executor import MigrationExecutor
-from django.test import TestCase, TransactionTestCase, override_settings
+from django.test import override_settings

-from documents.models import Document
 from documents.parsers import get_default_file_extension
-from documents.tests.utils import DirectoriesMixin
-
-
-class TestMigrations(TransactionTestCase):
-
-    @property
-    def app(self):
-        return apps.get_containing_app_config(type(self).__module__).name
-
-    migrate_from = None
-    migrate_to = None
-
-    def setUp(self):
-        super(TestMigrations, self).setUp()
-
-        assert self.migrate_from and self.migrate_to, \
-            "TestCase '{}' must define migrate_from and migrate_to     properties".format(type(self).__name__)
-        self.migrate_from = [(self.app, self.migrate_from)]
-        self.migrate_to = [(self.app, self.migrate_to)]
-        executor = MigrationExecutor(connection)
-        old_apps = executor.loader.project_state(self.migrate_from).apps
-
-        # Reverse to the original migration
-        executor.migrate(self.migrate_from)
-
-        self.setUpBeforeMigration(old_apps)
-
-        # Run the migration to test
-        executor = MigrationExecutor(connection)
-        executor.loader.build_graph()  # reload.
-        executor.migrate(self.migrate_to)
-
-        self.apps = executor.loader.project_state(self.migrate_to).apps
-
-    def setUpBeforeMigration(self, apps):
-        pass
-
+from documents.tests.utils import DirectoriesMixin, TestMigrations

 STORAGE_TYPE_UNENCRYPTED = "unencrypted"
 STORAGE_TYPE_GPG = "gpg"
--- a/src/documents/tests/test_parsers.py
+++ b/src/documents/tests/test_parsers.py
@@ -68,7 +68,7 @@ class TestParserDiscovery(TestCase):
            )


-def fake_get_thumbnail(self, path, mimetype):
+def fake_get_thumbnail(self, path, mimetype, file_name):
    return os.path.join(os.path.dirname(__file__), "examples", "no-text.png")


@@ -89,15 +89,15 @@ class TestBaseParser(TestCase):
    def test_get_optimised_thumbnail(self):
        parser = DocumentParser(None)

-        parser.get_optimised_thumbnail("any", "not important")
+        parser.get_optimised_thumbnail("any", "not important", "document.pdf")

    @mock.patch("documents.parsers.DocumentParser.get_thumbnail", fake_get_thumbnail)
    @override_settings(OPTIMIZE_THUMBNAILS=False)
    def test_get_optimised_thumb_disabled(self):
        parser = DocumentParser(None)

-        path = parser.get_optimised_thumbnail("any", "not important")
-        self.assertEqual(path, fake_get_thumbnail(None, None, None))
+        path = parser.get_optimised_thumbnail("any", "not important", "document.pdf")
+        self.assertEqual(path, fake_get_thumbnail(None, None, None, None))


 class TestParserAvailability(TestCase):
--- a/src/documents/tests/test_sanity_check.py
+++ b/src/documents/tests/test_sanity_check.py
@@ -21,7 +21,7 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
            shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf"), os.path.join(self.dirs.archive_dir, "0000001.pdf"))
            shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png"), os.path.join(self.dirs.thumbnail_dir, "0000001.png"))

-        return Document.objects.create(title="test", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", content="test", pk=1, filename="0000001.pdf", mime_type="application/pdf")
+        return Document.objects.create(title="test", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", content="test", pk=1, filename="0000001.pdf", mime_type="application/pdf", archive_filename="0000001.pdf")

    def test_no_docs(self):
        self.assertEqual(len(check_sanity()), 0)
@@ -86,6 +86,19 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
        Path(self.dirs.originals_dir, "orphaned").touch()
        self.assertEqual(len(check_sanity()), 1)

-    def test_all(self):
-        Document.objects.create(title="test", checksum="dgfhj", archive_checksum="dfhg", content="", pk=1, filename="0000001.pdf")
+    def test_error_tostring(self):
+        Document.objects.create(title="test", checksum="dgfhj", archive_checksum="dfhg", content="", pk=1, filename="0000001.pdf", archive_filename="0000001.pdf")
        string = str(SanityFailedError(check_sanity()))
+        self.assertIsNotNone(string)
+
+    def test_archive_filename_no_checksum(self):
+        doc = self.make_test_data()
+        doc.archive_checksum = None
+        doc.save()
+        self.assertEqual(len(check_sanity()), 2)
+
+    def test_archive_checksum_no_filename(self):
+        doc = self.make_test_data()
+        doc.archive_filename = None
+        doc.save()
+        self.assertEqual(len(check_sanity()), 2)
--- a/src/documents/tests/utils.py
+++ b/src/documents/tests/utils.py
@@ -4,7 +4,10 @@ import tempfile
 from collections import namedtuple
 from contextlib import contextmanager

-from django.test import override_settings
+from django.apps import apps
+from django.db import connection
+from django.db.migrations.executor import MigrationExecutor
+from django.test import override_settings, TransactionTestCase


 def setup_directories():
@@ -79,3 +82,45 @@ class DirectoriesMixin:
    def tearDown(self) -> None:
        super(DirectoriesMixin, self).tearDown()
        remove_dirs(self.dirs)
+
+
+class TestMigrations(TransactionTestCase):
+
+    @property
+    def app(self):
+        return apps.get_containing_app_config(type(self).__module__).name
+
+    migrate_from = None
+    migrate_to = None
+    auto_migrate = True
+
+    def setUp(self):
+        super(TestMigrations, self).setUp()
+
+        assert self.migrate_from and self.migrate_to, \
+            "TestCase '{}' must define migrate_from and migrate_to     properties".format(type(self).__name__)
+        self.migrate_from = [(self.app, self.migrate_from)]
+        self.migrate_to = [(self.app, self.migrate_to)]
+        executor = MigrationExecutor(connection)
+        old_apps = executor.loader.project_state(self.migrate_from).apps
+
+        # Reverse to the original migration
+        executor.migrate(self.migrate_from)
+
+        self.setUpBeforeMigration(old_apps)
+
+        self.apps = old_apps
+
+        if self.auto_migrate:
+            self.performMigration()
+
+    def performMigration(self):
+        # Run the migration to test
+        executor = MigrationExecutor(connection)
+        executor.loader.build_graph()  # reload.
+        executor.migrate(self.migrate_to)
+
+        self.apps = executor.loader.project_state(self.migrate_to).apps
+
+    def setUpBeforeMigration(self, apps):
+        pass
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -192,7 +192,7 @@ class DocumentViewSet(RetrieveModelMixin,

    def file_response(self, pk, request, disposition):
        doc = Document.objects.get(id=pk)
-        if not self.original_requested(request) and os.path.isfile(doc.archive_path):  # NOQA: E501
+        if not self.original_requested(request) and doc.has_archive_version:  # NOQA: E501
            file_handle = doc.archive_file
            filename = doc.get_public_filename(archive=True)
            mime_type = 'application/pdf'
@@ -237,18 +237,18 @@ class DocumentViewSet(RetrieveModelMixin,
            "original_size": os.stat(doc.source_path).st_size,
            "original_mime_type": doc.mime_type,
            "media_filename": doc.filename,
-            "has_archive_version": os.path.isfile(doc.archive_path),
+            "has_archive_version": doc.has_archive_version,
            "original_metadata": self.get_metadata(
-                doc.source_path, doc.mime_type)
+                doc.source_path, doc.mime_type),
+            "archive_checksum": doc.archive_checksum,
+            "archive_media_filename": doc.archive_filename
        }

-        if doc.archive_checksum and os.path.isfile(doc.archive_path):
-            meta['archive_checksum'] = doc.archive_checksum
+        if doc.has_archive_version:
            meta['archive_size'] = os.stat(doc.archive_path).st_size,
            meta['archive_metadata'] = self.get_metadata(
                doc.archive_path, "application/pdf")
        else:
-            meta['archive_checksum'] = None
            meta['archive_size'] = None
            meta['archive_metadata'] = None

@@ -291,6 +291,8 @@ class DocumentViewSet(RetrieveModelMixin,
                handle = GnuPG.decrypted(doc.thumbnail_file)
            else:
                handle = doc.thumbnail_file
+            # TODO: Send ETag information and use that to send new thumbnails
+            #  if available
            return HttpResponse(handle,
                                content_type='image/png')
        except (FileNotFoundError, Document.DoesNotExist):
--- a/src/paperless/version.py
+++ b/src/paperless/version.py
@@ -1 +1 @@
-__version__ = (1, 1, 0)
+__version__ = (1, 1, 1)
--- a/src/paperless_mail/tasks.py
+++ b/src/paperless_mail/tasks.py
@@ -13,11 +13,8 @@ def process_mail_accounts():
        try:
            total_new_documents += MailAccountHandler().handle_mail_account(
                account)
-        except MailError as e:
-            logger.error(
-                f"Error while processing mail account {account}: {e}",
-                exc_info=True
-            )
+        except MailError:
+            logger.exception(f"Error while processing mail account {account}")

    if total_new_documents > 0:
        return f"Added {total_new_documents} document(s)."
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -48,7 +48,7 @@ class RasterisedDocumentParser(DocumentParser):
                    )
        return result

-    def get_thumbnail(self, document_path, mime_type):
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
        return make_thumbnail_from_pdf(
            document_path, self.tempdir, self.logging_group)

--- a/src/paperless_text/parsers.py
+++ b/src/paperless_text/parsers.py
@@ -13,7 +13,7 @@ class TextDocumentParser(DocumentParser):

    logging_name = "paperless.parsing.text"

-    def get_thumbnail(self, document_path, mime_type):
+    def get_thumbnail(self, document_path, mime_type, file_name=None):

        def read_text():
            with open(document_path, 'r') as src:
--- a/src/paperless_tika/parsers.py
+++ b/src/paperless_tika/parsers.py
@@ -16,9 +16,9 @@ class TikaDocumentParser(DocumentParser):

    logging_name = "paperless.parsing.tika"

-    def get_thumbnail(self, document_path, mime_type):
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
        if not self.archive_path:
-            self.archive_path = self.convert_to_pdf(document_path)
+            self.archive_path = self.convert_to_pdf(document_path, file_name)

        return make_thumbnail_from_pdf(
            self.archive_path, self.tempdir, self.logging_group)
Author	SHA1	Message	Date
jonaswinkler	13e91d8c95	changelog	2021-02-12 18:04:15 +01:00
jonaswinkler	6ac90181cb	documentation and changelog	2021-02-12 16:54:00 +01:00
jonaswinkler	d6c3471909	reprganized docker file, less layers, new shortcuts for management commands	2021-02-12 16:53:51 +01:00
jonaswinkler	5b56fad9c7	fix test case	2021-02-12 01:31:50 +01:00
jonaswinkler	ed0b1fe115	better exception logging	2021-02-11 22:16:41 +01:00
jonaswinkler	4211153527	update file renaming logic	2021-02-11 13:47:17 +01:00
jonaswinkler	2f85461109	added some test cases that I still need to address	2021-02-10 23:53:48 +01:00
jonaswinkler	3fa7dcb0cb	changes to the admin document list	2021-02-10 21:34:58 +01:00
jonaswinkler	857fe3a55c	fix one incorrect use of archive_version	2021-02-10 21:34:39 +01:00
jonaswinkler	dd19ea46fe	backup documentation	2021-02-10 20:14:55 +01:00
jonaswinkler	21740a9d87	changelog	2021-02-10 20:05:02 +01:00
jonaswinkler	658fb2f208	remove invalid test cases	2021-02-10 20:01:35 +01:00
jonaswinkler	252d4cb513	update document admin	2021-02-10 18:55:39 +01:00
jonaswinkler	5aed41223b	Merge branch 'master' into dev	2021-02-10 18:44:02 +01:00
jonaswinkler	45dfbf3747	downgrades	2021-02-10 18:27:41 +01:00
jonaswinkler	e4fe5bebab	requirements	2021-02-10 17:09:22 +01:00
jonaswinkler	04519ee623	more testing of the migration	2021-02-10 16:58:55 +01:00
jonaswinkler	6c8f010f7a	retries for archive generation	2021-02-10 14:50:20 +01:00
jonaswinkler	ed84cf26e7	update dependencies	2021-02-10 14:31:17 +01:00
jonaswinkler	1bc961f0c0	update dependencies	2021-02-10 11:50:57 +01:00
jonaswinkler	77d745381f	more testing	2021-02-10 01:31:15 +01:00
jonaswinkler	7082cb9c36	document renamer testing	2021-02-10 01:12:45 +01:00
jonaswinkler	34e84cc757	sanity checker testing	2021-02-10 00:52:18 +01:00
jonaswinkler	9246411610	better logging for the migration	2021-02-10 00:52:01 +01:00
jonaswinkler	8330b3598c	changelog	2021-02-09 23:23:11 +01:00
jonaswinkler	1d002149dc	added ASN to filename format #519	2021-02-09 23:03:07 +01:00
jonaswinkler	8d6071e977	fix a bug with thumbnail generation when TIKA was enabled	2021-02-09 22:12:43 +01:00
jonaswinkler	7d67766508	todo note #520	2021-02-09 21:53:10 +01:00
jonaswinkler	887dd122fe	more info in the admin	2021-02-09 21:00:04 +01:00
jonaswinkler	a1293c77b9	fix migration and more tests	2021-02-09 20:54:02 +01:00
jonaswinkler	ee9a73aa95	codestyle	2021-02-09 20:46:41 +01:00
jonaswinkler	9df332b614	test resources	2021-02-09 19:51:25 +01:00
jonaswinkler	d13e86a892	update all test cases to address the archive filename changes	2021-02-09 19:51:16 +01:00
jonaswinkler	69d7f8c180	testing the updated migration	2021-02-09 19:49:29 +01:00
jonaswinkler	1ba89ddd09	refactor migration tests to allow testing for exceptions while migrating	2021-02-09 19:47:50 +01:00
jonaswinkler	0c40a28ad3	more sanity checks regarding archive versions	2021-02-09 19:46:59 +01:00
jonaswinkler	2b7424c42a	imports	2021-02-09 19:46:42 +01:00
jonaswinkler	a9f1766d1c	todo note	2021-02-09 19:46:32 +01:00
jonaswinkler	fca8576d80	archive filenames are now stored in the database and checked for collisions just as original filenames as well, unified method for archive version checking	2021-02-09 19:46:19 +01:00
jonaswinkler	05f59e7d5e	another way to make the test case fail	2021-02-09 02:13:25 +01:00
jonaswinkler	c9511680b3	version push	2021-02-09 01:36:39 +01:00
jonaswinkler	0ed001c56e	validate move before migration	2021-02-09 00:13:13 +01:00
jonaswinkler	1e5a418191	more testing #511	2021-02-09 00:01:11 +01:00
jonaswinkler	e05735bc0f	fix some test cases	2021-02-09 00:00:46 +01:00
jonaswinkler	7621e10840	only move unaffected files, regenerate affected files	2021-02-08 23:54:07 +01:00
jonaswinkler	d90080f325	only move files if necessary	2021-02-08 22:49:01 +01:00
jonaswinkler	0c676b90f2	migration for #511	2021-02-08 20:59:14 +01:00
jonaswinkler	c2d8bda83c	fix for #511	2021-02-08 19:59:14 +01:00
jonaswinkler	302ebf737e	refactor migration test case	2021-02-08 13:18:39 +01:00
jonaswinkler	816c95a4ae	code style	2021-02-08 13:18:08 +01:00
jonaswinkler	40106f6fcc	updated documentation regarding execution of management commands with docker fixes #509	2021-02-08 00:10:52 +01:00
jonaswinkler	61143b3ad1	make the test case fail	2021-02-07 19:53:08 +01:00
jonaswinkler	9b64eebd10	revert commit	2021-02-07 18:26:03 +01:00
jonaswinkler	731418349f	added a test case that replicates #511	2021-02-07 18:23:54 +01:00
jonaswinkler	7728920670	Merge branch 'dev'	2021-02-07 01:22:04 +01:00
jonaswinkler	f555bb95ae	possible fix for the ansible roles	2021-02-07 00:49:53 +01:00