diff --git a/.editorconfig b/.editorconfig index a8b21f510..eecb25efb 100644 --- a/.editorconfig +++ b/.editorconfig @@ -39,3 +39,6 @@ max_line_length = off [Dockerfile*] indent_style = space + +[*.toml] +indent_style = space diff --git a/docs/administration.md b/docs/administration.md index 7e5ca2d41..511015b6b 100644 --- a/docs/administration.md +++ b/docs/administration.md @@ -62,6 +62,10 @@ copies you created in the steps above. ## Updating Paperless {#updating} +!!! warning + + Please review the [migration instructions](migration-v3.md) before upgrading Paperless-ngx to v3.0. The release includes breaking changes that require manual intervention. + ### Docker Route {#docker-updating} If a new release of paperless-ngx is available, upgrading depends on how diff --git a/docs/configuration.md b/docs/configuration.md index 10b20fe9a..2fc651a76 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -51,137 +51,172 @@ matcher. ### Database By default, Paperless uses **SQLite** with a database stored at `data/db.sqlite3`. -To switch to **PostgreSQL** or **MariaDB**, set [`PAPERLESS_DBHOST`](#PAPERLESS_DBHOST) and optionally configure other -database-related environment variables. +For multi-user or higher-throughput deployments, **PostgreSQL** (recommended) or +**MariaDB** can be used instead by setting [`PAPERLESS_DBENGINE`](#PAPERLESS_DBENGINE) +and the relevant connection variables. + +#### [`PAPERLESS_DBENGINE=`](#PAPERLESS_DBENGINE) {#PAPERLESS_DBENGINE} + +: Specifies the database engine to use. Accepted values are `sqlite`, `postgresql`, +and `mariadb`. + + Defaults to `sqlite` if not set. + + PostgreSQL and MariaDB both require [`PAPERLESS_DBHOST`](#PAPERLESS_DBHOST) to be + set. SQLite does not use any other connection variables; the database file is always + named `db.sqlite3` and located inside the Paperless data directory. + + !!! warning + Using MariaDB comes with some caveats. + See [MySQL Caveats](advanced_usage.md#mysql-caveats). 
#### [`PAPERLESS_DBHOST=`](#PAPERLESS_DBHOST) {#PAPERLESS_DBHOST} -: If unset, Paperless uses **SQLite** by default. - - Set `PAPERLESS_DBHOST` to switch to PostgreSQL or MariaDB instead. - -#### [`PAPERLESS_DBENGINE=`](#PAPERLESS_DBENGINE) {#PAPERLESS_DBENGINE} - -: Optional. Specifies the database engine to use when connecting to a remote database. -Available options are `postgresql` and `mariadb`. - - Defaults to `postgresql` if `PAPERLESS_DBHOST` is set. - - !!! warning - - Using MariaDB comes with some caveats. See [MySQL Caveats](advanced_usage.md#mysql-caveats). +: Hostname of the PostgreSQL or MariaDB database server. Required when +`PAPERLESS_DBENGINE` is `postgresql` or `mariadb`. #### [`PAPERLESS_DBPORT=`](#PAPERLESS_DBPORT) {#PAPERLESS_DBPORT} : Port to use when connecting to PostgreSQL or MariaDB. - Default is `5432` for PostgreSQL and `3306` for MariaDB. + Defaults to `5432` for PostgreSQL and `3306` for MariaDB. #### [`PAPERLESS_DBNAME=`](#PAPERLESS_DBNAME) {#PAPERLESS_DBNAME} -: Name of the database to connect to when using PostgreSQL or MariaDB. +: Name of the PostgreSQL or MariaDB database to connect to. - Defaults to "paperless". + Defaults to `paperless`. -#### [`PAPERLESS_DBUSER=`](#PAPERLESS_DBUSER) {#PAPERLESS_DBUSER} +#### [`PAPERLESS_DBUSER=`](#PAPERLESS_DBUSER) {#PAPERLESS_DBUSER} : Username for authenticating with the PostgreSQL or MariaDB database. - Defaults to "paperless". + Defaults to `paperless`. #### [`PAPERLESS_DBPASS=`](#PAPERLESS_DBPASS) {#PAPERLESS_DBPASS} : Password for the PostgreSQL or MariaDB database user. - Defaults to "paperless". + Defaults to `paperless`. -#### [`PAPERLESS_DBSSLMODE=`](#PAPERLESS_DBSSLMODE) {#PAPERLESS_DBSSLMODE} +#### [`PAPERLESS_DB_OPTIONS=`](#PAPERLESS_DB_OPTIONS) {#PAPERLESS_DB_OPTIONS} -: SSL mode to use when connecting to PostgreSQL or MariaDB. +: Advanced database connection options as a semicolon-delimited key-value string. +Keys and values are separated by `=`. 
Dot-notation produces nested option +dictionaries; for example, `pool.max_size=20` sets +`OPTIONS["pool"]["max_size"] = 20`. - See [the official documentation about - sslmode for PostgreSQL](https://www.postgresql.org/docs/current/libpq-ssl.html). + Options specified here are merged over the engine defaults. Unrecognised keys + are passed through to the underlying database driver without validation, so a + typo will be silently ignored rather than producing an error. - See [the official documentation about - sslmode for MySQL and MariaDB](https://dev.mysql.com/doc/refman/8.0/en/connection-options.html#option_general_ssl-mode). + Refer to your database driver's documentation for the full set of accepted keys: - *Note*: SSL mode values differ between PostgreSQL and MariaDB. + - PostgreSQL: [libpq connection parameters](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS) + - MariaDB: [MariaDB Connector/Python](https://mariadb.com/kb/en/mariadb-connector-python/) + - SQLite: [SQLite PRAGMA statements](https://www.sqlite.org/pragma.html) - Default is `prefer` for PostgreSQL and `PREFERRED` for MariaDB. + !!! note "PostgreSQL connection pooling" -#### [`PAPERLESS_DBSSLROOTCERT=`](#PAPERLESS_DBSSLROOTCERT) {#PAPERLESS_DBSSLROOTCERT} + Pool size is controlled via `pool.min_size` and `pool.max_size`. When + configuring pooling, ensure your PostgreSQL `max_connections` is large enough + to handle all pool connections across all workers: + `(web_workers + celery_workers) * pool.max_size + safety_margin`. -: Path to the SSL root certificate used to verify the database server. + **Examples:** - See [the official documentation about - sslmode for PostgreSQL](https://www.postgresql.org/docs/current/libpq-ssl.html). - Changes the location of `root.crt`. 
+ ```bash title="PostgreSQL: require SSL, set a custom CA certificate, and limit the pool size" + PAPERLESS_DB_OPTIONS="sslmode=require;sslrootcert=/certs/ca.pem;pool.max_size=5" + ``` - See [the official documentation about - sslmode for MySQL and MariaDB](https://dev.mysql.com/doc/refman/8.0/en/connection-options.html#option_general_ssl-ca). + ```bash title="MariaDB: require SSL with a custom CA certificate" + PAPERLESS_DB_OPTIONS="ssl_mode=REQUIRED;ssl.ca=/certs/ca.pem" + ``` - Defaults to unset, using the standard location in the home directory. + ```bash title="SQLite: set a busy timeout of 30 seconds" + # PostgreSQL and MariaDB use connect_timeout instead of timeout + PAPERLESS_DB_OPTIONS="timeout=30" + ``` -#### [`PAPERLESS_DBSSLCERT=`](#PAPERLESS_DBSSLCERT) {#PAPERLESS_DBSSLCERT} +#### ~~[`PAPERLESS_DBSSLMODE`](#PAPERLESS_DBSSLMODE)~~ {#PAPERLESS_DBSSLMODE} -: Path to the client SSL certificate used when connecting securely. +!!! failure "Removed in v3" - See [the official documentation about - sslmode for PostgreSQL](https://www.postgresql.org/docs/current/libpq-ssl.html). + Use [`PAPERLESS_DB_OPTIONS`](#PAPERLESS_DB_OPTIONS) instead. - See [the official documentation about - sslmode for MySQL and MariaDB](https://dev.mysql.com/doc/refman/8.0/en/connection-options.html#option_general_ssl-cert). + ```bash title="PostgreSQL" + PAPERLESS_DB_OPTIONS="sslmode=require" + ``` - Changes the location of `postgresql.crt`. + ```bash title="MariaDB" + PAPERLESS_DB_OPTIONS="ssl_mode=REQUIRED" + ``` - Defaults to unset, using the standard location in the home directory. +#### ~~[`PAPERLESS_DBSSLROOTCERT`](#PAPERLESS_DBSSLROOTCERT)~~ {#PAPERLESS_DBSSLROOTCERT} -#### [`PAPERLESS_DBSSLKEY=`](#PAPERLESS_DBSSLKEY) {#PAPERLESS_DBSSLKEY} +!!! failure "Removed in v3" -: Path to the client SSL private key used when connecting securely. + Use [`PAPERLESS_DB_OPTIONS`](#PAPERLESS_DB_OPTIONS) instead. 
- See [the official documentation about - sslmode for PostgreSQL](https://www.postgresql.org/docs/current/libpq-ssl.html). + ```bash title="PostgreSQL" + PAPERLESS_DB_OPTIONS="sslrootcert=/path/to/ca.pem" + ``` - See [the official documentation about - sslmode for MySQL and MariaDB](https://dev.mysql.com/doc/refman/8.0/en/connection-options.html#option_general_ssl-key). + ```bash title="MariaDB" + PAPERLESS_DB_OPTIONS="ssl.ca=/path/to/ca.pem" + ``` - Changes the location of `postgresql.key`. +#### ~~[`PAPERLESS_DBSSLCERT`](#PAPERLESS_DBSSLCERT)~~ {#PAPERLESS_DBSSLCERT} - Defaults to unset, using the standard location in the home directory. +!!! failure "Removed in v3" -#### [`PAPERLESS_DB_TIMEOUT=`](#PAPERLESS_DB_TIMEOUT) {#PAPERLESS_DB_TIMEOUT} + Use [`PAPERLESS_DB_OPTIONS`](#PAPERLESS_DB_OPTIONS) instead. -: Sets how long a database connection should wait before timing out. + ```bash title="PostgreSQL" + PAPERLESS_DB_OPTIONS="sslcert=/path/to/client.crt" + ``` - For SQLite, this sets how long to wait if the database is locked. - For PostgreSQL or MariaDB, this sets the connection timeout. + ```bash title="MariaDB" + PAPERLESS_DB_OPTIONS="ssl.cert=/path/to/client.crt" + ``` - Defaults to unset, which uses Django’s built-in defaults. +#### ~~[`PAPERLESS_DBSSLKEY`](#PAPERLESS_DBSSLKEY)~~ {#PAPERLESS_DBSSLKEY} -#### [`PAPERLESS_DB_POOLSIZE=`](#PAPERLESS_DB_POOLSIZE) {#PAPERLESS_DB_POOLSIZE} +!!! failure "Removed in v3" -: Defines the maximum number of database connections to keep in the pool. + Use [`PAPERLESS_DB_OPTIONS`](#PAPERLESS_DB_OPTIONS) instead. - Only applies to PostgreSQL. This setting is ignored for other database engines. + ```bash title="PostgreSQL" + PAPERLESS_DB_OPTIONS="sslkey=/path/to/client.key" + ``` - The value must be greater than or equal to 1 to be used. - Defaults to unset, which disables connection pooling. + ```bash title="MariaDB" + PAPERLESS_DB_OPTIONS="ssl.key=/path/to/client.key" + ``` - !!! 
note +#### ~~[`PAPERLESS_DB_TIMEOUT`](#PAPERLESS_DB_TIMEOUT)~~ {#PAPERLESS_DB_TIMEOUT} - A pool of 8-10 connections per worker is typically sufficient. - If you encounter error messages such as `couldn't get a connection` - or database connection timeouts, you probably need to increase the pool size. +!!! failure "Removed in v3" - !!! warning - Make sure your PostgreSQL `max_connections` setting is large enough to handle the connection pools: - `(NB_PAPERLESS_WORKERS + NB_CELERY_WORKERS) × POOL_SIZE + SAFETY_MARGIN`. For example, with - 4 Paperless workers and 2 Celery workers, and a pool size of 8:``(4 + 2) × 8 + 10 = 58`, - so `max_connections = 60` (or even more) is appropriate. + Use [`PAPERLESS_DB_OPTIONS`](#PAPERLESS_DB_OPTIONS) instead. - This assumes only Paperless-ngx connects to your PostgreSQL instance. If you have other applications, - you should increase `max_connections` accordingly. + ```bash title="SQLite" + PAPERLESS_DB_OPTIONS="timeout=30" + ``` + + ```bash title="PostgreSQL or MariaDB" + PAPERLESS_DB_OPTIONS="connect_timeout=30" + ``` + +#### ~~[`PAPERLESS_DB_POOLSIZE`](#PAPERLESS_DB_POOLSIZE)~~ {#PAPERLESS_DB_POOLSIZE} + +!!! failure "Removed in v3" + + Use [`PAPERLESS_DB_OPTIONS`](#PAPERLESS_DB_OPTIONS) instead. + + ```bash + PAPERLESS_DB_OPTIONS="pool.max_size=10" + ``` #### [`PAPERLESS_DB_READ_CACHE_ENABLED=`](#PAPERLESS_DB_READ_CACHE_ENABLED) {#PAPERLESS_DB_READ_CACHE_ENABLED} diff --git a/docs/migration.md b/docs/migration-v3.md similarity index 61% rename from docs/migration.md rename to docs/migration-v3.md index 60ffbf074..91d9ed54d 100644 --- a/docs/migration.md +++ b/docs/migration-v3.md @@ -48,3 +48,58 @@ The `CONSUMER_BARCODE_SCANNER` setting has been removed. zxing-cpp is now the on reliability. - The `libzbar0` / `libzbar-dev` system packages are no longer required and can be removed from any custom Docker images or host installations. + +## Database Engine + +`PAPERLESS_DBENGINE` is now required to use PostgreSQL or MariaDB. 
Previously, the +engine was inferred from the presence of `PAPERLESS_DBHOST`, with `PAPERLESS_DBENGINE` +only needed to select MariaDB over PostgreSQL. + +SQLite users require no changes, though they may explicitly set their engine if desired. + +#### Action Required + +PostgreSQL and MariaDB users must add `PAPERLESS_DBENGINE` to their environment: + +```yaml +# v2 (PostgreSQL inferred from PAPERLESS_DBHOST) +PAPERLESS_DBHOST: postgres + +# v3 (engine must be explicit) +PAPERLESS_DBENGINE: postgresql +PAPERLESS_DBHOST: postgres +``` + +See [`PAPERLESS_DBENGINE`](configuration.md#PAPERLESS_DBENGINE) for accepted values. + +## Database Advanced Options + +The individual SSL, timeout, and pooling variables have been deprecated in favor of a +single [`PAPERLESS_DB_OPTIONS`](configuration.md#PAPERLESS_DB_OPTIONS) string. This +consolidates a growing set of engine-specific variables into one place, and allows +any option supported by the underlying database driver to be set without requiring a +dedicated environment variable for each. + +The deprecated variables and their replacements are: + +| Deprecated Variable | Replacement in `PAPERLESS_DB_OPTIONS` | +| ------------------------- | ---------------------------------------------------------------------------- | +| `PAPERLESS_DBSSLMODE` | `sslmode=` (PostgreSQL) or `ssl_mode=` (MariaDB) | +| `PAPERLESS_DBSSLROOTCERT` | `sslrootcert=` (PostgreSQL) or `ssl.ca=` (MariaDB) | +| `PAPERLESS_DBSSLCERT` | `sslcert=` (PostgreSQL) or `ssl.cert=` (MariaDB) | +| `PAPERLESS_DBSSLKEY` | `sslkey=` (PostgreSQL) or `ssl.key=` (MariaDB) | +| `PAPERLESS_DB_POOLSIZE` | `pool.max_size=` (PostgreSQL only) | +| `PAPERLESS_DB_TIMEOUT` | `timeout=` (SQLite) or `connect_timeout=` (PostgreSQL/MariaDB) | + +The deprecated variables will continue to function for now but will be removed in a +future release. A deprecation warning is logged at startup for each deprecated variable +that is still set. 
+ +#### Action Required + +Users with any of the deprecated variables set should migrate to `PAPERLESS_DB_OPTIONS`. +Multiple options are combined in a single value: + +```bash +PAPERLESS_DB_OPTIONS="sslmode=require;sslrootcert=/certs/ca.pem;pool.max_size=10" +``` diff --git a/docs/setup.md b/docs/setup.md index 3b033ed16..415b3622e 100644 --- a/docs/setup.md +++ b/docs/setup.md @@ -504,8 +504,7 @@ installation. Keep these points in mind: - Read the [changelog](changelog.md) and take note of breaking changes. - Decide whether to stay on SQLite or migrate to PostgreSQL. - See [documentation](#sqlite_to_psql) for details on moving data - from SQLite to PostgreSQL. Both work fine with + Both work fine with Paperless. However, if you already have a database server running for other services, you might as well use it for Paperless as well. - The task scheduler of Paperless, which is used to execute periodic diff --git a/src/paperless/checks.py b/src/paperless/checks.py index 7df85d146..c0c7f49e1 100644 --- a/src/paperless/checks.py +++ b/src/paperless/checks.py @@ -202,3 +202,43 @@ def audit_log_check(app_configs, **kwargs): ) return result + + +@register() +def check_deprecated_db_settings( + app_configs: object, + **kwargs: object, +) -> list[Warning]: + """Check for deprecated database environment variables. + + Detects legacy advanced options that should be migrated to + PAPERLESS_DB_OPTIONS. Returns one Warning per deprecated variable found. 
+ """ + deprecated_vars: dict[str, str] = { + "PAPERLESS_DB_TIMEOUT": "timeout", + "PAPERLESS_DB_POOLSIZE": "pool.min_size / pool.max_size", + "PAPERLESS_DBSSLMODE": "sslmode", + "PAPERLESS_DBSSLROOTCERT": "sslrootcert", + "PAPERLESS_DBSSLCERT": "sslcert", + "PAPERLESS_DBSSLKEY": "sslkey", + } + + warnings: list[Warning] = [] + + for var_name, db_option_key in deprecated_vars.items(): + if not os.getenv(var_name): + continue + warnings.append( + Warning( + f"Deprecated environment variable: {var_name}", + hint=( + f"{var_name} is no longer supported and will be removed in v3.2. " + f"Set the equivalent option via PAPERLESS_DB_OPTIONS instead. " + f'Example: PAPERLESS_DB_OPTIONS=\'{{"{db_option_key}": ""}}\'. ' + "See https://docs.paperless-ngx.com/migration/ for the full reference." + ), + id="paperless.W001", + ), + ) + + return warnings diff --git a/src/paperless/settings.py b/src/paperless/settings/__init__.py similarity index 93% rename from src/paperless/settings.py rename to src/paperless/settings/__init__.py index bee406fa2..9f820bb04 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings/__init__.py @@ -17,6 +17,8 @@ from dateparser.languages.loader import LocaleDataLoader from django.utils.translation import gettext_lazy as _ from dotenv import load_dotenv +from paperless.settings.custom import parse_db_settings + logger = logging.getLogger("paperless.settings") # Tap paperless.conf if it's available @@ -282,7 +284,7 @@ DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO") # Directories # ############################################################################### -BASE_DIR: Path = Path(__file__).resolve().parent.parent +BASE_DIR: Path = Path(__file__).resolve().parent.parent.parent STATIC_ROOT = __get_path("PAPERLESS_STATICDIR", BASE_DIR.parent / "static") @@ -722,83 +724,8 @@ EMAIL_CERTIFICATE_FILE = __get_optional_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATI ############################################################################### # Database 
# ############################################################################### -def _parse_db_settings() -> dict: - databases = { - "default": { - "ENGINE": "django.db.backends.sqlite3", - "NAME": DATA_DIR / "db.sqlite3", - "OPTIONS": {}, - }, - } - if os.getenv("PAPERLESS_DBHOST"): - # Have sqlite available as a second option for management commands - # This is important when migrating to/from sqlite - databases["sqlite"] = databases["default"].copy() - databases["default"] = { - "HOST": os.getenv("PAPERLESS_DBHOST"), - "NAME": os.getenv("PAPERLESS_DBNAME", "paperless"), - "USER": os.getenv("PAPERLESS_DBUSER", "paperless"), - "PASSWORD": os.getenv("PAPERLESS_DBPASS", "paperless"), - "OPTIONS": {}, - } - if os.getenv("PAPERLESS_DBPORT"): - databases["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT") - - # Leave room for future extensibility - if os.getenv("PAPERLESS_DBENGINE") == "mariadb": - engine = "django.db.backends.mysql" - # Contrary to Postgres, Django does not natively support connection pooling for MariaDB. 
- # However, since MariaDB uses threads instead of forks, establishing connections is significantly faster - # compared to PostgreSQL, so the lack of pooling is not an issue - options = { - "read_default_file": "/etc/mysql/my.cnf", - "charset": "utf8mb4", - "ssl_mode": os.getenv("PAPERLESS_DBSSLMODE", "PREFERRED"), - "ssl": { - "ca": os.getenv("PAPERLESS_DBSSLROOTCERT", None), - "cert": os.getenv("PAPERLESS_DBSSLCERT", None), - "key": os.getenv("PAPERLESS_DBSSLKEY", None), - }, - } - - else: # Default to PostgresDB - engine = "django.db.backends.postgresql" - options = { - "sslmode": os.getenv("PAPERLESS_DBSSLMODE", "prefer"), - "sslrootcert": os.getenv("PAPERLESS_DBSSLROOTCERT", None), - "sslcert": os.getenv("PAPERLESS_DBSSLCERT", None), - "sslkey": os.getenv("PAPERLESS_DBSSLKEY", None), - } - if int(os.getenv("PAPERLESS_DB_POOLSIZE", 0)) > 0: - options.update( - { - "pool": { - "min_size": 1, - "max_size": int(os.getenv("PAPERLESS_DB_POOLSIZE")), - }, - }, - ) - - databases["default"]["ENGINE"] = engine - databases["default"]["OPTIONS"].update(options) - - if os.getenv("PAPERLESS_DB_TIMEOUT") is not None: - if databases["default"]["ENGINE"] == "django.db.backends.sqlite3": - databases["default"]["OPTIONS"].update( - {"timeout": int(os.getenv("PAPERLESS_DB_TIMEOUT"))}, - ) - else: - databases["default"]["OPTIONS"].update( - {"connect_timeout": int(os.getenv("PAPERLESS_DB_TIMEOUT"))}, - ) - databases["sqlite"]["OPTIONS"].update( - {"timeout": int(os.getenv("PAPERLESS_DB_TIMEOUT"))}, - ) - return databases - - -DATABASES = _parse_db_settings() +DATABASES = parse_db_settings(DATA_DIR) if os.getenv("PAPERLESS_DBENGINE") == "mariadb": # Silence Django error on old MariaDB versions. 
diff --git a/src/paperless/settings/custom.py b/src/paperless/settings/custom.py new file mode 100644 index 000000000..6f7d94201 --- /dev/null +++ b/src/paperless/settings/custom.py @@ -0,0 +1,122 @@ +import os +from pathlib import Path +from typing import Any + +from paperless.settings.parsers import get_choice_from_env +from paperless.settings.parsers import get_int_from_env +from paperless.settings.parsers import parse_dict_from_str + + +def parse_db_settings(data_dir: Path) -> dict[str, dict[str, Any]]: + """Parse database settings from environment variables. + + Core connection variables (no deprecation): + - PAPERLESS_DBENGINE (sqlite/postgresql/mariadb) + - PAPERLESS_DBHOST, PAPERLESS_DBPORT + - PAPERLESS_DBNAME, PAPERLESS_DBUSER, PAPERLESS_DBPASS + + Advanced options can be set via: + - Legacy individual env vars (deprecated in v3.0, removed in v3.2) + - PAPERLESS_DB_OPTIONS (recommended v3+ approach) + + Args: + data_dir: The data directory path for SQLite database location. + + Returns: + A databases dict suitable for Django DATABASES setting. 
+ """ + try: + engine = get_choice_from_env( + "PAPERLESS_DBENGINE", + {"sqlite", "postgresql", "mariadb"}, + default="sqlite", + ) + except ValueError: + # MariaDB users already had to set PAPERLESS_DBENGINE, so it was picked up above + # SQLite users didn't need to set anything + engine = "postgresql" if "PAPERLESS_DBHOST" in os.environ else "sqlite" + + db_config: dict[str, Any] + base_options: dict[str, Any] + + match engine: + case "sqlite": + db_config = { + "ENGINE": "django.db.backends.sqlite3", + "NAME": str((data_dir / "db.sqlite3").resolve()), + } + base_options = {} + + case "postgresql": + db_config = { + "ENGINE": "django.db.backends.postgresql", + "HOST": os.getenv("PAPERLESS_DBHOST"), + "NAME": os.getenv("PAPERLESS_DBNAME", "paperless"), + "USER": os.getenv("PAPERLESS_DBUSER", "paperless"), + "PASSWORD": os.getenv("PAPERLESS_DBPASS", "paperless"), + } + + base_options = { + "sslmode": os.getenv("PAPERLESS_DBSSLMODE", "prefer"), + "sslrootcert": os.getenv("PAPERLESS_DBSSLROOTCERT"), + "sslcert": os.getenv("PAPERLESS_DBSSLCERT"), + "sslkey": os.getenv("PAPERLESS_DBSSLKEY"), + } + + if (pool_size := get_int_from_env("PAPERLESS_DB_POOLSIZE")) is not None: + base_options["pool"] = { + "min_size": 1, + "max_size": pool_size, + } + + case "mariadb": + db_config = { + "ENGINE": "django.db.backends.mysql", + "HOST": os.getenv("PAPERLESS_DBHOST"), + "NAME": os.getenv("PAPERLESS_DBNAME", "paperless"), + "USER": os.getenv("PAPERLESS_DBUSER", "paperless"), + "PASSWORD": os.getenv("PAPERLESS_DBPASS", "paperless"), + } + + base_options = { + "read_default_file": "/etc/mysql/my.cnf", + "charset": "utf8mb4", + "collation": "utf8mb4_unicode_ci", + "ssl_mode": os.getenv("PAPERLESS_DBSSLMODE", "PREFERRED"), + "ssl": { + "ca": os.getenv("PAPERLESS_DBSSLROOTCERT"), + "cert": os.getenv("PAPERLESS_DBSSLCERT"), + "key": os.getenv("PAPERLESS_DBSSLKEY"), + }, + } + case _: # pragma: no cover + raise NotImplementedError(engine) + + # Handle port setting for external databases 
+ if ( + engine in ("postgresql", "mariadb") + and (port := get_int_from_env("PAPERLESS_DBPORT")) is not None + ): + db_config["PORT"] = port + + # Handle timeout setting (common across all engines, different key names) + if (timeout := get_int_from_env("PAPERLESS_DB_TIMEOUT")) is not None: + timeout_key = "timeout" if engine == "sqlite" else "connect_timeout" + base_options[timeout_key] = timeout + + # Apply PAPERLESS_DB_OPTIONS overrides + db_config["OPTIONS"] = parse_dict_from_str( + os.getenv("PAPERLESS_DB_OPTIONS"), + defaults=base_options, + separator=";", + type_map={ + # SQLite options + "timeout": int, + # Postgres/MariaDB options + "connect_timeout": int, + "pool.min_size": int, + "pool.max_size": int, + }, + ) + + return {"default": db_config} diff --git a/src/paperless/settings/parsers.py b/src/paperless/settings/parsers.py new file mode 100644 index 000000000..e6960abf1 --- /dev/null +++ b/src/paperless/settings/parsers.py @@ -0,0 +1,192 @@ +import copy +import os +from collections.abc import Callable +from collections.abc import Mapping +from pathlib import Path +from typing import Any +from typing import TypeVar +from typing import overload + +T = TypeVar("T") + + +def str_to_bool(value: str) -> bool: + """ + Converts a string representation of truth to a boolean value. + + Recognizes 'true', '1', 't', 'y', 'yes' as True, and + 'false', '0', 'f', 'n', 'no' as False. Case-insensitive. + + Args: + value: The string to convert. + + Returns: + The boolean representation of the string. + + Raises: + ValueError: If the string is not a recognized boolean value. + """ + val_lower = value.strip().lower() + if val_lower in ("true", "1", "t", "y", "yes"): + return True + elif val_lower in ("false", "0", "f", "n", "no"): + return False + raise ValueError(f"Cannot convert '{value}' to a boolean.") + + +@overload +def get_int_from_env(key: str) -> int | None: ... + + +@overload +def get_int_from_env(key: str, default: None) -> int | None: ... 
+ + +@overload +def get_int_from_env(key: str, default: int) -> int: ... + + +def get_int_from_env(key: str, default: int | None = None) -> int | None: + """ + Return an integer value based on the environment variable. + If default is provided, returns that value when key is missing. + If default is None, returns None when key is missing. + """ + if key not in os.environ: + return default + + return int(os.environ[key]) + + +def parse_dict_from_str( + env_str: str | None, + defaults: dict[str, Any] | None = None, + type_map: Mapping[str, Callable[[str], Any]] | None = None, + separator: str = ",", +) -> dict[str, Any]: + """ + Parses a key-value string into a dictionary, applying defaults and casting types. + + Supports nested keys via dot-notation, e.g.: + "database.host=localhost,database.port=5432" + + Args: + env_str: The string from the environment variable (e.g., "port=9090,debug=true"). + defaults: A dictionary of default values (can contain nested dicts). + type_map: A dictionary mapping keys (dot-notation allowed) to a type or a parsing + function (e.g., {'port': int, 'debug': bool, 'database.port': int}). + The special `bool` type triggers custom boolean parsing. + separator: The character used to separate key-value pairs. Defaults to ','. + + Returns: + A dictionary with the parsed and correctly-typed settings. + + Raises: + ValueError: If a value cannot be cast to its specified type. 
+ """ + + def _set_nested(d: dict, keys: list[str], value: Any) -> None: + """Set a nested value, creating intermediate dicts as needed.""" + cur = d + for k in keys[:-1]: + if k not in cur or not isinstance(cur[k], dict): + cur[k] = {} + cur = cur[k] + cur[keys[-1]] = value + + def _get_nested(d: dict, keys: list[str]) -> Any: + """Get nested value or raise KeyError if not present.""" + cur = d + for k in keys: + if not isinstance(cur, dict) or k not in cur: + raise KeyError + cur = cur[k] + return cur + + def _has_nested(d: dict, keys: list[str]) -> bool: + try: + _get_nested(d, keys) + return True + except KeyError: + return False + + settings: dict[str, Any] = copy.deepcopy(defaults) if defaults else {} + _type_map = type_map if type_map else {} + + if not env_str: + return settings + + # Parse the environment string using the specified separator + pairs = [p.strip() for p in env_str.split(separator) if p.strip()] + for pair in pairs: + if "=" not in pair: + # ignore malformed pairs + continue + key, val = pair.split("=", 1) + key = key.strip() + val = val.strip() + if not key: + continue + parts = key.split(".") + _set_nested(settings, parts, val) + + # Apply type casting to the updated settings (supports nested keys in type_map) + for key, caster in _type_map.items(): + key_parts = key.split(".") + if _has_nested(settings, key_parts): + raw_val = _get_nested(settings, key_parts) + # Only cast if it's a string (i.e. from env parsing). If defaults already provided + # a different type we leave it as-is. 
+ if isinstance(raw_val, str): + try: + if caster is bool: + parsed = str_to_bool(raw_val) + elif caster is Path: + parsed = Path(raw_val).resolve() + else: + parsed = caster(raw_val) + except (ValueError, TypeError) as e: + caster_name = getattr(caster, "__name__", repr(caster)) + raise ValueError( + f"Error casting key '{key}' with value '{raw_val}' " + f"to type '{caster_name}'", + ) from e + _set_nested(settings, key_parts, parsed) + + return settings + + +def get_choice_from_env( + env_key: str, + choices: set[str], + default: str | None = None, +) -> str: + """ + Gets and validates an environment variable against a set of allowed choices. + + Args: + env_key: The environment variable key to validate + choices: Set of valid choices for the environment variable + default: Optional default value if environment variable is not set + + Returns: + The validated environment variable value + + Raises: + ValueError: If the environment variable value is not in choices + or if no default is provided and env var is missing + """ + value = os.environ.get(env_key, default) + + if value is None: + raise ValueError( + f"Environment variable '{env_key}' is required but not set.", + ) + + if value not in choices: + raise ValueError( + f"Environment variable '{env_key}' has invalid value '{value}'. 
" + f"Valid choices are: {', '.join(sorted(choices))}", + ) + + return value diff --git a/src/paperless/tests/settings/__init__.py b/src/paperless/tests/settings/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/paperless/tests/settings/test_custom_parsers.py b/src/paperless/tests/settings/test_custom_parsers.py new file mode 100644 index 000000000..e297d9e04 --- /dev/null +++ b/src/paperless/tests/settings/test_custom_parsers.py @@ -0,0 +1,266 @@ +import os +from pathlib import Path + +import pytest +from pytest_mock import MockerFixture + +from paperless.settings.custom import parse_db_settings + + +class TestParseDbSettings: + """Test suite for parse_db_settings function.""" + + @pytest.mark.parametrize( + ("env_vars", "expected_database_settings"), + [ + pytest.param( + {}, + { + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": None, # Will be replaced with tmp_path + "OPTIONS": {}, + }, + }, + id="default-sqlite", + ), + pytest.param( + { + "PAPERLESS_DBENGINE": "sqlite", + "PAPERLESS_DB_OPTIONS": "timeout=30", + }, + { + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": None, # Will be replaced with tmp_path + "OPTIONS": { + "timeout": 30, + }, + }, + }, + id="sqlite-with-timeout-override", + ), + pytest.param( + { + "PAPERLESS_DBENGINE": "postgresql", + "PAPERLESS_DBHOST": "localhost", + }, + { + "default": { + "ENGINE": "django.db.backends.postgresql", + "HOST": "localhost", + "NAME": "paperless", + "USER": "paperless", + "PASSWORD": "paperless", + "OPTIONS": { + "sslmode": "prefer", + "sslrootcert": None, + "sslcert": None, + "sslkey": None, + }, + }, + }, + id="postgresql-defaults", + ), + pytest.param( + { + "PAPERLESS_DBENGINE": "postgresql", + "PAPERLESS_DBHOST": "paperless-db-host", + "PAPERLESS_DBPORT": "1111", + "PAPERLESS_DBNAME": "customdb", + "PAPERLESS_DBUSER": "customuser", + "PAPERLESS_DBPASS": "custompass", + "PAPERLESS_DB_OPTIONS": "pool.max_size=50;pool.min_size=2;sslmode=require", + 
}, + { + "default": { + "ENGINE": "django.db.backends.postgresql", + "HOST": "paperless-db-host", + "PORT": 1111, + "NAME": "customdb", + "USER": "customuser", + "PASSWORD": "custompass", + "OPTIONS": { + "sslmode": "require", + "sslrootcert": None, + "sslcert": None, + "sslkey": None, + "pool": { + "min_size": 2, + "max_size": 50, + }, + }, + }, + }, + id="postgresql-overrides", + ), + pytest.param( + { + "PAPERLESS_DBENGINE": "postgresql", + "PAPERLESS_DBHOST": "pghost", + "PAPERLESS_DB_POOLSIZE": "10", + }, + { + "default": { + "ENGINE": "django.db.backends.postgresql", + "HOST": "pghost", + "NAME": "paperless", + "USER": "paperless", + "PASSWORD": "paperless", + "OPTIONS": { + "sslmode": "prefer", + "sslrootcert": None, + "sslcert": None, + "sslkey": None, + "pool": { + "min_size": 1, + "max_size": 10, + }, + }, + }, + }, + id="postgresql-legacy-poolsize", + ), + pytest.param( + { + "PAPERLESS_DBENGINE": "postgresql", + "PAPERLESS_DBHOST": "pghost", + "PAPERLESS_DBSSLMODE": "require", + "PAPERLESS_DBSSLROOTCERT": "/certs/ca.crt", + "PAPERLESS_DB_TIMEOUT": "30", + }, + { + "default": { + "ENGINE": "django.db.backends.postgresql", + "HOST": "pghost", + "NAME": "paperless", + "USER": "paperless", + "PASSWORD": "paperless", + "OPTIONS": { + "sslmode": "require", + "sslrootcert": "/certs/ca.crt", + "sslcert": None, + "sslkey": None, + "connect_timeout": 30, + }, + }, + }, + id="postgresql-legacy-ssl-and-timeout", + ), + pytest.param( + { + "PAPERLESS_DBENGINE": "mariadb", + "PAPERLESS_DBHOST": "localhost", + }, + { + "default": { + "ENGINE": "django.db.backends.mysql", + "HOST": "localhost", + "NAME": "paperless", + "USER": "paperless", + "PASSWORD": "paperless", + "OPTIONS": { + "read_default_file": "/etc/mysql/my.cnf", + "charset": "utf8mb4", + "collation": "utf8mb4_unicode_ci", + "ssl_mode": "PREFERRED", + "ssl": { + "ca": None, + "cert": None, + "key": None, + }, + }, + }, + }, + id="mariadb-defaults", + ), + pytest.param( + { + "PAPERLESS_DBENGINE": "mariadb", 
+ "PAPERLESS_DBHOST": "paperless-mariadb-host", + "PAPERLESS_DBPORT": "5555", + "PAPERLESS_DBUSER": "my-cool-user", + "PAPERLESS_DBPASS": "my-secure-password", + "PAPERLESS_DB_OPTIONS": "ssl.ca=/path/to/ca.pem;ssl_mode=REQUIRED", + }, + { + "default": { + "ENGINE": "django.db.backends.mysql", + "HOST": "paperless-mariadb-host", + "PORT": 5555, + "NAME": "paperless", + "USER": "my-cool-user", + "PASSWORD": "my-secure-password", + "OPTIONS": { + "read_default_file": "/etc/mysql/my.cnf", + "charset": "utf8mb4", + "collation": "utf8mb4_unicode_ci", + "ssl_mode": "REQUIRED", + "ssl": { + "ca": "/path/to/ca.pem", + "cert": None, + "key": None, + }, + }, + }, + }, + id="mariadb-overrides", + ), + pytest.param( + { + "PAPERLESS_DBENGINE": "mariadb", + "PAPERLESS_DBHOST": "mariahost", + "PAPERLESS_DBSSLMODE": "REQUIRED", + "PAPERLESS_DBSSLROOTCERT": "/certs/ca.pem", + "PAPERLESS_DBSSLCERT": "/certs/client.pem", + "PAPERLESS_DBSSLKEY": "/certs/client.key", + "PAPERLESS_DB_TIMEOUT": "25", + }, + { + "default": { + "ENGINE": "django.db.backends.mysql", + "HOST": "mariahost", + "NAME": "paperless", + "USER": "paperless", + "PASSWORD": "paperless", + "OPTIONS": { + "read_default_file": "/etc/mysql/my.cnf", + "charset": "utf8mb4", + "collation": "utf8mb4_unicode_ci", + "ssl_mode": "REQUIRED", + "ssl": { + "ca": "/certs/ca.pem", + "cert": "/certs/client.pem", + "key": "/certs/client.key", + }, + "connect_timeout": 25, + }, + }, + }, + id="mariadb-legacy-ssl-and-timeout", + ), + ], + ) + def test_parse_db_settings( + self, + tmp_path: Path, + mocker: MockerFixture, + env_vars: dict[str, str], + expected_database_settings: dict[str, dict], + ) -> None: + """Test various database configurations with defaults and overrides.""" + # Clear environment and set test vars + mocker.patch.dict(os.environ, env_vars, clear=True) + + # Update expected paths with actual tmp_path + if ( + "default" in expected_database_settings + and expected_database_settings["default"]["NAME"] is None + ): + 
expected_database_settings["default"]["NAME"] = str( + tmp_path / "db.sqlite3", + ) + + settings = parse_db_settings(tmp_path) + + assert settings == expected_database_settings diff --git a/src/paperless/tests/settings/test_environment_parsers.py b/src/paperless/tests/settings/test_environment_parsers.py new file mode 100644 index 000000000..ecce0ea16 --- /dev/null +++ b/src/paperless/tests/settings/test_environment_parsers.py @@ -0,0 +1,414 @@ +import os +from pathlib import Path + +import pytest +from pytest_mock import MockerFixture + +from paperless.settings.parsers import get_choice_from_env +from paperless.settings.parsers import get_int_from_env +from paperless.settings.parsers import parse_dict_from_str +from paperless.settings.parsers import str_to_bool + + +class TestStringToBool: + @pytest.mark.parametrize( + "true_value", + [ + pytest.param("true", id="lowercase_true"), + pytest.param("1", id="digit_1"), + pytest.param("T", id="capital_T"), + pytest.param("y", id="lowercase_y"), + pytest.param("YES", id="uppercase_YES"), + pytest.param(" True ", id="whitespace_true"), + ], + ) + def test_true_conversion(self, true_value: str): + """Test that various 'true' strings correctly evaluate to True.""" + assert str_to_bool(true_value) is True + + @pytest.mark.parametrize( + "false_value", + [ + pytest.param("false", id="lowercase_false"), + pytest.param("0", id="digit_0"), + pytest.param("f", id="capital_f"), + pytest.param("N", id="capital_N"), + pytest.param("no", id="lowercase_no"), + pytest.param(" False ", id="whitespace_false"), + ], + ) + def test_false_conversion(self, false_value: str): + """Test that various 'false' strings correctly evaluate to False.""" + assert str_to_bool(false_value) is False + + def test_invalid_conversion(self): + """Test that an invalid string raises a ValueError.""" + with pytest.raises(ValueError, match="Cannot convert 'maybe' to a boolean\\."): + str_to_bool("maybe") + + +class TestParseDictFromString: + def 
test_empty_and_none_input(self): + """Test behavior with None or empty string input.""" + assert parse_dict_from_str(None) == {} + assert parse_dict_from_str("") == {} + defaults = {"a": 1} + res = parse_dict_from_str(None, defaults=defaults) + assert res == defaults + # Ensure it returns a copy, not the original object + assert res is not defaults + + def test_basic_parsing(self): + """Test simple key-value parsing without defaults or types.""" + env_str = "key1=val1, key2=val2" + expected = {"key1": "val1", "key2": "val2"} + assert parse_dict_from_str(env_str) == expected + + def test_with_defaults(self): + """Test that environment values override defaults correctly.""" + defaults = {"host": "localhost", "port": 8000, "user": "default"} + env_str = "port=9090, host=db.example.com" + expected = {"host": "db.example.com", "port": "9090", "user": "default"} + result = parse_dict_from_str(env_str, defaults=defaults) + assert result == expected + + def test_type_casting(self): + """Test successful casting of values to specified types.""" + env_str = "port=9090, debug=true, timeout=12.5, user=admin" + type_map = {"port": int, "debug": bool, "timeout": float} + expected = {"port": 9090, "debug": True, "timeout": 12.5, "user": "admin"} + result = parse_dict_from_str(env_str, type_map=type_map) + assert result == expected + + def test_type_casting_with_defaults(self): + """Test casting when values come from both defaults and env string.""" + defaults = {"port": 8000, "debug": False, "retries": 3} + env_str = "port=9090, debug=true" + type_map = {"port": int, "debug": bool, "retries": int} + + # The 'retries' value comes from defaults and is already an int, + # so it should not be processed by the caster. 
+ expected = {"port": 9090, "debug": True, "retries": 3} + result = parse_dict_from_str(env_str, defaults=defaults, type_map=type_map) + assert result == expected + assert isinstance(result["retries"], int) + + def test_path_casting(self, tmp_path: Path): + """Test successful casting of a string to a resolved pathlib.Path object.""" + # Create a dummy file to resolve against + test_file = tmp_path / "test_file.txt" + test_file.touch() + + env_str = f"config_path={test_file}" + type_map = {"config_path": Path} + result = parse_dict_from_str(env_str, type_map=type_map) + + # The result should be a resolved Path object + assert isinstance(result["config_path"], Path) + assert result["config_path"] == test_file.resolve() + + def test_custom_separator(self): + """Test parsing with a custom separator like a semicolon.""" + env_str = "host=db; port=5432; user=test" + expected = {"host": "db", "port": "5432", "user": "test"} + result = parse_dict_from_str(env_str, separator=";") + assert result == expected + + def test_edge_cases_in_string(self): + """Test malformed strings to ensure robustness.""" + # Malformed pair 'debug' is skipped, extra comma is ignored + env_str = "key=val,, debug, foo=bar" + expected = {"key": "val", "foo": "bar"} + assert parse_dict_from_str(env_str) == expected + + # Value can contain the equals sign + env_str = "url=postgres://user:pass@host:5432/db" + expected = {"url": "postgres://user:pass@host:5432/db"} + assert parse_dict_from_str(env_str) == expected + + def test_casting_error_handling(self): + """Test that a ValueError is raised for invalid casting.""" + env_str = "port=not-a-number" + type_map = {"port": int} + + with pytest.raises(ValueError) as excinfo: + parse_dict_from_str(env_str, type_map=type_map) + + assert "Error casting key 'port'" in str(excinfo.value) + assert "value 'not-a-number'" in str(excinfo.value) + assert "to type 'int'" in str(excinfo.value) + + def test_bool_casting_error(self): + """Test that an invalid boolean 
string raises a ValueError.""" + env_str = "debug=maybe" + type_map = {"debug": bool} + with pytest.raises(ValueError, match="Error casting key 'debug'"): + parse_dict_from_str(env_str, type_map=type_map) + + def test_nested_key_parsing_basic(self): + """Basic nested key parsing using dot-notation.""" + env_str = "database.host=db.example.com, database.port=5432, logging.level=INFO" + result = parse_dict_from_str(env_str) + assert result == { + "database": {"host": "db.example.com", "port": "5432"}, + "logging": {"level": "INFO"}, + } + + def test_nested_overrides_defaults_and_deepcopy(self): + """Nested env keys override defaults and defaults are deep-copied.""" + defaults = {"database": {"host": "127.0.0.1", "port": 3306, "user": "default"}} + env_str = "database.host=db.example.com, debug=true" + result = parse_dict_from_str( + env_str, + defaults=defaults, + type_map={"debug": bool}, + ) + + assert result["database"]["host"] == "db.example.com" + # Unchanged default preserved + assert result["database"]["port"] == 3306 + assert result["database"]["user"] == "default" + # Default object was deep-copied (no same nested object identity) + assert result is not defaults + assert result["database"] is not defaults["database"] + + def test_nested_type_casting(self): + """Type casting for nested keys (dot-notation) should work.""" + env_str = "database.host=db.example.com, database.port=5433, debug=false" + type_map = {"database.port": int, "debug": bool} + result = parse_dict_from_str(env_str, type_map=type_map) + + assert result["database"]["host"] == "db.example.com" + assert result["database"]["port"] == 5433 + assert isinstance(result["database"]["port"], int) + assert result["debug"] is False + assert isinstance(result["debug"], bool) + + def test_nested_casting_error_message(self): + """Error messages should include the full dotted key name on failure.""" + env_str = "database.port=not-a-number" + type_map = {"database.port": int} + with 
pytest.raises(ValueError) as excinfo: + parse_dict_from_str(env_str, type_map=type_map) + + msg = str(excinfo.value) + assert "Error casting key 'database.port'" in msg + assert "value 'not-a-number'" in msg + assert "to type 'int'" in msg + + def test_type_map_does_not_recast_non_string_defaults(self): + """If a default already provides a non-string value, the caster should skip it.""" + defaults = {"database": {"port": 3306}} + type_map = {"database.port": int} + result = parse_dict_from_str(None, defaults=defaults, type_map=type_map) + assert result["database"]["port"] == 3306 + assert isinstance(result["database"]["port"], int) + + +class TestGetIntFromEnv: + @pytest.mark.parametrize( + ("env_value", "expected"), + [ + pytest.param("42", 42, id="positive"), + pytest.param("-10", -10, id="negative"), + pytest.param("0", 0, id="zero"), + pytest.param("999", 999, id="large_positive"), + pytest.param("-999", -999, id="large_negative"), + ], + ) + def test_existing_env_var_valid_ints(self, mocker, env_value, expected): + """Test that existing environment variables with valid integers return correct values.""" + mocker.patch.dict(os.environ, {"INT_VAR": env_value}) + assert get_int_from_env("INT_VAR") == expected + + @pytest.mark.parametrize( + ("default", "expected"), + [ + pytest.param(100, 100, id="positive_default"), + pytest.param(0, 0, id="zero_default"), + pytest.param(-50, -50, id="negative_default"), + pytest.param(None, None, id="none_default"), + ], + ) + def test_missing_env_var_with_defaults(self, mocker, default, expected): + """Test that missing environment variables return provided defaults.""" + mocker.patch.dict(os.environ, {}, clear=True) + assert get_int_from_env("MISSING_VAR", default=default) == expected + + def test_missing_env_var_no_default(self, mocker): + """Test that missing environment variable with no default returns None.""" + mocker.patch.dict(os.environ, {}, clear=True) + assert get_int_from_env("MISSING_VAR") is None + + 
@pytest.mark.parametrize( + "invalid_value", + [ + pytest.param("not_a_number", id="text"), + pytest.param("42.5", id="float"), + pytest.param("42a", id="alpha_suffix"), + pytest.param("", id="empty"), + pytest.param(" ", id="whitespace"), + pytest.param("true", id="boolean"), + pytest.param("1.0", id="decimal"), + ], + ) + def test_invalid_int_values_raise_error(self, mocker, invalid_value): + """Test that invalid integer values raise ValueError.""" + mocker.patch.dict(os.environ, {"INVALID_INT": invalid_value}) + with pytest.raises(ValueError): + get_int_from_env("INVALID_INT") + + +class TestGetEnvChoice: + @pytest.fixture + def valid_choices(self) -> set[str]: + """Fixture providing a set of valid environment choices.""" + return {"development", "staging", "production"} + + def test_returns_valid_env_value( + self, + mocker: MockerFixture, + valid_choices: set[str], + ) -> None: + """Test that function returns the environment value when it's valid.""" + mocker.patch.dict("os.environ", {"TEST_ENV": "development"}) + + result = get_choice_from_env("TEST_ENV", valid_choices) + + assert result == "development" + + def test_returns_default_when_env_not_set( + self, + mocker: MockerFixture, + valid_choices: set[str], + ) -> None: + """Test that function returns default value when env var is not set.""" + mocker.patch.dict("os.environ", {}, clear=True) + + result = get_choice_from_env("TEST_ENV", valid_choices, default="staging") + + assert result == "staging" + + def test_raises_error_when_env_not_set_and_no_default( + self, + mocker: MockerFixture, + valid_choices: set[str], + ) -> None: + """Test that function raises ValueError when env var is missing and no default.""" + mocker.patch.dict("os.environ", {}, clear=True) + + with pytest.raises(ValueError) as exc_info: + get_choice_from_env("TEST_ENV", valid_choices) + + assert "Environment variable 'TEST_ENV' is required but not set" in str( + exc_info.value, + ) + + def test_raises_error_when_env_value_invalid( + 
self, + mocker: MockerFixture, + valid_choices: set[str], + ) -> None: + """Test that function raises ValueError when env value is not in choices.""" + mocker.patch.dict("os.environ", {"TEST_ENV": "invalid_value"}) + + with pytest.raises(ValueError) as exc_info: + get_choice_from_env("TEST_ENV", valid_choices) + + error_msg = str(exc_info.value) + assert ( + "Environment variable 'TEST_ENV' has invalid value 'invalid_value'" + in error_msg + ) + assert "Valid choices are:" in error_msg + assert "development" in error_msg + assert "staging" in error_msg + assert "production" in error_msg + + def test_raises_error_when_default_invalid( + self, + mocker: MockerFixture, + valid_choices: set[str], + ) -> None: + """Test that function raises ValueError when default value is not in choices.""" + mocker.patch.dict("os.environ", {}, clear=True) + + with pytest.raises(ValueError) as exc_info: + get_choice_from_env("TEST_ENV", valid_choices, default="invalid_default") + + error_msg = str(exc_info.value) + assert ( + "Environment variable 'TEST_ENV' has invalid value 'invalid_default'" + in error_msg + ) + + def test_case_sensitive_validation( + self, + mocker: MockerFixture, + valid_choices: set[str], + ) -> None: + """Test that validation is case sensitive.""" + mocker.patch.dict("os.environ", {"TEST_ENV": "DEVELOPMENT"}) + + with pytest.raises(ValueError): + get_choice_from_env("TEST_ENV", valid_choices) + + def test_empty_string_env_value( + self, + mocker: MockerFixture, + valid_choices: set[str], + ) -> None: + """Test behavior with empty string environment value.""" + mocker.patch.dict("os.environ", {"TEST_ENV": ""}) + + with pytest.raises(ValueError) as exc_info: + get_choice_from_env("TEST_ENV", valid_choices) + + assert "has invalid value ''" in str(exc_info.value) + + def test_whitespace_env_value( + self, + mocker: MockerFixture, + valid_choices: set[str], + ) -> None: + """Test behavior with whitespace-only environment value.""" + mocker.patch.dict("os.environ", 
{"TEST_ENV": " development "}) + + with pytest.raises(ValueError): + get_choice_from_env("TEST_ENV", valid_choices) + + def test_single_choice_set(self, mocker: MockerFixture) -> None: + """Test function works correctly with single choice set.""" + single_choice: set[str] = {"production"} + mocker.patch.dict("os.environ", {"TEST_ENV": "production"}) + + result = get_choice_from_env("TEST_ENV", single_choice) + + assert result == "production" + + def test_large_choice_set(self, mocker: MockerFixture) -> None: + """Test function works correctly with large choice set.""" + large_choices: set[str] = {f"option_{i}" for i in range(100)} + mocker.patch.dict("os.environ", {"TEST_ENV": "option_50"}) + + result = get_choice_from_env("TEST_ENV", large_choices) + + assert result == "option_50" + + def test_different_env_keys( + self, + mocker: MockerFixture, + valid_choices: set[str], + ) -> None: + """Test function works with different environment variable keys.""" + test_cases = [ + ("DJANGO_ENV", "development"), + ("DATABASE_BACKEND", "staging"), + ("LOG_LEVEL", "production"), + ("APP_MODE", "development"), + ] + + for env_key, env_value in test_cases: + mocker.patch.dict("os.environ", {env_key: env_value}) + result = get_choice_from_env(env_key, valid_choices) + assert result == env_value diff --git a/src/paperless/tests/test_checks.py b/src/paperless/tests/test_checks.py index fc6150826..513cfb870 100644 --- a/src/paperless/tests/test_checks.py +++ b/src/paperless/tests/test_checks.py @@ -2,13 +2,17 @@ import os from pathlib import Path from unittest import mock +import pytest +from django.core.checks import Warning from django.test import TestCase from django.test import override_settings +from pytest_mock import MockerFixture from documents.tests.utils import DirectoriesMixin from documents.tests.utils import FileSystemAssertsMixin from paperless.checks import audit_log_check from paperless.checks import binaries_check +from paperless.checks import 
check_deprecated_db_settings from paperless.checks import debug_mode_check from paperless.checks import paths_check from paperless.checks import settings_values_check @@ -237,3 +241,157 @@ class TestAuditLogChecks(TestCase): ("auditlog table was found but audit log is disabled."), msg.msg, ) + + +DEPRECATED_VARS: dict[str, str] = { + "PAPERLESS_DB_TIMEOUT": "timeout", + "PAPERLESS_DB_POOLSIZE": "pool.min_size / pool.max_size", + "PAPERLESS_DBSSLMODE": "sslmode", + "PAPERLESS_DBSSLROOTCERT": "sslrootcert", + "PAPERLESS_DBSSLCERT": "sslcert", + "PAPERLESS_DBSSLKEY": "sslkey", +} + + +class TestDeprecatedDbSettings: + """Test suite for the check_deprecated_db_settings system check.""" + + def test_no_deprecated_vars_returns_empty( + self, + mocker: MockerFixture, + ) -> None: + """No warnings when none of the deprecated vars are present.""" + # clear=True ensures vars from the outer test environment do not leak in + mocker.patch.dict(os.environ, {}, clear=True) + result = check_deprecated_db_settings(None) + assert result == [] + + @pytest.mark.parametrize( + ("env_var", "db_option_key"), + [ + ("PAPERLESS_DB_TIMEOUT", "timeout"), + ("PAPERLESS_DB_POOLSIZE", "pool.min_size / pool.max_size"), + ("PAPERLESS_DBSSLMODE", "sslmode"), + ("PAPERLESS_DBSSLROOTCERT", "sslrootcert"), + ("PAPERLESS_DBSSLCERT", "sslcert"), + ("PAPERLESS_DBSSLKEY", "sslkey"), + ], + ids=[ + "db-timeout", + "db-poolsize", + "ssl-mode", + "ssl-rootcert", + "ssl-cert", + "ssl-key", + ], + ) + def test_single_deprecated_var_produces_one_warning( + self, + mocker: MockerFixture, + env_var: str, + db_option_key: str, + ) -> None: + """Each deprecated var in isolation produces exactly one warning.""" + mocker.patch.dict(os.environ, {env_var: "some_value"}, clear=True) + result = check_deprecated_db_settings(None) + + assert len(result) == 1 + warning = result[0] + assert isinstance(warning, Warning) + assert warning.id == "paperless.W001" + assert env_var in warning.hint + assert db_option_key in 
warning.hint + + def test_multiple_deprecated_vars_produce_one_warning_each( + self, + mocker: MockerFixture, + ) -> None: + """Each deprecated var present in the environment gets its own warning.""" + set_vars = { + "PAPERLESS_DB_TIMEOUT": "30", + "PAPERLESS_DB_POOLSIZE": "10", + "PAPERLESS_DBSSLMODE": "require", + } + mocker.patch.dict(os.environ, set_vars, clear=True) + result = check_deprecated_db_settings(None) + + assert len(result) == len(set_vars) + assert all(isinstance(w, Warning) for w in result) + assert all(w.id == "paperless.W001" for w in result) + all_hints = " ".join(w.hint for w in result) + for var_name in set_vars: + assert var_name in all_hints + + def test_all_deprecated_vars_produces_one_warning_each( + self, + mocker: MockerFixture, + ) -> None: + """All deprecated vars set simultaneously produces one warning per var.""" + all_vars = dict.fromkeys(DEPRECATED_VARS, "some_value") + mocker.patch.dict(os.environ, all_vars, clear=True) + result = check_deprecated_db_settings(None) + + assert len(result) == len(DEPRECATED_VARS) + assert all(isinstance(w, Warning) for w in result) + assert all(w.id == "paperless.W001" for w in result) + + def test_unset_vars_not_mentioned_in_warnings( + self, + mocker: MockerFixture, + ) -> None: + """Vars absent from the environment do not appear in any warning.""" + mocker.patch.dict( + os.environ, + {"PAPERLESS_DB_TIMEOUT": "30"}, + clear=True, + ) + result = check_deprecated_db_settings(None) + + assert len(result) == 1 + assert "PAPERLESS_DB_TIMEOUT" in result[0].hint + unset_vars = [v for v in DEPRECATED_VARS if v != "PAPERLESS_DB_TIMEOUT"] + for var_name in unset_vars: + assert var_name not in result[0].hint + + def test_empty_string_var_not_treated_as_set( + self, + mocker: MockerFixture, + ) -> None: + """A var set to an empty string is not flagged as a deprecated setting.""" + mocker.patch.dict( + os.environ, + {"PAPERLESS_DB_TIMEOUT": ""}, + clear=True, + ) + result = check_deprecated_db_settings(None) + 
assert result == [] + + def test_warning_mentions_migration_target( + self, + mocker: MockerFixture, + ) -> None: + """Each warning hints at PAPERLESS_DB_OPTIONS as the migration target.""" + mocker.patch.dict( + os.environ, + {"PAPERLESS_DBSSLMODE": "require"}, + clear=True, + ) + result = check_deprecated_db_settings(None) + + assert len(result) == 1 + assert "PAPERLESS_DB_OPTIONS" in result[0].hint + + def test_warning_message_identifies_var( + self, + mocker: MockerFixture, + ) -> None: + """The warning message (not just the hint) identifies the offending var.""" + mocker.patch.dict( + os.environ, + {"PAPERLESS_DBSSLCERT": "/path/to/cert.pem"}, + clear=True, + ) + result = check_deprecated_db_settings(None) + + assert len(result) == 1 + assert "PAPERLESS_DBSSLCERT" in result[0].msg diff --git a/src/paperless/tests/test_settings.py b/src/paperless/tests/test_settings.py index 02db82ef2..cc9ad2081 100644 --- a/src/paperless/tests/test_settings.py +++ b/src/paperless/tests/test_settings.py @@ -9,7 +9,6 @@ from celery.schedules import crontab from paperless.settings import _parse_base_paths from paperless.settings import _parse_beat_schedule from paperless.settings import _parse_dateparser_languages -from paperless.settings import _parse_db_settings from paperless.settings import _parse_ignore_dates from paperless.settings import _parse_paperless_url from paperless.settings import _parse_redis_url @@ -378,64 +377,6 @@ class TestCeleryScheduleParsing(TestCase): ) -class TestDBSettings(TestCase): - def test_db_timeout_with_sqlite(self) -> None: - """ - GIVEN: - - PAPERLESS_DB_TIMEOUT is set - WHEN: - - Settings are parsed - THEN: - - PAPERLESS_DB_TIMEOUT set for sqlite - """ - with mock.patch.dict( - os.environ, - { - "PAPERLESS_DB_TIMEOUT": "10", - }, - ): - databases = _parse_db_settings() - - self.assertDictEqual( - { - "timeout": 10.0, - }, - databases["default"]["OPTIONS"], - ) - - def test_db_timeout_with_not_sqlite(self) -> None: - """ - GIVEN: - - 
PAPERLESS_DB_TIMEOUT is set but db is not sqlite - WHEN: - - Settings are parsed - THEN: - - PAPERLESS_DB_TIMEOUT set correctly in non-sqlite db & for fallback sqlite db - """ - with mock.patch.dict( - os.environ, - { - "PAPERLESS_DBHOST": "127.0.0.1", - "PAPERLESS_DB_TIMEOUT": "10", - }, - ): - databases = _parse_db_settings() - - self.assertDictEqual( - databases["default"]["OPTIONS"], - databases["default"]["OPTIONS"] - | { - "connect_timeout": 10.0, - }, - ) - self.assertDictEqual( - { - "timeout": 10.0, - }, - databases["sqlite"]["OPTIONS"], - ) - - class TestPaperlessURLSettings(TestCase): def test_paperless_url(self) -> None: """ diff --git a/zensical.toml b/zensical.toml index 4dbd2bf2e..d78ed4b39 100644 --- a/zensical.toml +++ b/zensical.toml @@ -18,7 +18,10 @@ nav = [ "setup.md", "usage.md", "configuration.md", - "administration.md", + { Administration = [ + "administration.md", + { "v3 Migration Guide" = "migration-v3.md" }, + ] }, "advanced_usage.md", "api.md", "development.md",