Compare commits

..

1 Commits

Author SHA1 Message Date
stumpylog 5b13d24043 Construct the pinned transports and clients in 1 place exactly 2026-06-16 11:34:10 -07:00
5 changed files with 156 additions and 30 deletions
+7 -9
View File
@@ -4,8 +4,7 @@ import httpx
from celery import shared_task
from django.conf import settings
from paperless.network import PinnedHostHTTPTransport
from paperless.network import validate_outbound_http_url
from paperless.network import make_pinned_transport
logger = logging.getLogger("paperless.workflows.webhooks")
@@ -25,21 +24,20 @@ def send_webhook(
as_json: bool = False,
):
try:
validate_outbound_http_url(
# Internal-address checks happen in the transport (per-request) to preserve
# ConnectError behavior, so the upfront validation allows internal here and
# the transport pins per the configured WEBHOOKS_ALLOW_INTERNAL_REQUESTS flag.
transport = make_pinned_transport(
url,
allowed_schemes=settings.WEBHOOKS_ALLOWED_SCHEMES,
allowed_ports=settings.WEBHOOKS_ALLOWED_PORTS,
# Internal-address checks happen in transport to preserve ConnectError behavior.
allow_internal=True,
allow_internal=settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS,
defer_internal_check_to_transport=True,
)
except ValueError as e:
logger.warning("Webhook blocked: %s", e)
raise
transport = PinnedHostHTTPTransport(
allow_internal=settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS,
)
try:
post_args = {
"url": url,
+82 -4
View File
@@ -170,15 +170,94 @@ class PinnedHostAsyncHTTPTransport(httpx.AsyncHTTPTransport):
return await super().handle_async_request(request)
def _validate_and_resolve_allow_internal(
    url: str,
    *,
    allowed_schemes: Collection[str],
    allowed_ports: Collection[int] | None,
    allow_internal: bool,
    defer_internal_check_to_transport: bool,
) -> bool:
    """Run the upfront outbound-URL validation and hand back the transport flag.

    Whether a non-public/internal address may be contacted is enforced in two
    places, which normally share the same ``allow_internal`` value:

    * upfront, by ``validate_outbound_http_url`` (fails fast with ``ValueError``);
    * per request, by the pinned transport (fails with ``httpx.ConnectError``).

    Passing ``defer_internal_check_to_transport=True`` relaxes *only* the upfront
    internal-address check; ``allowed_schemes`` and ``allowed_ports`` are still
    forwarded to the validator. The transport then becomes the sole enforcer of
    the internal-address rule. Webhooks rely on this so that a blocked internal
    target surfaces as a retryable ``ConnectError`` instead of a ``ValueError``.

    This helper is the single chokepoint that pairs URL validation with pinned
    transport construction. Code outside this module must go through the
    ``make_pinned_*`` / ``create_pinned_*`` factories and must NOT build a
    ``PinnedHost*Transport`` (or a plain ``httpx`` client) directly for a
    user-influenced URL.

    Returns:
        The caller's ``allow_internal`` value, unchanged, for the pinned
        transport to enforce on every request.
    """
    # Deferring means the upfront validator is told internal addresses are fine;
    # the strict decision is then left entirely to the transport.
    upfront_allow_internal = allow_internal or defer_internal_check_to_transport
    validate_outbound_http_url(
        url,
        allowed_schemes=allowed_schemes,
        allowed_ports=allowed_ports,
        allow_internal=upfront_allow_internal,
    )
    # The transport always receives the real policy, never the relaxed one.
    return allow_internal
def make_pinned_transport(
    url: str,
    *,
    allowed_schemes: Collection[str] = ("http", "https"),
    allowed_ports: Collection[int] | None = None,
    allow_internal: bool = False,
    defer_internal_check_to_transport: bool = False,
) -> PinnedHostHTTPTransport:
    """Build a synchronous pinned transport for ``url`` after validating it.

    Validation is delegated to :func:`_validate_and_resolve_allow_internal`,
    which also documents the meaning of ``allow_internal`` and
    ``defer_internal_check_to_transport``. Any exception raised by that
    validation propagates before a transport is constructed.
    """
    # Validate first; the returned flag is what the transport enforces per request.
    transport_allow_internal = _validate_and_resolve_allow_internal(
        url,
        allowed_schemes=allowed_schemes,
        allowed_ports=allowed_ports,
        allow_internal=allow_internal,
        defer_internal_check_to_transport=defer_internal_check_to_transport,
    )
    return PinnedHostHTTPTransport(allow_internal=transport_allow_internal)
def make_pinned_async_transport(
    url: str,
    *,
    allowed_schemes: Collection[str] = ("http", "https"),
    allowed_ports: Collection[int] | None = None,
    allow_internal: bool = False,
    defer_internal_check_to_transport: bool = False,
) -> PinnedHostAsyncHTTPTransport:
    """Build an asynchronous pinned transport for ``url`` after validating it.

    Async counterpart of :func:`make_pinned_transport`: the URL is validated
    through :func:`_validate_and_resolve_allow_internal` on its own, so this
    factory does not depend on the sync factory having been called first.
    """
    # Validate first; the returned flag is what the transport enforces per request.
    transport_allow_internal = _validate_and_resolve_allow_internal(
        url,
        allowed_schemes=allowed_schemes,
        allowed_ports=allowed_ports,
        allow_internal=allow_internal,
        defer_internal_check_to_transport=defer_internal_check_to_transport,
    )
    return PinnedHostAsyncHTTPTransport(allow_internal=transport_allow_internal)
def create_pinned_httpx_client(
    url: str,
    *,
    allow_internal: bool = False,
    **kwargs,
) -> httpx.Client:
    """Return an ``httpx.Client`` whose transport is pinned and validated for ``url``.

    The URL is validated exactly once, inside :func:`make_pinned_transport`,
    which then builds the pinned transport with the same ``allow_internal``
    value. A validation failure therefore propagates before any client is
    created.

    Args:
        url: The outbound URL the client will be used for.
        allow_internal: Whether non-public/internal addresses are permitted.
        **kwargs: Forwarded unchanged to ``httpx.Client``. Must not contain
            ``transport``, which is supplied here.
    """
    # A single ``transport=`` argument, built by the factory: passing it twice is
    # a syntax error, and validating separately would only repeat the check the
    # factory already performs.
    return httpx.Client(
        transport=make_pinned_transport(url, allow_internal=allow_internal),
        **kwargs,
    )
@@ -189,8 +268,7 @@ def create_pinned_async_httpx_client(
allow_internal: bool = False,
**kwargs,
) -> httpx.AsyncClient:
validate_outbound_http_url(url, allow_internal=allow_internal)
return httpx.AsyncClient(
transport=PinnedHostAsyncHTTPTransport(allow_internal=allow_internal),
transport=make_pinned_async_transport(url, allow_internal=allow_internal),
**kwargs,
)
+56
View File
@@ -2,8 +2,11 @@ from unittest import mock
import httpx
import pytest
from pytest_mock import MockerFixture
from paperless.network import PinnedHostHTTPTransport
from paperless.network import make_pinned_async_transport
from paperless.network import make_pinned_transport
def test_pinned_host_transport_blocks_internal_rebinding():
@@ -48,3 +51,56 @@ def test_pinned_host_transport_rewrites_to_vetted_ip():
response = transport.handle_request(request)
assert response.status_code == 200
class TestPinnedTransportFactories:
    """Exercise the factory chokepoint behavior that call-site tests cannot hit.

    Sync rejection and happy-path construction are already covered end-to-end by
    ``test_get_llm_ollama`` and the ``*_blocks_internal_endpoint_when_disallowed``
    tests in ``test_client.py`` / ``test_embedding.py``, so they are deliberately
    not duplicated here. The gaps filled by this class are:

    * the *async* factory validating by itself (every real call site builds the
      sync transport first and raises before reaching the async one);
    * the ``defer_internal_check_to_transport`` webhook policy;
    * forwarding of ``allowed_ports``.
    """

    # Patch target for DNS resolution, shared by every test in this class.
    _RESOLVER = "paperless.network.resolve_hostname_ips"

    def test_async_factory_validates_independently_of_sync(
        self,
        mocker: MockerFixture,
    ) -> None:
        """The async factory rejects an internal address without sync help."""
        internal_ips = ["10.0.0.1"]
        mocker.patch(self._RESOLVER, return_value=internal_ips)

        with pytest.raises(ValueError, match="non-public address"):
            make_pinned_async_transport("http://internal.example/api")

    def test_defer_internal_check_skips_upfront_but_pins_transport(
        self,
        mocker: MockerFixture,
    ) -> None:
        """Deferring skips the upfront raise but keeps the transport strict."""
        # Webhook policy: resolving to an internal IP must not raise upfront, yet
        # the transport is still pinned with allow_internal=False, so the block
        # is expected at connect time as a ConnectError.
        internal_ips = ["10.0.0.1"]
        mocker.patch(self._RESOLVER, return_value=internal_ips)

        pinned = make_pinned_transport(
            "http://internal.example",
            allow_internal=False,
            defer_internal_check_to_transport=True,
        )

        assert pinned.allow_internal is False

    def test_allowed_ports_enforced_before_construction(
        self,
        mocker: MockerFixture,
    ) -> None:
        """A port outside ``allowed_ports`` is rejected before a transport exists."""
        public_ips = ["93.184.216.34"]
        mocker.patch(self._RESOLVER, return_value=public_ips)

        with pytest.raises(ValueError, match="port not permitted"):
            make_pinned_transport("https://example.com:9999", allowed_ports={443})
+5 -8
View File
@@ -10,11 +10,10 @@ if TYPE_CHECKING:
from llama_index.llms.openai_like import OpenAILike
from paperless.config import AIConfig
from paperless.network import PinnedHostAsyncHTTPTransport
from paperless.network import PinnedHostHTTPTransport
from paperless.network import create_pinned_async_httpx_client
from paperless.network import create_pinned_httpx_client
from paperless.network import validate_outbound_http_url
from paperless.network import make_pinned_async_transport
from paperless.network import make_pinned_transport
from paperless_ai.base_model import DocumentClassifierSchema
logger = logging.getLogger("paperless_ai.client")
@@ -47,14 +46,12 @@ class AIClient:
from ollama import Client
endpoint = self.settings.llm_endpoint or "http://localhost:11434"
validate_outbound_http_url(
transport = make_pinned_transport(
endpoint,
allow_internal=self.settings.llm_allow_internal_endpoints,
)
transport = PinnedHostHTTPTransport(
allow_internal=self.settings.llm_allow_internal_endpoints,
)
async_transport = PinnedHostAsyncHTTPTransport(
async_transport = make_pinned_async_transport(
endpoint,
allow_internal=self.settings.llm_allow_internal_endpoints,
)
return Ollama(
+6 -9
View File
@@ -10,11 +10,10 @@ from documents.models import Document
from documents.models import Note
from paperless.config import AIConfig
from paperless.models import LLMEmbeddingBackend
from paperless.network import PinnedHostAsyncHTTPTransport
from paperless.network import PinnedHostHTTPTransport
from paperless.network import create_pinned_async_httpx_client
from paperless.network import create_pinned_httpx_client
from paperless.network import validate_outbound_http_url
from paperless.network import make_pinned_async_transport
from paperless.network import make_pinned_transport
OCR_LEADER_REGEX = re.compile(r"[._\-\u00b7]{4,}")
HORIZONTAL_WHITESPACE_REGEX = re.compile(r"[ \t\u00a0]+")
@@ -62,10 +61,6 @@ def get_embedding_model(config: AIConfig) -> "BaseEmbedding":
or config.llm_endpoint
or "http://localhost:11434"
)
validate_outbound_http_url(
endpoint,
allow_internal=config.llm_allow_internal_endpoints,
)
embedding = OllamaEmbedding(
model_name=config.llm_embedding_model or "embeddinggemma",
base_url=endpoint,
@@ -73,13 +68,15 @@ def get_embedding_model(config: AIConfig) -> "BaseEmbedding":
)
embedding._client = Client(
host=endpoint,
transport=PinnedHostHTTPTransport(
transport=make_pinned_transport(
endpoint,
allow_internal=config.llm_allow_internal_endpoints,
),
)
embedding._async_client = AsyncClient(
host=endpoint,
transport=PinnedHostAsyncHTTPTransport(
transport=make_pinned_async_transport(
endpoint,
allow_internal=config.llm_allow_internal_endpoints,
),
)