mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-30 09:14:17 +00:00
Compare commits
1 Commit
| Author | SHA1 | Date | |
|---|---|---|---|
| 5b13d24043 |
@@ -4,8 +4,7 @@ import httpx
|
||||
from celery import shared_task
|
||||
from django.conf import settings
|
||||
|
||||
from paperless.network import PinnedHostHTTPTransport
|
||||
from paperless.network import validate_outbound_http_url
|
||||
from paperless.network import make_pinned_transport
|
||||
|
||||
logger = logging.getLogger("paperless.workflows.webhooks")
|
||||
|
||||
@@ -25,21 +24,20 @@ def send_webhook(
|
||||
as_json: bool = False,
|
||||
):
|
||||
try:
|
||||
validate_outbound_http_url(
|
||||
# Internal-address checks happen in the transport (per-request) to preserve
|
||||
# ConnectError behavior, so the upfront validation allows internal here and
|
||||
# the transport pins per the configured WEBHOOKS_ALLOW_INTERNAL_REQUESTS flag.
|
||||
transport = make_pinned_transport(
|
||||
url,
|
||||
allowed_schemes=settings.WEBHOOKS_ALLOWED_SCHEMES,
|
||||
allowed_ports=settings.WEBHOOKS_ALLOWED_PORTS,
|
||||
# Internal-address checks happen in transport to preserve ConnectError behavior.
|
||||
allow_internal=True,
|
||||
allow_internal=settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS,
|
||||
defer_internal_check_to_transport=True,
|
||||
)
|
||||
except ValueError as e:
|
||||
logger.warning("Webhook blocked: %s", e)
|
||||
raise
|
||||
|
||||
transport = PinnedHostHTTPTransport(
|
||||
allow_internal=settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS,
|
||||
)
|
||||
|
||||
try:
|
||||
post_args = {
|
||||
"url": url,
|
||||
|
||||
@@ -170,15 +170,94 @@ class PinnedHostAsyncHTTPTransport(httpx.AsyncHTTPTransport):
|
||||
return await super().handle_async_request(request)
|
||||
|
||||
|
||||
def _validate_and_resolve_allow_internal(
    url: str,
    *,
    allowed_schemes: Collection[str],
    allowed_ports: Collection[int] | None,
    allow_internal: bool,
    defer_internal_check_to_transport: bool,
) -> bool:
    """Run the upfront URL check and return the transport's ``allow_internal``.

    Whether a non-public/internal address may be contacted is enforced in two
    places, which normally share the same ``allow_internal`` value:

    * upfront, by ``validate_outbound_http_url``, which fails fast with
      ``ValueError``;
    * per request, by the pinned transport, which fails with
      ``httpx.ConnectError``.

    Passing ``defer_internal_check_to_transport=True`` disables *only* the
    upfront internal-address check. Scheme and port are still validated here,
    and the transport becomes the sole enforcer of the internal-address policy.
    Webhooks rely on this so that a blocked internal target shows up as a
    retryable ``ConnectError`` instead of a ``ValueError``.

    This helper is the single place where URL validation is paired with
    pinned-transport construction. Code outside this module must go through the
    ``make_pinned_*`` / ``create_pinned_*`` factories and must NOT instantiate
    ``PinnedHost*Transport`` (or a plain ``httpx`` client) directly for a
    user-influenced URL.

    Args:
        url: The outbound URL to validate.
        allowed_schemes: Schemes forwarded to the upfront validator.
        allowed_ports: Ports forwarded to the upfront validator.
        allow_internal: The caller's real internal-address policy.
        defer_internal_check_to_transport: Skip the upfront internal-address
            check and leave that decision to the transport.

    Returns:
        ``allow_internal`` unchanged, for the pinned transport to enforce.
    """
    # Deferring means the upfront validator is told internal targets are
    # acceptable; it must not reject them before the transport gets a chance.
    upfront_allow_internal = allow_internal or defer_internal_check_to_transport
    validate_outbound_http_url(
        url,
        allowed_schemes=allowed_schemes,
        allowed_ports=allowed_ports,
        allow_internal=upfront_allow_internal,
    )
    # The transport always receives the caller's actual policy, never the
    # relaxed upfront value.
    return allow_internal
|
||||
|
||||
|
||||
def make_pinned_transport(
    url: str,
    *,
    allowed_schemes: Collection[str] = ("http", "https"),
    allowed_ports: Collection[int] | None = None,
    allow_internal: bool = False,
    defer_internal_check_to_transport: bool = False,
) -> PinnedHostHTTPTransport:
    """Validate ``url``, then build the synchronous pinned transport for it.

    The meaning of ``allow_internal`` and ``defer_internal_check_to_transport``
    is described on :func:`_validate_and_resolve_allow_internal`, which performs
    the validation and decides the flag handed to the transport.
    """
    # Validation runs first; if it raises, no transport is ever constructed.
    transport_allow_internal = _validate_and_resolve_allow_internal(
        url,
        allowed_schemes=allowed_schemes,
        allowed_ports=allowed_ports,
        allow_internal=allow_internal,
        defer_internal_check_to_transport=defer_internal_check_to_transport,
    )
    return PinnedHostHTTPTransport(allow_internal=transport_allow_internal)
|
||||
|
||||
|
||||
def make_pinned_async_transport(
    url: str,
    *,
    allowed_schemes: Collection[str] = ("http", "https"),
    allowed_ports: Collection[int] | None = None,
    allow_internal: bool = False,
    defer_internal_check_to_transport: bool = False,
) -> PinnedHostAsyncHTTPTransport:
    """Validate ``url``, then build the asynchronous pinned transport for it.

    Async counterpart of :func:`make_pinned_transport`; it takes the same
    arguments and performs the same upfront validation before construction.
    """
    # Validation runs first; if it raises, no transport is ever constructed.
    transport_allow_internal = _validate_and_resolve_allow_internal(
        url,
        allowed_schemes=allowed_schemes,
        allowed_ports=allowed_ports,
        allow_internal=allow_internal,
        defer_internal_check_to_transport=defer_internal_check_to_transport,
    )
    return PinnedHostAsyncHTTPTransport(allow_internal=transport_allow_internal)
|
||||
|
||||
|
||||
def create_pinned_httpx_client(
    url: str,
    *,
    allow_internal: bool = False,
    **kwargs,
) -> httpx.Client:
    """Return an ``httpx.Client`` backed by a validated, pinned transport.

    ``url`` is validated by :func:`make_pinned_transport` (using that factory's
    default ``allowed_schemes`` / ``allowed_ports``) before the transport is
    built, so no separate ``validate_outbound_http_url`` call is made here.

    Args:
        url: The outbound URL the client is intended for.
        allow_internal: Whether non-public/internal addresses are permitted;
            applied both to the upfront validation and to the transport.
        **kwargs: Passed through unchanged to ``httpx.Client``.

    Raises:
        ValueError: Propagated from the factory's upfront validation when
            ``url`` is rejected.
    """
    # Exactly one ``transport=`` argument: passing the keyword twice is a
    # SyntaxError, and the factory already pairs validation with construction.
    return httpx.Client(
        transport=make_pinned_transport(url, allow_internal=allow_internal),
        **kwargs,
    )
|
||||
|
||||
@@ -189,8 +268,7 @@ def create_pinned_async_httpx_client(
|
||||
allow_internal: bool = False,
|
||||
**kwargs,
|
||||
) -> httpx.AsyncClient:
|
||||
validate_outbound_http_url(url, allow_internal=allow_internal)
|
||||
return httpx.AsyncClient(
|
||||
transport=PinnedHostAsyncHTTPTransport(allow_internal=allow_internal),
|
||||
transport=make_pinned_async_transport(url, allow_internal=allow_internal),
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@@ -2,8 +2,11 @@ from unittest import mock
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from paperless.network import PinnedHostHTTPTransport
|
||||
from paperless.network import make_pinned_async_transport
|
||||
from paperless.network import make_pinned_transport
|
||||
|
||||
|
||||
def test_pinned_host_transport_blocks_internal_rebinding():
|
||||
@@ -48,3 +51,56 @@ def test_pinned_host_transport_rewrites_to_vetted_ip():
|
||||
response = transport.handle_request(request)
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
|
||||
class TestPinnedTransportFactories:
    """Chokepoint behavior of the factories that call-site tests cannot reach.

    Sync rejection and happy-path construction are already exercised end-to-end
    by ``test_get_llm_ollama`` and the ``*_blocks_internal_endpoint_when_disallowed``
    tests in ``test_client.py`` / ``test_embedding.py``, so they are not
    duplicated here. This class covers what those tests miss:

    * the *async* factory validating on its own (every real call site builds the
      sync transport first and raises before reaching the async one);
    * the ``defer_internal_check_to_transport`` webhook policy;
    * ``allowed_ports`` forwarding.
    """

    # Dotted path of the DNS resolution helper patched in every test below.
    _RESOLVER_TARGET = "paperless.network.resolve_hostname_ips"

    def test_async_factory_validates_independently_of_sync(
        self,
        mocker: MockerFixture,
    ) -> None:
        # Resolve to a private address so the upfront check has to reject it.
        mocker.patch(self._RESOLVER_TARGET, return_value=["10.0.0.1"])

        with pytest.raises(ValueError, match="non-public address"):
            make_pinned_async_transport("http://internal.example/api")

    def test_defer_internal_check_skips_upfront_but_pins_transport(
        self,
        mocker: MockerFixture,
    ) -> None:
        # Webhook policy: the upfront internal-address check is skipped (no raise
        # on an internal IP), but the transport still pins with
        # allow_internal=False so the block happens at connect time as a
        # ConnectError.
        mocker.patch(self._RESOLVER_TARGET, return_value=["10.0.0.1"])

        pinned = make_pinned_transport(
            "http://internal.example",
            allow_internal=False,
            defer_internal_check_to_transport=True,
        )

        assert pinned.allow_internal is False

    def test_allowed_ports_enforced_before_construction(
        self,
        mocker: MockerFixture,
    ) -> None:
        # A public address keeps the internal-address check out of the picture,
        # leaving the port check as the only reason to reject.
        mocker.patch(self._RESOLVER_TARGET, return_value=["93.184.216.34"])

        with pytest.raises(ValueError, match="port not permitted"):
            make_pinned_transport("https://example.com:9999", allowed_ports={443})
|
||||
|
||||
@@ -10,11 +10,10 @@ if TYPE_CHECKING:
|
||||
from llama_index.llms.openai_like import OpenAILike
|
||||
|
||||
from paperless.config import AIConfig
|
||||
from paperless.network import PinnedHostAsyncHTTPTransport
|
||||
from paperless.network import PinnedHostHTTPTransport
|
||||
from paperless.network import create_pinned_async_httpx_client
|
||||
from paperless.network import create_pinned_httpx_client
|
||||
from paperless.network import validate_outbound_http_url
|
||||
from paperless.network import make_pinned_async_transport
|
||||
from paperless.network import make_pinned_transport
|
||||
from paperless_ai.base_model import DocumentClassifierSchema
|
||||
|
||||
logger = logging.getLogger("paperless_ai.client")
|
||||
@@ -47,14 +46,12 @@ class AIClient:
|
||||
from ollama import Client
|
||||
|
||||
endpoint = self.settings.llm_endpoint or "http://localhost:11434"
|
||||
validate_outbound_http_url(
|
||||
transport = make_pinned_transport(
|
||||
endpoint,
|
||||
allow_internal=self.settings.llm_allow_internal_endpoints,
|
||||
)
|
||||
transport = PinnedHostHTTPTransport(
|
||||
allow_internal=self.settings.llm_allow_internal_endpoints,
|
||||
)
|
||||
async_transport = PinnedHostAsyncHTTPTransport(
|
||||
async_transport = make_pinned_async_transport(
|
||||
endpoint,
|
||||
allow_internal=self.settings.llm_allow_internal_endpoints,
|
||||
)
|
||||
return Ollama(
|
||||
|
||||
@@ -10,11 +10,10 @@ from documents.models import Document
|
||||
from documents.models import Note
|
||||
from paperless.config import AIConfig
|
||||
from paperless.models import LLMEmbeddingBackend
|
||||
from paperless.network import PinnedHostAsyncHTTPTransport
|
||||
from paperless.network import PinnedHostHTTPTransport
|
||||
from paperless.network import create_pinned_async_httpx_client
|
||||
from paperless.network import create_pinned_httpx_client
|
||||
from paperless.network import validate_outbound_http_url
|
||||
from paperless.network import make_pinned_async_transport
|
||||
from paperless.network import make_pinned_transport
|
||||
|
||||
OCR_LEADER_REGEX = re.compile(r"[._\-\u00b7]{4,}")
|
||||
HORIZONTAL_WHITESPACE_REGEX = re.compile(r"[ \t\u00a0]+")
|
||||
@@ -62,10 +61,6 @@ def get_embedding_model(config: AIConfig) -> "BaseEmbedding":
|
||||
or config.llm_endpoint
|
||||
or "http://localhost:11434"
|
||||
)
|
||||
validate_outbound_http_url(
|
||||
endpoint,
|
||||
allow_internal=config.llm_allow_internal_endpoints,
|
||||
)
|
||||
embedding = OllamaEmbedding(
|
||||
model_name=config.llm_embedding_model or "embeddinggemma",
|
||||
base_url=endpoint,
|
||||
@@ -73,13 +68,15 @@ def get_embedding_model(config: AIConfig) -> "BaseEmbedding":
|
||||
)
|
||||
embedding._client = Client(
|
||||
host=endpoint,
|
||||
transport=PinnedHostHTTPTransport(
|
||||
transport=make_pinned_transport(
|
||||
endpoint,
|
||||
allow_internal=config.llm_allow_internal_endpoints,
|
||||
),
|
||||
)
|
||||
embedding._async_client = AsyncClient(
|
||||
host=endpoint,
|
||||
transport=PinnedHostAsyncHTTPTransport(
|
||||
transport=make_pinned_async_transport(
|
||||
endpoint,
|
||||
allow_internal=config.llm_allow_internal_endpoints,
|
||||
),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user