diff --git a/aidial_adapter_dial/app.py b/aidial_adapter_dial/app.py index e766d8d..4e46da3 100644 --- a/aidial_adapter_dial/app.py +++ b/aidial_adapter_dial/app.py @@ -17,6 +17,7 @@ HTTPException, dial_exception_decorator, ) +from aidial_adapter_dial.utils.http_client import get_http_client from aidial_adapter_dial.utils.log_config import configure_loggers from aidial_adapter_dial.utils.reflection import call_with_extra_body from aidial_adapter_dial.utils.sse_stream import to_openai_sse_stream @@ -103,7 +104,7 @@ async def parse( base_url=upstream_endpoint, api_key=remote_dial_api_key, api_version=query_params.get("api-version"), - timeout=DEFAULT_TIMEOUT, + http_client=get_http_client(), ) attachment_transformer = await AttachmentTransformer.create( diff --git a/aidial_adapter_dial/utils/http_client.py b/aidial_adapter_dial/utils/http_client.py new file mode 100644 index 0000000..7ccc2ca --- /dev/null +++ b/aidial_adapter_dial/utils/http_client.py @@ -0,0 +1,20 @@ +import functools + +import httpx + +# 10 s connect timeout; 600 s default for every other phase (read, write, pool) - httpx has no total timeout +DEFAULT_TIMEOUT = httpx.Timeout(600, connect=10) + +# Borrowed from openai._constants.DEFAULT_CONNECTION_LIMITS +DEFAULT_CONNECTION_LIMITS = httpx.Limits( + max_connections=1000, max_keepalive_connections=100 +) + + +@functools.cache +def get_http_client() -> httpx.AsyncClient: + return httpx.AsyncClient( + timeout=DEFAULT_TIMEOUT, + limits=DEFAULT_CONNECTION_LIMITS, + follow_redirects=True, + ) diff --git a/poetry.lock b/poetry.lock index b2b4c9e..8f104f9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. 
[[package]] name = "aidial-sdk" @@ -1884,4 +1884,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.13" -content-hash = "c7f3d18148732173bc962970d5896a9b154c270b8b85048aabdc45bacbdd16ef" +content-hash = "48ef9a6dedb8f84f194f73357c279b2e77ac648a4dc82e89910fe1131b055aea" diff --git a/pyproject.toml b/pyproject.toml index a1ba326..98eaf28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,12 +27,12 @@ aiohttp = "3.9.5" openai = "1.32.0" # NOTE: used solely for chat completion response types pydantic = "^1.10.12" aidial-sdk = {version = "^0.8.0", extras = ["telemetry"]} +respx = "^0.21.1" [tool.poetry.group.test.dependencies] pytest = "7.4.0" pytest-asyncio = "0.21.1" python-dotenv = "1.0.0" -respx = "^0.21.1" [tool.poetry.group.lint.dependencies] pyright = "1.1.324"