From ad1db9fc8bfabb462005a3c9889bc14647ae0a75 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 21 Jan 2026 12:01:33 -0800 Subject: [PATCH 1/7] [Fix] LiteLLM VertexAI Pass through - ensuring incoming headers are forwarded down to target (#19524) * test_vertex_passthrough_forwards_anthropic_beta_header * add_incoming_headers --- .../llm_passthrough_endpoints.py | 31 +++++- .../test_vertex_passthrough_load_balancing.py | 98 ++++++++++++++++++- 2 files changed, 124 insertions(+), 5 deletions(-) diff --git a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py index e48fd22bc8d0..0a94fc953421 100644 --- a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py @@ -1369,6 +1369,27 @@ def get_vertex_base_url(vertex_location: Optional[str]) -> str: return f"https://{vertex_location}-aiplatform.googleapis.com/" +def add_incoming_headers(request: Request, auth_header: str) -> dict: + """ + Build headers from incoming request, preserving headers like anthropic-beta, + while removing headers that should not be forwarded and adding authorization. + + Args: + request: The FastAPI request object + auth_header: The authorization token to add + + Returns: + dict: Headers dictionary with authorization added + """ + headers = dict(request.headers) or {} + # Remove headers that should not be forwarded + headers.pop("content-length", None) + headers.pop("host", None) + # Add/override the Authorization header + headers["Authorization"] = f"Bearer {auth_header}" + return headers + + def get_vertex_pass_through_handler( call_type: Literal["discovery", "aiplatform"], ) -> BaseVertexAIPassThroughHandler: @@ -1512,9 +1533,13 @@ async def _prepare_vertex_auth_headers( api_base="", ) - headers = { - "Authorization": f"Bearer {auth_header}", - } + # Start with incoming request headers to preserve headers like anthropic-beta + headers = dict(request.headers) or {} + # Remove headers that should not be forwarded + headers.pop("content-length", None) + headers.pop("host", None) + # Add/override the Authorization header + headers["Authorization"] = f"Bearer {auth_header}" if base_target_url is not None: base_target_url = get_vertex_pass_through_handler.update_base_target_url_with_credential_location( diff --git a/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py b/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py index ceb231eb4cb9..a6701451f204 100644 --- a/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py +++ b/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py @@ -1,9 +1,14 @@ +from unittest.mock import AsyncMock, MagicMock, patch + import pytest -from unittest.mock import MagicMock, AsyncMock, patch -from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import _base_vertex_proxy_route + +from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( + _base_vertex_proxy_route, +) from litellm.types.router import DeploymentTypedDict + @pytest.mark.asyncio async def test_vertex_passthrough_load_balancing(): """ @@ -220,3 +225,92 @@ async def test_async_get_available_deployment_for_pass_through(): assert deployment is not None assert deployment["litellm_params"]["use_in_pass_through"] is True + +@pytest.mark.asyncio +async def 
test_vertex_passthrough_forwards_anthropic_beta_header(): + """ + Test that _prepare_vertex_auth_headers forwards the anthropic-beta header + (and other important headers) from the incoming request when credentials are available. + + This test validates the fix for the issue where the 1M context window header + (anthropic-beta: context-1m-2025-08-07) was being dropped when forwarding + requests to Vertex AI. + """ + from starlette.datastructures import Headers + + from litellm.llms.vertex_ai.vertex_llm_base import VertexBase + from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( + _prepare_vertex_auth_headers, + ) + + # Create a mock request with anthropic-beta header + mock_request = MagicMock() + mock_request.headers = Headers({ + "authorization": "Bearer old-token", + "anthropic-beta": "context-1m-2025-08-07", + "content-type": "application/json", + "user-agent": "test-client", + "content-length": "1234", # Should be removed + "host": "localhost:4000", # Should be removed + }) + + # Create mock vertex credentials + mock_vertex_credentials = MagicMock() + mock_vertex_credentials.vertex_project = "test-project" + mock_vertex_credentials.vertex_location = "us-central1" + mock_vertex_credentials.vertex_credentials = "test-credentials" + + # Create mock handler + mock_handler = MagicMock() + mock_handler.update_base_target_url_with_credential_location.return_value = ( + "https://us-central1-aiplatform.googleapis.com" + ) + + with patch.object( + VertexBase, + "_ensure_access_token_async", + new_callable=AsyncMock, + return_value=("test-auth-header", "test-project"), + ) as mock_ensure_token, patch.object( + VertexBase, + "_get_token_and_url", + return_value=("new-access-token", None), + ) as mock_get_token: + + # Call the function + ( + headers, + base_target_url, + headers_passed_through, + vertex_project, + vertex_location, + ) = await _prepare_vertex_auth_headers( + request=mock_request, + vertex_credentials=mock_vertex_credentials, + router_credentials=None, + vertex_project="test-project", + vertex_location="us-central1", + base_target_url="https://us-central1-aiplatform.googleapis.com", + get_vertex_pass_through_handler=mock_handler, + ) + + # Verify that the anthropic-beta header is preserved + assert "anthropic-beta" in headers + assert headers["anthropic-beta"] == "context-1m-2025-08-07" + + # Verify that other headers are preserved + assert "content-type" in headers + assert headers["content-type"] == "application/json" + assert "user-agent" in headers + + # Verify that the Authorization header was updated + assert "authorization" in headers + assert headers["authorization"] == "Bearer new-access-token" + + # Verify that content-length and host headers were removed + assert "content-length" not in headers + assert "host" not in headers + + # Verify that headers_passed_through is False (since we have credentials) + assert headers_passed_through is False + From 8c29ad41916253549afc324287544ff96ad0cae3 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 21 Jan 2026 19:12:04 -0800 Subject: [PATCH 2/7] [Fix] VertexAI Pass through - Ensure only anthropic betas are forwarded down to LLM API (#19542) * fix ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS * test_vertex_passthrough_forwards_anthropic_beta_header * fix test_vertex_passthrough_forwards_anthropic_beta_header * test_vertex_passthrough_does_not_forward_litellm_auth_token * fix utils * Using Anthropic Beta Features on Vertex AI * test_forward_headers_from_request_x_pass_prefix --- 
.../my-website/docs/pass_through/vertex_ai.md | 45 ++++++ litellm/constants.py | 14 ++ litellm/passthrough/utils.py | 16 +- .../llm_passthrough_endpoints.py | 36 ++--- proxy_config.yaml | 7 + .../test_vertex_passthrough_load_balancing.py | 153 ++++++++++++++++-- 6 files changed, 242 insertions(+), 29 deletions(-) create mode 100644 proxy_config.yaml diff --git a/docs/my-website/docs/pass_through/vertex_ai.md b/docs/my-website/docs/pass_through/vertex_ai.md index 2efef60070da..560b76543520 100644 --- a/docs/my-website/docs/pass_through/vertex_ai.md +++ b/docs/my-website/docs/pass_through/vertex_ai.md @@ -461,3 +461,48 @@ generateContent(); + +### Using Anthropic Beta Features on Vertex AI + +When using Anthropic models via Vertex AI passthrough (e.g., Claude on Vertex), you can enable Anthropic beta features like extended context windows. + +The `anthropic-beta` header is automatically forwarded to Vertex AI when calling Anthropic models. + +```bash +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-east5/publishers/anthropic/models/claude-3-5-sonnet:rawPredict \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -H "anthropic-beta: context-1m-2025-08-07" \ + -d '{ + "anthropic_version": "vertex-2023-10-16", + "messages": [{"role": "user", "content": "Hello"}], + "max_tokens": 500 + }' +``` + +### Forwarding Custom Headers with `x-pass-` Prefix + +You can forward any custom header to the provider by prefixing it with `x-pass-`. The prefix is stripped before the header is sent to the provider. + +For example: +- `x-pass-anthropic-beta: value` becomes `anthropic-beta: value` +- `x-pass-custom-header: value` becomes `custom-header: value` + +This is useful when you need to send provider-specific headers that aren't in the default allowlist. + +```bash +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-east5/publishers/anthropic/models/claude-3-5-sonnet:rawPredict \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -H "x-pass-anthropic-beta: context-1m-2025-08-07" \ + -H "x-pass-custom-feature: enabled" \ + -d '{ + "anthropic_version": "vertex-2023-10-16", + "messages": [{"role": "user", "content": "Hello"}], + "max_tokens": 500 + }' +``` + +:::info +The `x-pass-` prefix works for all LLM pass-through endpoints, not just Vertex AI. +::: diff --git a/litellm/constants.py b/litellm/constants.py index 3bdd943481ef..3f43fadd6901 100644 --- a/litellm/constants.py +++ b/litellm/constants.py @@ -1113,6 +1113,20 @@ "generateQuery/", "optimize-prompt/", ] + + +# Headers that are safe to forward from incoming requests to Vertex AI +# Using an allowlist approach for security - only forward headers we explicitly trust +ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS = { + "anthropic-beta", # Required for Anthropic features like extended context windows + "content-type", # Required for request body parsing +} + +# Prefix for headers that should be forwarded to the provider with the prefix stripped +# e.g., 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value' +# Works for all LLM pass-through endpoints (Vertex AI, Anthropic, Bedrock, etc.) 
+PASS_THROUGH_HEADER_PREFIX = "x-pass-" + BASE_MCP_ROUTE = "/mcp" BATCH_STATUS_POLL_INTERVAL_SECONDS = int( diff --git a/litellm/passthrough/utils.py b/litellm/passthrough/utils.py index 4bf66d498811..fbbf9cd25811 100644 --- a/litellm/passthrough/utils.py +++ b/litellm/passthrough/utils.py @@ -3,6 +3,8 @@ import httpx +from litellm.constants import PASS_THROUGH_HEADER_PREFIX + class BasePassthroughUtils: @staticmethod @@ -27,7 +29,11 @@ def forward_headers_from_request( forward_headers: Optional[bool] = False, ): """ - Helper to forward headers from original request + Helper to forward headers from original request. + + Also handles 'x-pass-' prefixed headers which are always forwarded + with the prefix stripped, regardless of forward_headers setting. + e.g., 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value' """ if forward_headers is True: # Header We Should NOT forward @@ -36,6 +42,14 @@ def forward_headers_from_request( # Combine request headers with custom headers headers = {**request_headers, **headers} + + # Always process x-pass- prefixed headers (strip prefix and forward) + for header_name, header_value in request_headers.items(): + if header_name.lower().startswith(PASS_THROUGH_HEADER_PREFIX): + # Strip the 'x-pass-' prefix to get the actual header name + actual_header_name = header_name[len(PASS_THROUGH_HEADER_PREFIX) :] + headers[actual_header_name] = header_value + return headers class CommonUtils: diff --git a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py index 0a94fc953421..b079e1615190 100644 --- a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py @@ -17,7 +17,10 @@ import litellm from litellm._logging import verbose_proxy_logger -from litellm.constants import BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES +from litellm.constants import ( + ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS, + BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES, +) from litellm.llms.vertex_ai.vertex_llm_base import VertexBase from litellm.proxy._types import * from litellm.proxy.auth.route_checks import RouteChecks @@ -1369,24 +1372,24 @@ def get_vertex_base_url(vertex_location: Optional[str]) -> str: return f"https://{vertex_location}-aiplatform.googleapis.com/" -def add_incoming_headers(request: Request, auth_header: str) -> dict: +def get_vertex_ai_allowed_incoming_headers(request: Request) -> dict: """ - Build headers from incoming request, preserving headers like anthropic-beta, - while removing headers that should not be forwarded and adding authorization. + Extract only the allowed headers from incoming request for Vertex AI pass-through. + + Uses an allowlist approach for security - only forwards headers we explicitly trust. + This prevents accidentally forwarding sensitive headers like the LiteLLM auth token. 
Args: request: The FastAPI request object - auth_header: The authorization token to add Returns: - dict: Headers dictionary with authorization added + dict: Headers dictionary with only allowed headers """ - headers = dict(request.headers) or {} - # Remove headers that should not be forwarded - headers.pop("content-length", None) - headers.pop("host", None) - # Add/override the Authorization header - headers["Authorization"] = f"Bearer {auth_header}" + incoming_headers = dict(request.headers) or {} + headers = {} + for header_name in ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS: + if header_name in incoming_headers: + headers[header_name] = incoming_headers[header_name] return headers @@ -1533,12 +1536,9 @@ async def _prepare_vertex_auth_headers( api_base="", ) - # Start with incoming request headers to preserve headers like anthropic-beta - headers = dict(request.headers) or {} - # Remove headers that should not be forwarded - headers.pop("content-length", None) - headers.pop("host", None) - # Add/override the Authorization header + # Use allowlist approach - only forward specific safe headers + headers = get_vertex_ai_allowed_incoming_headers(request) + # Add the Authorization header with vendor credentials headers["Authorization"] = f"Bearer {auth_header}" if base_target_url is not None: diff --git a/proxy_config.yaml b/proxy_config.yaml new file mode 100644 index 000000000000..57397181cdaa --- /dev/null +++ b/proxy_config.yaml @@ -0,0 +1,7 @@ +model_list: + - model_name: "*" + litellm_params: + model: "*" + +general_settings: + master_key: sk-1234 diff --git a/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py b/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py index a6701451f204..28b3ba0a1792 100644 --- a/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py +++ b/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py @@ -294,23 +294,156 @@ async def test_vertex_passthrough_forwards_anthropic_beta_header(): get_vertex_pass_through_handler=mock_handler, ) - # Verify that the anthropic-beta header is preserved + # Verify that allowlisted headers are preserved assert "anthropic-beta" in headers assert headers["anthropic-beta"] == "context-1m-2025-08-07" - - # Verify that other headers are preserved assert "content-type" in headers assert headers["content-type"] == "application/json" - assert "user-agent" in headers - # Verify that the Authorization header was updated - assert "authorization" in headers - assert headers["authorization"] == "Bearer new-access-token" + # Verify that the Authorization header is set with vendor credentials + assert "Authorization" in headers + assert headers["Authorization"] == "Bearer new-access-token" - # Verify that content-length and host headers were removed - assert "content-length" not in headers - assert "host" not in headers + # Verify that non-allowlisted headers are NOT forwarded (security) + # Only anthropic-beta, content-type, and Authorization should be present + assert "authorization" not in headers # lowercase auth token not forwarded + assert "user-agent" not in headers # not in allowlist + assert "content-length" not in headers # not in allowlist + assert "host" not in headers # not in allowlist # Verify that headers_passed_through is False (since we have credentials) assert headers_passed_through is False + +@pytest.mark.asyncio +async def 
test_vertex_passthrough_does_not_forward_litellm_auth_token(): + """ + Test that the LiteLLM authorization header is NOT forwarded to Vertex AI. + + This test validates the fix for the issue where both the LiteLLM auth token + (lowercase 'authorization') and the Vertex AI token (uppercase 'Authorization') + were being sent, causing 401 errors on the vendor side. + + The incoming request has: + - authorization: Bearer (should NOT be forwarded) + + The outgoing request should only have: + - Authorization: Bearer (vendor credentials) + """ + from starlette.datastructures import Headers + + from litellm.llms.vertex_ai.vertex_llm_base import VertexBase + from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( + _prepare_vertex_auth_headers, + ) + + # Create a mock request with ONLY the litellm auth token (no other headers) + mock_request = MagicMock() + mock_request.headers = Headers({ + "authorization": "Bearer sk-litellm-secret-key", # LiteLLM token - should NOT be forwarded + "Authorization": "Bearer sk-litellm-secret-key-uppercase", # Also try uppercase + }) + + # Create mock vertex credentials + mock_vertex_credentials = MagicMock() + mock_vertex_credentials.vertex_project = "test-project" + mock_vertex_credentials.vertex_location = "us-central1" + mock_vertex_credentials.vertex_credentials = "test-credentials" + + # Create mock handler + mock_handler = MagicMock() + mock_handler.update_base_target_url_with_credential_location.return_value = ( + "https://us-central1-aiplatform.googleapis.com" + ) + + with patch.object( + VertexBase, + "_ensure_access_token_async", + new_callable=AsyncMock, + return_value=("test-auth-header", "test-project"), + ), patch.object( + VertexBase, + "_get_token_and_url", + return_value=("vertex-access-token", None), + ): + + ( + headers, + _base_target_url, + _headers_passed_through, + _vertex_project, + _vertex_location, + ) = await _prepare_vertex_auth_headers( + request=mock_request, + vertex_credentials=mock_vertex_credentials, + router_credentials=None, + vertex_project="test-project", + vertex_location="us-central1", + base_target_url="https://us-central1-aiplatform.googleapis.com", + get_vertex_pass_through_handler=mock_handler, + ) + + # The ONLY Authorization header should be the Vertex token + assert headers["Authorization"] == "Bearer vertex-access-token" + + # The LiteLLM token should NOT be present (neither lowercase nor as a duplicate) + assert "authorization" not in headers + assert headers.get("Authorization") != "Bearer sk-litellm-secret-key" + assert headers.get("Authorization") != "Bearer sk-litellm-secret-key-uppercase" + + # Verify we only have the expected headers (Authorization + any allowlisted ones present) + # Since the request only had auth headers, only Authorization should be in output + assert set(headers.keys()) == {"Authorization"} + + +def test_forward_headers_from_request_x_pass_prefix(): + """ + Test that headers with 'x-pass-' prefix are forwarded with the prefix stripped. + + This allows users to force-forward arbitrary headers to the vendor API: + - 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value' + - 'x-pass-custom-header: value' becomes 'custom-header: value' + + This is tested on BasePassthroughUtils.forward_headers_from_request which is used + by all pass-through endpoints (not just Vertex AI). 
+ """ + from litellm.passthrough.utils import BasePassthroughUtils + + # Simulate incoming request headers + request_headers = { + "x-pass-anthropic-beta": "context-1m-2025-08-07", + "x-pass-custom-header": "custom-value", + "x-pass-another-header": "another-value", + "authorization": "Bearer sk-litellm-key", + "x-litellm-api-key": "sk-1234", + "content-type": "application/json", + } + + # Start with empty headers dict (simulating custom headers from endpoint config) + headers = {} + + # Call the method with forward_headers=False (default behavior) + # x-pass- headers should still be forwarded + result = BasePassthroughUtils.forward_headers_from_request( + request_headers=request_headers, + headers=headers, + forward_headers=False, + ) + + # Verify x-pass- prefixed headers are forwarded with prefix stripped + assert "anthropic-beta" in result + assert result["anthropic-beta"] == "context-1m-2025-08-07" + assert "custom-header" in result + assert result["custom-header"] == "custom-value" + assert "another-header" in result + assert result["another-header"] == "another-value" + + # Verify other headers are NOT forwarded (since forward_headers=False) + assert "authorization" not in result + assert "x-litellm-api-key" not in result + assert "content-type" not in result + + # Verify original x-pass- prefixed headers are NOT in output (only stripped versions) + assert "x-pass-anthropic-beta" not in result + assert "x-pass-custom-header" not in result + From 9e893b7cda0a12f4ff6625a557b3b09c11352cb9 Mon Sep 17 00:00:00 2001 From: Alexsander Hamir Date: Tue, 20 Jan 2026 10:44:31 -0800 Subject: [PATCH 3/7] Fix: Handle PostgreSQL cached plan errors during rolling deployments (#19424) --- litellm/proxy/utils.py | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 9ea2ea7d5c92..fcb678ef02a9 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -2214,6 +2214,45 @@ async def get_generic_data( raise e + async def _query_first_with_cached_plan_fallback( + self, sql_query: str + ) -> Optional[dict]: + """ + Execute a query with automatic fallback for PostgreSQL cached plan errors. + + This handles the "cached plan must not change result type" error that occurs + during rolling deployments when schema changes are applied while old pods + still have cached query plans expecting the old schema. + + Args: + sql_query: SQL query string to execute + + Returns: + Query result or None + + Raises: + Original exception if not a cached plan error + """ + try: + return await self.db.query_first(query=sql_query) + except Exception as e: + error_str = str(e) + if "cached plan must not change result type" in error_str: + # Force PostgreSQL to re-plan by invalidating the cache + # Add a unique comment to make the query different + sql_query_retry = sql_query.replace( + "SELECT", + f"SELECT /* cache_invalidated_{int(time.time() * 1000)} */" + ) + verbose_proxy_logger.warning( + "PostgreSQL cached plan error detected for token lookup, " + "retrying with fresh plan. This may occur during rolling deployments " + "when schema changes are applied." 
+ ) + return await self.db.query_first(query=sql_query_retry) + else: + raise + @backoff.on_exception( backoff.expo, Exception, # base exception to catch for the backoff @@ -2545,7 +2584,7 @@ async def get_data( # noqa: PLR0915 WHERE v.token = '{token}' """ - response = await self.db.query_first(query=sql_query) + response = await self._query_first_with_cached_plan_fallback(sql_query) if response is not None: if response["team_models"] is None: From 790a5ce0b323c1eefa70c2df25b2780097aa3f80 Mon Sep 17 00:00:00 2001 From: Alexsander Hamir Date: Tue, 20 Jan 2026 12:17:06 -0800 Subject: [PATCH 4/7] Fix in-flight request termination on SIGTERM when health-check runs in a separate process (#19427) --- docker/prod_entrypoint.sh | 1 + docker/supervisord.conf | 2 ++ docs/my-website/docs/proxy/config_settings.md | 1 + docs/my-website/docs/proxy/prod.md | 5 +++++ 4 files changed, 9 insertions(+) diff --git a/docker/prod_entrypoint.sh b/docker/prod_entrypoint.sh index 1fc09d2c8648..28d1bdcc2942 100644 --- a/docker/prod_entrypoint.sh +++ b/docker/prod_entrypoint.sh @@ -2,6 +2,7 @@ if [ "$SEPARATE_HEALTH_APP" = "1" ]; then export LITELLM_ARGS="$@" + export SUPERVISORD_STOPWAITSECS="${SUPERVISORD_STOPWAITSECS:-3600}" exec supervisord -c /etc/supervisord.conf fi diff --git a/docker/supervisord.conf b/docker/supervisord.conf index c6855fe652b9..9e9890e214f6 100644 --- a/docker/supervisord.conf +++ b/docker/supervisord.conf @@ -14,6 +14,7 @@ priority=1 exitcodes=0 stopasgroup=true killasgroup=true +stopwaitsecs=%(ENV_SUPERVISORD_STOPWAITSECS)s stdout_logfile=/dev/stdout stderr_logfile=/dev/stderr stdout_logfile_maxbytes = 0 @@ -29,6 +30,7 @@ priority=2 exitcodes=0 stopasgroup=true killasgroup=true +stopwaitsecs=%(ENV_SUPERVISORD_STOPWAITSECS)s stdout_logfile=/dev/stdout stderr_logfile=/dev/stderr stdout_logfile_maxbytes = 0 diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md index b941f21b33e9..53d9c7759721 100644 --- a/docs/my-website/docs/proxy/config_settings.md +++ b/docs/my-website/docs/proxy/config_settings.md @@ -866,6 +866,7 @@ router_settings: | SECRET_MANAGER_REFRESH_INTERVAL | Refresh interval in seconds for secret manager. Default is 86400 (24 hours) | SEPARATE_HEALTH_APP | If set to '1', runs health endpoints on a separate ASGI app and port. Default: '0'. | SEPARATE_HEALTH_PORT | Port for the separate health endpoints app. Only used if SEPARATE_HEALTH_APP=1. Default: 4001. +| SUPERVISORD_STOPWAITSECS | Upper bound timeout in seconds for graceful shutdown when SEPARATE_HEALTH_APP=1. Default: 3600 (1 hour). | SERVER_ROOT_PATH | Root path for the server application | SEND_USER_API_KEY_ALIAS | Flag to send user API key alias to Zscaler AI Guard. Default is False | SEND_USER_API_KEY_TEAM_ID | Flag to send user API key team ID to Zscaler AI Guard. Default is False diff --git a/docs/my-website/docs/proxy/prod.md b/docs/my-website/docs/proxy/prod.md index 9216b0fbf30d..a42d91a7d5f4 100644 --- a/docs/my-website/docs/proxy/prod.md +++ b/docs/my-website/docs/proxy/prod.md @@ -277,8 +277,13 @@ Set the following environment variable(s): ```bash SEPARATE_HEALTH_APP="1" # Default "0" SEPARATE_HEALTH_PORT="8001" # Default "4001", Works only if `SEPARATE_HEALTH_APP` is "1" +SUPERVISORD_STOPWAITSECS="3600" # Optional: Upper bound timeout in seconds for graceful shutdown. Default: 3600 (1 hour). Only used when SEPARATE_HEALTH_APP=1. 
``` + +**Graceful Shutdown:** + +Previously, `stopwaitsecs` was not set, so supervisord fell back to its 10-second default and terminated in-flight requests on shutdown. `SUPERVISORD_STOPWAITSECS` (default: 3600) sets an upper bound for graceful shutdown, giving uvicorn time to finish serving in-flight requests before the process exits. +
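For reference, a minimal way to exercise the new settings end to end is to pass them as environment variables when starting the proxy container: `prod_entrypoint.sh` exports `SUPERVISORD_STOPWAITSECS` before launching supervisord, and both programs in `supervisord.conf` pick it up through `%(ENV_SUPERVISORD_STOPWAITSECS)s`. The sketch below is illustrative only; the image tag, ports, and config path are assumptions, not part of these patches.

```bash
# Illustrative sketch (not part of the patches above): run the proxy with the
# health endpoints on a separate port and a 1-hour graceful-shutdown window.
# Image tag, host ports, and config path are assumptions for this example.
docker run \
  -e SEPARATE_HEALTH_APP="1" \
  -e SEPARATE_HEALTH_PORT="8001" \
  -e SUPERVISORD_STOPWAITSECS="3600" \
  -v "$(pwd)/proxy_config.yaml:/app/config.yaml" \
  -p 4000:4000 -p 8001:8001 \
  ghcr.io/berriai/litellm:main-latest \
  --config /app/config.yaml
```

With this setup, a SIGTERM to the container makes supervisord wait up to `SUPERVISORD_STOPWAITSECS` seconds for uvicorn to drain in-flight requests, instead of escalating to SIGKILL after the previous 10-second default.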