diff --git a/.envrc b/.envrc new file mode 100644 index 000000000000..0be5768f9f70 --- /dev/null +++ b/.envrc @@ -0,0 +1,4 @@ +#!/bin/bash + +dotenv +source ~/.config/claude/env.sh diff --git a/docker/prod_entrypoint.sh b/docker/prod_entrypoint.sh index 1fc09d2c8648..28d1bdcc2942 100644 --- a/docker/prod_entrypoint.sh +++ b/docker/prod_entrypoint.sh @@ -2,6 +2,7 @@ if [ "$SEPARATE_HEALTH_APP" = "1" ]; then export LITELLM_ARGS="$@" + export SUPERVISORD_STOPWAITSECS="${SUPERVISORD_STOPWAITSECS:-3600}" exec supervisord -c /etc/supervisord.conf fi diff --git a/docker/supervisord.conf b/docker/supervisord.conf index c6855fe652b9..9e9890e214f6 100644 --- a/docker/supervisord.conf +++ b/docker/supervisord.conf @@ -14,6 +14,7 @@ priority=1 exitcodes=0 stopasgroup=true killasgroup=true +stopwaitsecs=%(ENV_SUPERVISORD_STOPWAITSECS)s stdout_logfile=/dev/stdout stderr_logfile=/dev/stderr stdout_logfile_maxbytes = 0 @@ -29,6 +30,7 @@ priority=2 exitcodes=0 stopasgroup=true killasgroup=true +stopwaitsecs=%(ENV_SUPERVISORD_STOPWAITSECS)s stdout_logfile=/dev/stdout stderr_logfile=/dev/stderr stdout_logfile_maxbytes = 0 diff --git a/docs/my-website/docs/pass_through/vertex_ai.md b/docs/my-website/docs/pass_through/vertex_ai.md index 2efef60070da..560b76543520 100644 --- a/docs/my-website/docs/pass_through/vertex_ai.md +++ b/docs/my-website/docs/pass_through/vertex_ai.md @@ -461,3 +461,48 @@ generateContent(); + +### Using Anthropic Beta Features on Vertex AI + +When using Anthropic models via Vertex AI passthrough (e.g., Claude on Vertex), you can enable Anthropic beta features like extended context windows. + +The `anthropic-beta` header is automatically forwarded to Vertex AI when calling Anthropic models. 
+ +```bash +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-east5/publishers/anthropic/models/claude-3-5-sonnet:rawPredict \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -H "anthropic-beta: context-1m-2025-08-07" \ + -d '{ + "anthropic_version": "vertex-2023-10-16", + "messages": [{"role": "user", "content": "Hello"}], + "max_tokens": 500 + }' +``` + +### Forwarding Custom Headers with `x-pass-` Prefix + +You can forward any custom header to the provider by prefixing it with `x-pass-`. The prefix is stripped before the header is sent to the provider. + +For example: +- `x-pass-anthropic-beta: value` becomes `anthropic-beta: value` +- `x-pass-custom-header: value` becomes `custom-header: value` + +This is useful when you need to send provider-specific headers that aren't in the default allowlist. + +```bash +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-east5/publishers/anthropic/models/claude-3-5-sonnet:rawPredict \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -H "x-pass-anthropic-beta: context-1m-2025-08-07" \ + -H "x-pass-custom-feature: enabled" \ + -d '{ + "anthropic_version": "vertex-2023-10-16", + "messages": [{"role": "user", "content": "Hello"}], + "max_tokens": 500 + }' +``` + +:::info +The `x-pass-` prefix works for all LLM pass-through endpoints, not just Vertex AI. +::: diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md index b941f21b33e9..53d9c7759721 100644 --- a/docs/my-website/docs/proxy/config_settings.md +++ b/docs/my-website/docs/proxy/config_settings.md @@ -866,6 +866,7 @@ router_settings: | SECRET_MANAGER_REFRESH_INTERVAL | Refresh interval in seconds for secret manager. Default is 86400 (24 hours) | SEPARATE_HEALTH_APP | If set to '1', runs health endpoints on a separate ASGI app and port. Default: '0'. 
| SEPARATE_HEALTH_PORT | Port for the separate health endpoints app. Only used if SEPARATE_HEALTH_APP=1. Default: 4001. +| SUPERVISORD_STOPWAITSECS | Maximum time in seconds that supervisord waits for a process to shut down gracefully before force-killing it. Only used if SEPARATE_HEALTH_APP=1. Default: 3600 (1 hour). | SERVER_ROOT_PATH | Root path for the server application | SEND_USER_API_KEY_ALIAS | Flag to send user API key alias to Zscaler AI Guard. Default is False | SEND_USER_API_KEY_TEAM_ID | Flag to send user API key team ID to Zscaler AI Guard. Default is False diff --git a/docs/my-website/docs/proxy/prod.md b/docs/my-website/docs/proxy/prod.md index 9216b0fbf30d..a42d91a7d5f4 100644 --- a/docs/my-website/docs/proxy/prod.md +++ b/docs/my-website/docs/proxy/prod.md @@ -277,8 +277,13 @@ Set the following environment variable(s): ```bash SEPARATE_HEALTH_APP="1" # Default "0" SEPARATE_HEALTH_PORT="8001" # Default "4001", Works only if `SEPARATE_HEALTH_APP` is "1" +SUPERVISORD_STOPWAITSECS="3600" # Optional: Maximum time in seconds that supervisord waits for a graceful shutdown before force-killing the process. Default: 3600 (1 hour). Only used when SEPARATE_HEALTH_APP=1. ``` +**Graceful Shutdown:** + +Previously, `stopwaitsecs` was not set, so supervisord used its default of 10 seconds and force-killed the process once that time elapsed, causing in-flight requests that took longer to fail. `SUPERVISORD_STOPWAITSECS` (default: 3600) sets the upper bound for graceful shutdown, allowing uvicorn to wait for all in-flight requests to complete before the process is stopped. +