diff --git a/.envrc b/.envrc
new file mode 100644
index 000000000000..0be5768f9f70
--- /dev/null
+++ b/.envrc
@@ -0,0 +1,4 @@
+#!/bin/bash
+# direnv config: the *_if_exists helpers skip files a contributor does not have.
+dotenv_if_exists
+source_env_if_exists ~/.config/claude/env.sh
diff --git a/docker/prod_entrypoint.sh b/docker/prod_entrypoint.sh
index 1fc09d2c8648..28d1bdcc2942 100644
--- a/docker/prod_entrypoint.sh
+++ b/docker/prod_entrypoint.sh
@@ -2,6 +2,7 @@
 
 if [ "$SEPARATE_HEALTH_APP" = "1" ]; then
     export LITELLM_ARGS="$@"
+    export SUPERVISORD_STOPWAITSECS="${SUPERVISORD_STOPWAITSECS:-3600}"
     exec supervisord -c /etc/supervisord.conf
 fi
 
diff --git a/docker/supervisord.conf b/docker/supervisord.conf
index c6855fe652b9..9e9890e214f6 100644
--- a/docker/supervisord.conf
+++ b/docker/supervisord.conf
@@ -14,6 +14,7 @@ priority=1
 exitcodes=0
 stopasgroup=true
 killasgroup=true
+stopwaitsecs=%(ENV_SUPERVISORD_STOPWAITSECS)s
 stdout_logfile=/dev/stdout
 stderr_logfile=/dev/stderr
 stdout_logfile_maxbytes = 0
@@ -29,6 +30,7 @@ priority=2
 exitcodes=0
 stopasgroup=true
 killasgroup=true
+stopwaitsecs=%(ENV_SUPERVISORD_STOPWAITSECS)s
 stdout_logfile=/dev/stdout
 stderr_logfile=/dev/stderr
 stdout_logfile_maxbytes = 0
diff --git a/docs/my-website/docs/pass_through/vertex_ai.md b/docs/my-website/docs/pass_through/vertex_ai.md
index 2efef60070da..560b76543520 100644
--- a/docs/my-website/docs/pass_through/vertex_ai.md
+++ b/docs/my-website/docs/pass_through/vertex_ai.md
@@ -461,3 +461,48 @@ generateContent();
 
 </TabItem>
 </Tabs>
+
+### Using Anthropic Beta Features on Vertex AI
+
+When using Anthropic models via Vertex AI passthrough (e.g., Claude on Vertex), you can enable Anthropic beta features like extended context windows.
+
+The `anthropic-beta` header is automatically forwarded to Vertex AI when calling Anthropic models.
+
+```bash
+curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-east5/publishers/anthropic/models/claude-3-5-sonnet:rawPredict \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-1234" \
+  -H "anthropic-beta: context-1m-2025-08-07" \
+  -d '{
+    "anthropic_version": "vertex-2023-10-16",
+    "messages": [{"role": "user", "content": "Hello"}],
+    "max_tokens": 500
+  }'
+```
+
+### Forwarding Custom Headers with `x-pass-` Prefix
+
+You can forward any custom header to the provider by prefixing it with `x-pass-`. The prefix is stripped before the header is sent to the provider.
+
+For example:
+- `x-pass-anthropic-beta: value` becomes `anthropic-beta: value`
+- `x-pass-custom-header: value` becomes `custom-header: value`
+
+This is useful when you need to send provider-specific headers that aren't in the default allowlist.
+
+```bash
+curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-east5/publishers/anthropic/models/claude-3-5-sonnet:rawPredict \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-1234" \
+  -H "x-pass-anthropic-beta: context-1m-2025-08-07" \
+  -H "x-pass-custom-feature: enabled" \
+  -d '{
+    "anthropic_version": "vertex-2023-10-16",
+    "messages": [{"role": "user", "content": "Hello"}],
+    "max_tokens": 500
+  }'
+```
+
+:::info
+The `x-pass-` prefix works for all LLM pass-through endpoints, not just Vertex AI.
+:::
diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md
index b941f21b33e9..53d9c7759721 100644
--- a/docs/my-website/docs/proxy/config_settings.md
+++ b/docs/my-website/docs/proxy/config_settings.md
@@ -866,6 +866,7 @@ router_settings:
 | SECRET_MANAGER_REFRESH_INTERVAL | Refresh interval in seconds for secret manager. Default is 86400 (24 hours)
 | SEPARATE_HEALTH_APP | If set to '1', runs health endpoints on a separate ASGI app and port. Default: '0'.
 | SEPARATE_HEALTH_PORT | Port for the separate health endpoints app. Only used if SEPARATE_HEALTH_APP=1. Default: 4001.
+| SUPERVISORD_STOPWAITSECS | Maximum time in seconds supervisord waits for the LiteLLM processes to shut down gracefully before force-killing them. Only used if SEPARATE_HEALTH_APP=1. Default: 3600 (1 hour).
 | SERVER_ROOT_PATH | Root path for the server application
 | SEND_USER_API_KEY_ALIAS | Flag to send user API key alias to Zscaler AI Guard. Default is False
 | SEND_USER_API_KEY_TEAM_ID | Flag to send user API key team ID to Zscaler AI Guard. Default is False
diff --git a/docs/my-website/docs/proxy/prod.md b/docs/my-website/docs/proxy/prod.md
index 9216b0fbf30d..a42d91a7d5f4 100644
--- a/docs/my-website/docs/proxy/prod.md
+++ b/docs/my-website/docs/proxy/prod.md
@@ -277,8 +277,13 @@ Set the following environment variable(s):
 ```bash
 SEPARATE_HEALTH_APP="1" # Default "0" 
 SEPARATE_HEALTH_PORT="8001" # Default "4001", Works only if `SEPARATE_HEALTH_APP` is "1"
+SUPERVISORD_STOPWAITSECS="3600" # Optional: Upper bound timeout in seconds for graceful shutdown. Default: 3600 (1 hour). Only used when SEPARATE_HEALTH_APP=1.
 ```
 
+**Graceful Shutdown:**
+
+By default supervisord waits only 10 seconds (`stopwaitsecs`) after sending the stop signal before killing a process, which caused in-flight requests to fail. LiteLLM now sets `stopwaitsecs` from `SUPERVISORD_STOPWAITSECS` (default: 3600), an upper bound that lets uvicorn finish all in-flight requests before it is killed.
+
 <video controls width="100%" style={{ borderRadius: '8px', marginBottom: '1em' }}>
   <source src="https://cdn.loom.com/sessions/thumbnails/b08be303331246b88fdc053940d03281-1718990992822.mp4" type="video/mp4" />
   Your browser does not support the video tag.
diff --git a/litellm/constants.py b/litellm/constants.py
index 3bdd943481ef..3f43fadd6901 100644
--- a/litellm/constants.py
+++ b/litellm/constants.py
@@ -1113,6 +1113,20 @@
     "generateQuery/",
     "optimize-prompt/",
 ]
+
+
+# Headers that are safe to forward from incoming requests to Vertex AI
+# Using an allowlist approach for security - only forward headers we explicitly trust
+ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS = {
+    "anthropic-beta",  # Required for Anthropic features like extended context windows
+    "content-type",  # Required for request body parsing
+}
+
+# Prefix for headers that should be forwarded to the provider with the prefix stripped
+# e.g., 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value'
+# Works for all LLM pass-through endpoints (Vertex AI, Anthropic, Bedrock, etc.)
+PASS_THROUGH_HEADER_PREFIX = "x-pass-"
+
 BASE_MCP_ROUTE = "/mcp"
 
 BATCH_STATUS_POLL_INTERVAL_SECONDS = int(
diff --git a/litellm/llms/bedrock/chat/agentcore/sse_iterator.py b/litellm/llms/bedrock/chat/agentcore/sse_iterator.py
index 90c5ada769f0..9364431ba65c 100644
--- a/litellm/llms/bedrock/chat/agentcore/sse_iterator.py
+++ b/litellm/llms/bedrock/chat/agentcore/sse_iterator.py
@@ -85,6 +85,37 @@ def _parse_sse_line(self, line: str) -> Optional[ModelResponse]:
                     delta = content_block_delta.get("delta", {})
                     text = delta.get("text", "")
 
+                    # Check for reasoning content (extended thinking)
+                    # Format 1: {"reasoningContent": {"text": "..."}} (AgentCore)
+                    reasoning_text = None
+                    reasoning_content = delta.get("reasoningContent")
+                    if isinstance(reasoning_content, dict):
+                        reasoning_text = reasoning_content.get("text")
+                    # Format 2: {"reasoningText": "..."} (Strands SDK flat)
+                    if not reasoning_text:
+                        reasoning_text = delta.get("reasoningText")
+
+                    if reasoning_text:
+                        chunk = ModelResponse(
+                            id=f"chatcmpl-{uuid.uuid4()}",
+                            created=0,
+                            model=self.model,
+                            object="chat.completion.chunk",
+                        )
+
+                        chunk.choices = [
+                            StreamingChoices(
+                                finish_reason=None,
+                                index=0,
+                                delta=Delta(
+                                    reasoning_content=reasoning_text,
+                                    role="assistant",
+                                ),
+                            )
+                        ]
+
+                        return chunk
+
                     if text:
                         # Return chunk with text
                         chunk = ModelResponse(
diff --git a/litellm/llms/bedrock/chat/agentcore/transformation.py b/litellm/llms/bedrock/chat/agentcore/transformation.py
index 7c65cad94df0..efee9936f85c 100644
--- a/litellm/llms/bedrock/chat/agentcore/transformation.py
+++ b/litellm/llms/bedrock/chat/agentcore/transformation.py
@@ -22,9 +22,14 @@
 from litellm.types.llms.bedrock_agentcore import (
     AgentCoreMessage,
     AgentCoreParsedResponse,
+    AgentCoreReasoningContentBlock,
     AgentCoreUsage,
 )
-from litellm.types.llms.openai import AllMessageValues
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    ChatCompletionRedactedThinkingBlock,
+    ChatCompletionThinkingBlock,
+)
 from litellm.types.utils import Choices, Message, ModelResponse, Usage
 
 if TYPE_CHECKING:
@@ -274,6 +279,114 @@ def _extract_content_delta(self, event_data: Dict) -> Optional[str]:
         delta = content_block_delta.get("delta", {})
         return delta.get("text")
 
+    def _extract_reasoning_from_event(
+        self, event_data: Dict
+    ) -> Optional[AgentCoreReasoningContentBlock]:
+        """
+        Extract reasoning/thinking content from one SSE event.
+
+        Two event shapes are recognised:
+        - Strands top-level: "reasoning" is truthy, with "reasoningText",
+          "reasoning_signature" (or "signature") and/or "redactedContent"
+        - Nested event.contentBlockDelta.delta with flat "reasoningText" / "signature" /
+          "redactedContent" keys, or a "reasoningContent" dict holding text / signature / redactedContent
+
+        Args:
+            event_data: The SSE event data dict
+
+        Returns:
+            AgentCoreReasoningContentBlock if reasoning content found, None otherwise
+        """
+        # Check for top-level reasoning event (Strands format)
+        if event_data.get("reasoning"):
+            reasoning_text = event_data.get("reasoningText")
+            signature = event_data.get("reasoning_signature") or event_data.get(
+                "signature"
+            )
+            redacted_content = event_data.get("redactedContent")
+
+            if reasoning_text:
+                reasoning_block: AgentCoreReasoningContentBlock = {
+                    "reasoningText": {"text": reasoning_text}
+                }
+                if signature:
+                    reasoning_block["reasoningText"]["signature"] = signature
+                return reasoning_block
+            elif redacted_content:
+                return {"redactedContent": redacted_content}
+
+        # Check for nested event payload with reasoning delta (Bedrock Converse style)
+        event_payload = event_data.get("event")
+        if event_payload:
+            content_block_delta = event_payload.get("contentBlockDelta")
+            if content_block_delta:
+                delta = content_block_delta.get("delta", {})
+                # Check for reasoning content in delta
+                # Format 1: flat keys directly on the delta (Strands SDK flat)
+                reasoning_text = delta.get("reasoningText")
+                redacted_content = delta.get("redactedContent")
+                signature = delta.get("signature")
+
+                # Format 2: nested {"reasoningContent": {...}} (AgentCore nested).
+                # It only fills in values that Format 1 did not already provide.
+                nested = delta.get("reasoningContent")
+                if isinstance(nested, dict):
+                    reasoning_text = reasoning_text or nested.get("text")
+                    signature = signature or nested.get("signature")
+                    redacted_content = redacted_content or nested.get("redactedContent")
+
+                if reasoning_text:
+                    reasoning_block = {"reasoningText": {"text": reasoning_text}}
+                    if signature:
+                        reasoning_block["reasoningText"]["signature"] = signature
+                    return reasoning_block
+                elif redacted_content:
+                    return {"redactedContent": redacted_content}
+
+        return None
+
+    def _transform_reasoning_content(
+        self, reasoning_blocks: List[AgentCoreReasoningContentBlock]
+    ) -> str:
+        """
+        Concatenate the text of every `reasoningText` block, in order.
+
+        Redacted blocks and blocks without a `text` value contribute nothing.
+        """
+        return "".join(
+            block["reasoningText"].get("text") or ""
+            for block in reasoning_blocks
+            if "reasoningText" in block
+        )
+
+    def _transform_thinking_blocks(
+        self, reasoning_blocks: List[AgentCoreReasoningContentBlock]
+    ) -> List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]:
+        """
+        Convert reasoning blocks into `thinking` / `redacted_thinking` block dicts.
+
+        Text and signature are copied only when present; other blocks are skipped.
+        """
+        converted: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
+        for item in reasoning_blocks:
+            if "reasoningText" in item:
+                reasoning = item["reasoningText"]
+                thinking = ChatCompletionThinkingBlock(type="thinking")
+                if (text := reasoning.get("text")) is not None:
+                    thinking["thinking"] = text
+                if (signature := reasoning.get("signature")) is not None:
+                    thinking["signature"] = signature
+                converted.append(thinking)
+            elif "redactedContent" in item:
+                converted.append(
+                    ChatCompletionRedactedThinkingBlock(
+                        type="redacted_thinking", data=item["redactedContent"]
+                    )
+                )
+        return converted
+
     def _extract_content_from_message(self, message: AgentCoreMessage) -> str:
         """
         Extract text content from message content blocks.
@@ -333,7 +446,7 @@ def _parse_json_response(self, response_json: dict) -> AgentCoreParsedResponse:
         {
             "result": {
                 "role": "assistant",
-                "content": [{"text": "..."}]
+                "content": [{"text": "..."}, {"reasoningContent": {...}}]
             }
         }
         """
@@ -342,11 +455,22 @@ def _parse_json_response(self, response_json: dict) -> AgentCoreParsedResponse:
         # Extract content using the same helper as SSE parsing
         content = self._extract_content_from_message(result)  # type: ignore
 
+        # Extract reasoning content blocks from message content
+        reasoning_blocks: Optional[List[AgentCoreReasoningContentBlock]] = None
+        content_list = result.get("content", [])
+        if isinstance(content_list, list):
+            for block in content_list:
+                if isinstance(block, dict) and "reasoningContent" in block:
+                    if reasoning_blocks is None:
+                        reasoning_blocks = []
+                    reasoning_blocks.append(block["reasoningContent"])
+
         # JSON responses don't include usage data
         return AgentCoreParsedResponse(
             content=content,
             usage=None,
             final_message=result,  # type: ignore
+            reasoning_content_blocks=reasoning_blocks,
         )
 
     def _get_parsed_response(
@@ -386,11 +510,12 @@ def _parse_sse_stream(self, response_text: str) -> AgentCoreParsedResponse:
         Each line starts with 'data:' followed by JSON.
 
         Returns:
-            AgentCoreParsedResponse: Parsed response with content, usage, and message
+            AgentCoreParsedResponse: Parsed response with content, usage, message, and reasoning
         """
         final_message: Optional[AgentCoreMessage] = None
         usage_data: Optional[AgentCoreUsage] = None
         content_blocks: List[str] = []
+        reasoning_blocks: List[AgentCoreReasoningContentBlock] = []
 
         for line in response_text.strip().split("\n"):
             line = line.strip()
@@ -424,6 +549,11 @@ def _parse_sse_stream(self, response_text: str) -> AgentCoreParsedResponse:
                 if text := self._extract_content_delta(data):
                     content_blocks.append(text)
 
+            # Extract reasoning content (can be in top-level or nested event)
+            if reasoning_block := self._extract_reasoning_from_event(data):
+                reasoning_blocks.append(reasoning_block)
+                verbose_logger.debug("Found reasoning content block")
+
         # Build final content
         content = (
             self._extract_content_from_message(final_message)
@@ -432,9 +562,13 @@ def _parse_sse_stream(self, response_text: str) -> AgentCoreParsedResponse:
         )
 
         verbose_logger.debug(f"Final usage_data: {usage_data}")
+        verbose_logger.debug(f"Found {len(reasoning_blocks)} reasoning blocks")
 
         return AgentCoreParsedResponse(
-            content=content, usage=usage_data, final_message=final_message
+            content=content,
+            usage=usage_data,
+            final_message=final_message,
+            reasoning_content_blocks=reasoning_blocks if reasoning_blocks else None,
         )
 
     def get_streaming_response(
@@ -625,12 +759,35 @@ def transform_response(
 
             content = parsed_data["content"]
             usage_data = parsed_data["usage"]
+            reasoning_blocks = parsed_data.get("reasoning_content_blocks")
 
             verbose_logger.debug(f"Parsed content length: {len(content)}")
             verbose_logger.debug(f"Usage data: {usage_data}")
+            verbose_logger.debug(
+                f"Reasoning blocks: {len(reasoning_blocks) if reasoning_blocks else 0}"
+            )
+
+            # Create the message with reasoning content if available
+            message_dict: Dict[str, Any] = {"content": content, "role": "assistant"}
+
+            if reasoning_blocks:
+                # Add provider-specific fields
+                message_dict["provider_specific_fields"] = {
+                    "reasoningContentBlocks": reasoning_blocks,
+                }
+                # Add reasoning_content (concatenated text for deepseek compatibility)
+                message_dict["reasoning_content"] = self._transform_reasoning_content(
+                    reasoning_blocks
+                )
+                # Add thinking_blocks (OpenAI-compatible format)
+                message_dict["thinking_blocks"] = self._transform_thinking_blocks(
+                    reasoning_blocks
+                )
+                verbose_logger.debug(
+                    f"Added reasoning_content: {len(message_dict['reasoning_content'])} chars"
+                )
 
-            # Create the message
-            message = Message(content=content, role="assistant")
+            message = Message(**message_dict)
 
             # Create choices
             choice = Choices(finish_reason="stop", index=0, message=message)
diff --git a/litellm/passthrough/utils.py b/litellm/passthrough/utils.py
index 4bf66d498811..fbbf9cd25811 100644
--- a/litellm/passthrough/utils.py
+++ b/litellm/passthrough/utils.py
@@ -3,6 +3,8 @@
 
 import httpx
 
+from litellm.constants import PASS_THROUGH_HEADER_PREFIX
+
 
 class BasePassthroughUtils:
     @staticmethod
@@ -27,7 +29,11 @@ def forward_headers_from_request(
         forward_headers: Optional[bool] = False,
     ):
         """
-        Helper to forward headers from original request
+        Helper to forward headers from original request.
+
+        Also handles 'x-pass-' prefixed headers which are always forwarded
+        with the prefix stripped, regardless of forward_headers setting.
+        e.g., 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value'
         """
         if forward_headers is True:
             # Header We Should NOT forward
@@ -36,6 +42,14 @@ def forward_headers_from_request(
 
             # Combine request headers with custom headers
             headers = {**request_headers, **headers}
+
+        # Always process x-pass- prefixed headers (strip prefix and forward)
+        for header_name, header_value in request_headers.items():
+            if header_name.lower().startswith(PASS_THROUGH_HEADER_PREFIX):
+                # Strip the 'x-pass-' prefix to get the actual header name
+                actual_header_name = header_name[len(PASS_THROUGH_HEADER_PREFIX) :]
+                headers[actual_header_name] = header_value
+
         return headers
 
 class CommonUtils:
diff --git a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py
index e48fd22bc8d0..b079e1615190 100644
--- a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py
+++ b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py
@@ -17,7 +17,10 @@
 
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.constants import BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES
+from litellm.constants import (
+    ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS,
+    BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES,
+)
 from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
 from litellm.proxy._types import *
 from litellm.proxy.auth.route_checks import RouteChecks
@@ -1369,6 +1372,27 @@ def get_vertex_base_url(vertex_location: Optional[str]) -> str:
     return f"https://{vertex_location}-aiplatform.googleapis.com/"
 
 
+def get_vertex_ai_allowed_incoming_headers(request: Request) -> dict:
+    """
+    Pick the allowlisted headers out of an incoming Vertex AI pass-through request.
+
+    Only names in ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS are copied, so anything
+    else on the request (e.g. the LiteLLM auth token) is never forwarded.
+
+    Args:
+        request: The FastAPI request object
+
+    Returns:
+        dict: The allowlisted headers that were present on the request
+    """
+    incoming = dict(request.headers) or {}
+    return {
+        name: incoming[name]
+        for name in ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS
+        if name in incoming
+    }
+
+
 def get_vertex_pass_through_handler(
     call_type: Literal["discovery", "aiplatform"],
 ) -> BaseVertexAIPassThroughHandler:
@@ -1512,9 +1536,10 @@ async def _prepare_vertex_auth_headers(
             api_base="",
         )
 
-        headers = {
-            "Authorization": f"Bearer {auth_header}",
-        }
+        # Use allowlist approach - only forward specific safe headers
+        headers = get_vertex_ai_allowed_incoming_headers(request)
+        # Add the Authorization header with vendor credentials
+        headers["Authorization"] = f"Bearer {auth_header}"
 
         if base_target_url is not None:
             base_target_url = get_vertex_pass_through_handler.update_base_target_url_with_credential_location(
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 9ea2ea7d5c92..fcb678ef02a9 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -2214,6 +2214,45 @@ async def get_generic_data(
 
             raise e
 
+    async def _query_first_with_cached_plan_fallback(
+        self, sql_query: str
+    ) -> Optional[dict]:
+        """
+        Run `query_first`, retrying once on PostgreSQL's cached-plan error.
+
+        Retries only when the error text contains "cached plan must not change
+        result type" (seen in rolling deployments: a schema change lands while old
+        pods still hold plans for the old schema); the retry's own errors propagate.
+
+        Args:
+            sql_query: SQL query string to execute
+
+        Returns:
+            Whatever `self.db.query_first` returns (may be None)
+
+        Raises:
+            The original exception if it is not a cached plan error
+        """
+        try:
+            return await self.db.query_first(query=sql_query)
+        except Exception as e:
+            error_str = str(e)
+            if "cached plan must not change result type" in error_str:
+                # Tag every literal "SELECT" with a unique comment so the retry text
+                # differs (presumably forcing a fresh plan); no-op without "SELECT".
+                sql_query_retry = sql_query.replace(
+                    "SELECT",
+                    f"SELECT /* cache_invalidated_{int(time.time() * 1000)} */"
+                )
+                verbose_proxy_logger.warning(
+                    "PostgreSQL cached plan error detected for token lookup, "
+                    "retrying with fresh plan. This may occur during rolling deployments "
+                    "when schema changes are applied."
+                )
+                return await self.db.query_first(query=sql_query_retry)
+            else:
+                raise
+
     @backoff.on_exception(
         backoff.expo,
         Exception,  # base exception to catch for the backoff
@@ -2545,7 +2584,7 @@ async def get_data(  # noqa: PLR0915
                         WHERE v.token = '{token}'
                     """
 
-                    response = await self.db.query_first(query=sql_query)
+                    response = await self._query_first_with_cached_plan_fallback(sql_query)
 
                     if response is not None:
                         if response["team_models"] is None:
diff --git a/litellm/types/llms/bedrock_agentcore.py b/litellm/types/llms/bedrock_agentcore.py
index 49c3bfb2d53f..95bdb46b52ef 100644
--- a/litellm/types/llms/bedrock_agentcore.py
+++ b/litellm/types/llms/bedrock_agentcore.py
@@ -4,9 +4,9 @@
 https://docs.aws.amazon.com/bedrock/latest/APIReference/API_agentcore_InvokeAgentRuntime.html
 """
 
-from typing import Dict, List, Optional
+from typing import List, Optional
 
-from typing_extensions import Literal, TypedDict
+from typing_extensions import Literal, Required, TypedDict
 
 
 # Request Types
@@ -16,6 +16,21 @@ class AgentCoreRequestPayload(TypedDict):
     prompt: str
 
 
+# Reasoning/Thinking Types (from Strands SDK streaming events)
+class AgentCoreReasoningTextBlock(TypedDict, total=False):
+    """Reasoning text payload: `text` is required, `signature` is optional."""
+
+    text: Required[str]
+    signature: str
+
+
+class AgentCoreReasoningContentBlock(TypedDict, total=False):
+    """One reasoning block; both keys are optional (reasoning text or redacted content)."""
+
+    reasoningText: AgentCoreReasoningTextBlock
+    redactedContent: str
+
+
 class AgentCoreRequest(TypedDict, total=False):
     """Complete request structure for AgentCore API (internal use)."""
 
@@ -132,4 +147,5 @@ class AgentCoreParsedResponse(TypedDict):
     content: str
     usage: Optional[AgentCoreUsage]
     final_message: Optional[AgentCoreMessage]
+    reasoning_content_blocks: Optional[List[AgentCoreReasoningContentBlock]]
 
diff --git a/proxy_config.yaml b/proxy_config.yaml
new file mode 100644
index 000000000000..57397181cdaa
--- /dev/null
+++ b/proxy_config.yaml
@@ -0,0 +1,7 @@
+model_list:
+  - model_name: "*"
+    litellm_params:
+      model: "*"
+
+general_settings:
+  master_key: sk-1234
diff --git a/tests/llm_translation/test_bedrock_agentcore.py b/tests/llm_translation/test_bedrock_agentcore.py
index 3afb01482ac3..e05e653a57e5 100644
--- a/tests/llm_translation/test_bedrock_agentcore.py
+++ b/tests/llm_translation/test_bedrock_agentcore.py
@@ -11,12 +11,20 @@
     0, os.path.abspath("../..")
 )
 
+import httpx
 import litellm
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock, Mock, patch
 import pytest
 
-import pytest
 
+# Skip marker for integration tests that require live AWS credentials with AgentCore permissions
+requires_agentcore_credentials = pytest.mark.skipif(
+    os.getenv("AGENTCORE_INTEGRATION_TEST") != "true",
+    reason="AgentCore integration tests require AGENTCORE_INTEGRATION_TEST=true and valid AWS credentials with bedrock-agentcore:InvokeAgentRuntime permission"
+)
+
+
+@requires_agentcore_credentials
 @pytest.mark.parametrize(
     "model", [
         "bedrock/agentcore/arn:aws:bedrock-agentcore:us-west-2:888602223428:runtime/hosted_agent_13sf6-cALnp38iZD", # non-streaming invocation
@@ -38,6 +46,7 @@ def test_bedrock_agentcore_basic(model):
     assert len(response.choices[0].message.content) > 0
 
 
+@requires_agentcore_credentials
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
     "model", [
@@ -367,3 +376,762 @@ def test_bedrock_agentcore_without_api_key_uses_sigv4():
         assert "X-Amzn-Bedrock-AgentCore-Runtime-Session-Id" in headers
         assert headers["X-Amzn-Bedrock-AgentCore-Runtime-Session-Id"] == "sigv4-test-session"
 
+
+def test_agentcore_parse_json_response():
+    """
+    Non-streaming path: a content-type: application/json body goes through _get_parsed_response.
+    Checks the extracted text, the absent usage block and the echoed final message.
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    # The "result" object placed in the JSON body
+    result_payload = {
+        "role": "assistant",
+        "content": [{"text": "Hello from JSON response"}]
+    }
+
+    # Stand-in for the HTTP response
+    fake_response = Mock(spec=httpx.Response)
+    fake_response.headers = {"content-type": "application/json"}
+    fake_response.json.return_value = {"result": result_payload}
+
+    outcome = agentcore_cfg._get_parsed_response(fake_response)
+
+    # Text comes from the single content block
+    assert outcome["content"] == "Hello from JSON response"
+    # A JSON body carries no usage information
+    assert outcome["usage"] is None
+    # The "result" object is surfaced as the final message
+    assert outcome["final_message"] == result_payload
+
+
+def test_agentcore_parse_sse_response():
+    """
+    A text/event-stream body that was read in full is parsed as SSE text.
+    Checks content, usage and the final message recovered from the events.
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    # Two text deltas, a usage event, then the closing message event
+    sse_body = """data: {"event":{"contentBlockDelta":{"delta":{"text":"Hello "}}}}
+
+data: {"event":{"contentBlockDelta":{"delta":{"text":"from SSE"}}}}
+
+data: {"event":{"metadata":{"usage":{"inputTokens":10,"outputTokens":5,"totalTokens":15}}}}
+
+data: {"message":{"role":"assistant","content":[{"text":"Hello from SSE"}]}}
+"""
+
+    fake_response = Mock(spec=httpx.Response)
+    fake_response.headers = {"content-type": "text/event-stream"}
+    fake_response.text = sse_body
+
+    # Run the parser over the buffered stream
+    outcome = agentcore_cfg._get_parsed_response(fake_response)
+
+    # The text matches what the final message event carries
+    assert outcome["content"] == "Hello from SSE"
+    # Usage is expected to be populated for this stream
+    assert outcome["usage"] is not None
+    assert outcome["usage"]["inputTokens"] == 10
+    assert outcome["usage"]["outputTokens"] == 5
+    assert outcome["usage"]["totalTokens"] == 15
+    # The closing message event is exposed as final_message
+    assert outcome["final_message"] is not None
+    assert outcome["final_message"]["role"] == "assistant"
+
+
+def test_agentcore_parse_sse_response_without_final_message():
+    """
+    SSE parsing when the stream holds only text deltas and never sends a final message event.
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    # Stream made up solely of contentBlockDelta events
+    sse_body = """data: {"event":{"contentBlockDelta":{"delta":{"text":"First "}}}}
+
+data: {"event":{"contentBlockDelta":{"delta":{"text":"second "}}}}
+
+data: {"event":{"contentBlockDelta":{"delta":{"text":"third"}}}}
+"""
+
+    fake_response = Mock(spec=httpx.Response)
+    fake_response.headers = {"content-type": "text/event-stream"}
+    fake_response.text = sse_body
+
+    # Run the parser over the buffered stream
+    outcome = agentcore_cfg._get_parsed_response(fake_response)
+
+    # The delta texts are expected joined in arrival order
+    assert outcome["content"] == "First second third"
+    # Nothing to report as final_message
+    assert outcome["final_message"] is None
+
+
+def test_agentcore_transform_response_json():
+    """
+    transform_response applied to a mocked JSON (non-streaming) AgentCore reply.
+    Checks the single choice of the resulting ModelResponse field by field.
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+    from litellm.types.utils import ModelResponse
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    # Stand-in HTTP 200 response with a JSON body
+    fake_response = Mock(spec=httpx.Response)
+    fake_response.status_code = 200
+    fake_response.headers = {"content-type": "application/json"}
+    fake_response.json.return_value = {
+        "result": {
+            "role": "assistant",
+            "content": [{"text": "Response from transform_response"}]
+        }
+    }
+
+    # Empty ModelResponse handed to transform_response
+    blank_model_response = ModelResponse()
+    # Stand-in logging object
+    logging_stub = MagicMock()
+
+    # Run the transformation
+    transformed = agentcore_cfg.transform_response(
+        model="bedrock/agentcore/arn:aws:bedrock-agentcore:us-west-2:123456789012:runtime/test",
+        raw_response=fake_response,
+        model_response=blank_model_response,
+        logging_obj=logging_stub,
+        request_data={},
+        messages=[{"role": "user", "content": "test"}],
+        optional_params={},
+        litellm_params={},
+        encoding=None,
+    )
+
+    # Exactly one choice, carrying the assistant text and a normal stop
+    assert len(transformed.choices) == 1
+    only_choice = transformed.choices[0]
+    assert only_choice.message.content == "Response from transform_response"
+    assert only_choice.message.role == "assistant"
+    assert only_choice.finish_reason == "stop"
+    assert only_choice.index == 0
+
+
+def test_agentcore_transform_response_sse():
+    """
+    transform_response applied to a mocked, fully buffered SSE (text/event-stream) reply.
+    Checks the resulting choice and that token usage mirrors the SSE usage event.
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+    from litellm.types.utils import ModelResponse
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    # Two text deltas, a usage event, then the closing message event
+    sse_body = """data: {"event":{"contentBlockDelta":{"delta":{"text":"SSE "}}}}
+
+data: {"event":{"contentBlockDelta":{"delta":{"text":"response"}}}}
+
+data: {"event":{"metadata":{"usage":{"inputTokens":20,"outputTokens":10,"totalTokens":30}}}}
+
+data: {"message":{"role":"assistant","content":[{"text":"SSE response"}]}}
+"""
+
+    fake_response = Mock(spec=httpx.Response)
+    fake_response.status_code = 200
+    fake_response.headers = {"content-type": "text/event-stream"}
+    fake_response.text = sse_body
+
+    # Empty ModelResponse handed to transform_response
+    blank_model_response = ModelResponse()
+
+    # Stand-in logging object
+    logging_stub = MagicMock()
+
+    # Run the transformation
+    transformed = agentcore_cfg.transform_response(
+        model="bedrock/agentcore/arn:aws:bedrock-agentcore:us-west-2:123456789012:runtime/test",
+        raw_response=fake_response,
+        model_response=blank_model_response,
+        logging_obj=logging_stub,
+        request_data={},
+        messages=[{"role": "user", "content": "test"}],
+        optional_params={},
+        litellm_params={},
+        encoding=None,
+    )
+
+    # One assistant choice that finished normally
+    assert len(transformed.choices) == 1
+    assert transformed.choices[0].message.content == "SSE response"
+    assert transformed.choices[0].message.role == "assistant"
+    assert transformed.choices[0].finish_reason == "stop"
+
+    # Token counts match the usage event in the stream
+    assert hasattr(transformed, "usage")
+    assert transformed.usage.prompt_tokens == 20
+    assert transformed.usage.completion_tokens == 10
+    assert transformed.usage.total_tokens == 30
+
+
+def test_agentcore_synchronous_non_streaming_response():
+    """
+    Test that synchronous (non-streaming) AgentCore calls still work correctly
+    after streaming simplification changes.
+
+    This test verifies:
+    1. Synchronous completion calls work (stream=False) through the patched HTTP client
+    2. Response is properly parsed and returned as ModelResponse
+    3. Content is extracted correctly
+    4. Usage data is present with positive token counts
+
+    This is a regression test for the streaming simplification changes
+    to ensure we didn't break the non-streaming code path.
+    """
+    from litellm.llms.custom_httpx.http_handler import HTTPHandler
+
+    client = HTTPHandler()
+
+    # Mock a JSON response (typical for synchronous AgentCore calls)
+    mock_json_response = {
+        "result": {
+            "role": "assistant",
+            "content": [{"text": "This is a synchronous response from AgentCore."}]
+        }
+    }
+
+    # Create a mock response object
+    mock_response = Mock(spec=httpx.Response)
+    mock_response.status_code = 200
+    mock_response.headers = {"content-type": "application/json"}
+    mock_response.json.return_value = mock_json_response
+
+    with patch.object(client, "post", return_value=mock_response) as mock_post:
+        # Make a synchronous (non-streaming) completion call
+        response = litellm.completion(
+            model="bedrock/agentcore/arn:aws:bedrock-agentcore:us-west-2:888602223428:runtime/hosted_agent_r9jvp-3ySZuRHjLC",
+            messages=[
+                {
+                    "role": "user",
+                    "content": "Test synchronous response",
+                }
+            ],
+            stream=False,  # Explicitly disable streaming
+            client=client,
+        )
+
+        # Verify the patched client carried the request and a response came back
+        mock_post.assert_called_once()
+        assert response is not None
+        assert hasattr(response, "choices")
+        assert len(response.choices) > 0
+
+        # Verify content
+        message = response.choices[0].message
+        assert message is not None
+        assert message.content == "This is a synchronous response from AgentCore."
+        assert message.role == "assistant"
+
+        # Verify completion metadata
+        assert response.choices[0].finish_reason == "stop"
+        assert response.choices[0].index == 0
+
+        # Verify usage data exists (either from API or calculated)
+        assert hasattr(response, "usage")
+        assert response.usage is not None
+        assert response.usage.prompt_tokens > 0
+        assert response.usage.completion_tokens > 0
+        assert response.usage.total_tokens > 0
+
+        print(f"Synchronous response: {response}")
+        print(f"Content: {message.content}")
+        print(f"Usage: prompt={response.usage.prompt_tokens}, completion={response.usage.completion_tokens}, total={response.usage.total_tokens}")
+
+
+def test_agentcore_extract_reasoning_from_strands_event():
+    """
+    _extract_reasoning_from_event on a Strands SDK streaming event.
+
+    The Strands SDK puts the reasoning fields at the top level of the event:
+    {"reasoning": true, "reasoningText": "...", "reasoning_signature": "..."}
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    # Top-level reasoning event carrying text and a reasoning_signature
+    top_level_event = {
+        "reasoning": True,
+        "reasoningText": "Let me think about this problem step by step...",
+        "reasoning_signature": "sig123abc"
+    }
+
+    extracted = agentcore_cfg._extract_reasoning_from_event(top_level_event)
+
+    assert extracted is not None
+    assert "reasoningText" in extracted
+    assert extracted["reasoningText"]["text"] == "Let me think about this problem step by step..."
+    assert extracted["reasoningText"]["signature"] == "sig123abc"
+
+
+def test_agentcore_extract_reasoning_with_signature_alias():
+    """
+    _extract_reasoning_from_event when the event uses 'signature' rather than 'reasoning_signature'.
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    # Same top-level shape, but the signature sits under the shorter key
+    aliased_event = {
+        "reasoning": True,
+        "reasoningText": "Analyzing the request...",
+        "signature": "alt_sig_456"
+    }
+
+    extracted = agentcore_cfg._extract_reasoning_from_event(aliased_event)
+
+    assert extracted is not None
+    assert extracted["reasoningText"]["text"] == "Analyzing the request..."
+    assert extracted["reasoningText"]["signature"] == "alt_sig_456"
+
+
+def test_agentcore_extract_redacted_reasoning():
+    """
+    _extract_reasoning_from_event on an event that carries redactedContent instead of text.
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    # Reasoning event whose payload is an opaque redacted string
+    event_with_redaction = {
+        "reasoning": True,
+        "redactedContent": "base64encodedredacteddata=="
+    }
+
+    extracted = agentcore_cfg._extract_reasoning_from_event(event_with_redaction)
+
+    assert extracted is not None
+    assert "redactedContent" in extracted
+    assert extracted["redactedContent"] == "base64encodedredacteddata=="
+
+
+def test_agentcore_extract_reasoning_from_bedrock_converse_style():
+    """
+    _extract_reasoning_from_event on a Bedrock Converse style event (fields nested in the delta).
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    # reasoningText and signature sit directly inside event.contentBlockDelta.delta
+    nested_event = {
+        "event": {
+            "contentBlockDelta": {
+                "delta": {
+                    "reasoningText": "Processing user input...",
+                    "signature": "converse_sig_789"
+                }
+            }
+        }
+    }
+
+    extracted = agentcore_cfg._extract_reasoning_from_event(nested_event)
+
+    assert extracted is not None
+    assert "reasoningText" in extracted
+    assert extracted["reasoningText"]["text"] == "Processing user input..."
+    assert extracted["reasoningText"]["signature"] == "converse_sig_789"
+
+
+def test_agentcore_extract_reasoning_no_reasoning_event():
+    """
+    _extract_reasoning_from_event yields None for an event that has no reasoning fields.
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    # Ordinary text delta: only a "text" key inside the delta
+    plain_text_event = {
+        "event": {
+            "contentBlockDelta": {
+                "delta": {
+                    "text": "Hello, this is regular content."
+                }
+            }
+        }
+    }
+
+    extracted = agentcore_cfg._extract_reasoning_from_event(plain_text_event)
+    assert extracted is None
+
+
+def test_agentcore_transform_reasoning_content():
+    """
+    _transform_reasoning_content joins the text of reasoning blocks into one string.
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    source_blocks = [
+        {"reasoningText": {"text": "First, I analyze the problem. ", "signature": "sig1"}},
+        {"reasoningText": {"text": "Then, I consider the options. ", "signature": "sig2"}},
+        {"redactedContent": "redacted_data"},  # Contributes nothing to the joined text
+        {"reasoningText": {"text": "Finally, I reach a conclusion.", "signature": "sig3"}},
+    ]
+
+    joined_text = agentcore_cfg._transform_reasoning_content(source_blocks)
+
+    assert joined_text == "First, I analyze the problem. Then, I consider the options. Finally, I reach a conclusion."
+
+
+def test_agentcore_transform_thinking_blocks():
+    """
+    _transform_thinking_blocks converts reasoning blocks into OpenAI-compatible thinking blocks.
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    source_blocks = [
+        {"reasoningText": {"text": "Step 1: Understand the query.", "signature": "sig_step1"}},
+        {"redactedContent": "some_redacted_data"},
+        {"reasoningText": {"text": "Step 2: Formulate response."}},  # Deliberately unsigned
+    ]
+
+    converted = agentcore_cfg._transform_thinking_blocks(source_blocks)
+
+    assert len(converted) == 3
+    signed_block, redacted_block, unsigned_block = converted
+    # Signed reasoning text becomes a "thinking" block that keeps its signature
+    assert signed_block["type"] == "thinking"
+    assert signed_block["thinking"] == "Step 1: Understand the query."
+    assert signed_block["signature"] == "sig_step1"
+
+    # Redacted content becomes a "redacted_thinking" block
+    assert redacted_block["type"] == "redacted_thinking"
+    assert redacted_block["data"] == "some_redacted_data"
+
+    # Unsigned reasoning text becomes a "thinking" block with no signature key
+    assert unsigned_block["type"] == "thinking"
+    assert unsigned_block["thinking"] == "Step 2: Formulate response."
+    assert "signature" not in unsigned_block
+
+
+def test_agentcore_parse_sse_response_with_reasoning():
+    """
+    SSE parsing of a buffered stream that mixes Strands-format reasoning events with text.
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    # Two reasoning events, one text delta, a usage event, then the closing message
+    sse_body = """data: {"reasoning":true,"reasoningText":"Let me analyze this...","reasoning_signature":"sig_analysis"}
+
+data: {"reasoning":true,"reasoningText":"Now considering options...","reasoning_signature":"sig_consider"}
+
+data: {"event":{"contentBlockDelta":{"delta":{"text":"Here is my answer."}}}}
+
+data: {"event":{"metadata":{"usage":{"inputTokens":50,"outputTokens":100,"totalTokens":150}}}}
+
+data: {"message":{"role":"assistant","content":[{"text":"Here is my answer."}]}}
+"""
+
+    fake_response = Mock(spec=httpx.Response)
+    fake_response.headers = {"content-type": "text/event-stream"}
+    fake_response.text = sse_body
+
+    outcome = agentcore_cfg._get_parsed_response(fake_response)
+
+    # Answer text
+    assert outcome["content"] == "Here is my answer."
+
+    # Token usage
+    assert outcome["usage"] is not None
+    assert outcome["usage"]["inputTokens"] == 50
+    assert outcome["usage"]["outputTokens"] == 100
+
+    # Both reasoning events are kept, in order
+    assert outcome["reasoning_content_blocks"] is not None
+    assert len(outcome["reasoning_content_blocks"]) == 2
+    assert outcome["reasoning_content_blocks"][0]["reasoningText"]["text"] == "Let me analyze this..."
+    assert outcome["reasoning_content_blocks"][0]["reasoningText"]["signature"] == "sig_analysis"
+    assert outcome["reasoning_content_blocks"][1]["reasoningText"]["text"] == "Now considering options..."
+
+
+def test_agentcore_transform_response_with_reasoning():
+    """
+    transform_response applied to a mocked SSE reply that contains reasoning events.
+    Checks reasoning_content, thinking_blocks and provider_specific_fields on the message.
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+    from litellm.types.utils import ModelResponse
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    # One signed reasoning event, one redacted reasoning event, then the answer
+    sse_body = """data: {"reasoning":true,"reasoningText":"Thinking about the problem...","reasoning_signature":"thinking_sig"}
+
+data: {"reasoning":true,"redactedContent":"c29tZXJlZGFjdGVkZGF0YQ=="}
+
+data: {"event":{"contentBlockDelta":{"delta":{"text":"The answer is 42."}}}}
+
+data: {"event":{"metadata":{"usage":{"inputTokens":25,"outputTokens":10,"totalTokens":35}}}}
+
+data: {"message":{"role":"assistant","content":[{"text":"The answer is 42."}]}}
+"""
+
+    fake_response = Mock(spec=httpx.Response)
+    fake_response.status_code = 200
+    fake_response.headers = {"content-type": "text/event-stream"}
+    fake_response.text = sse_body
+
+    blank_model_response = ModelResponse()
+    logging_stub = MagicMock()
+
+    transformed = agentcore_cfg.transform_response(
+        model="bedrock/agentcore/arn:aws:bedrock-agentcore:us-west-2:123456789012:runtime/test",
+        raw_response=fake_response,
+        model_response=blank_model_response,
+        logging_obj=logging_stub,
+        request_data={},
+        messages=[{"role": "user", "content": "What is the meaning of life?"}],
+        optional_params={},
+        litellm_params={},
+        encoding=None,
+    )
+
+    # One assistant choice with the answer text
+    assert len(transformed.choices) == 1
+    assert transformed.choices[0].message.content == "The answer is 42."
+    assert transformed.choices[0].message.role == "assistant"
+
+    # reasoning_content holds only the readable reasoning text
+    assistant_msg = transformed.choices[0].message
+    assert hasattr(assistant_msg, "reasoning_content")
+    assert assistant_msg.reasoning_content == "Thinking about the problem..."
+
+    # thinking_blocks: the signed block first, the redacted block second
+    assert hasattr(assistant_msg, "thinking_blocks")
+    assert len(assistant_msg.thinking_blocks) == 2
+    assert assistant_msg.thinking_blocks[0]["type"] == "thinking"
+    assert assistant_msg.thinking_blocks[0]["thinking"] == "Thinking about the problem..."
+    assert assistant_msg.thinking_blocks[0]["signature"] == "thinking_sig"
+    assert assistant_msg.thinking_blocks[1]["type"] == "redacted_thinking"
+    assert assistant_msg.thinking_blocks[1]["data"] == "c29tZXJlZGFjdGVkZGF0YQ=="
+
+    # Both reasoning blocks are also exposed under provider_specific_fields
+    assert hasattr(assistant_msg, "provider_specific_fields")
+    assert "reasoningContentBlocks" in assistant_msg.provider_specific_fields
+    assert len(assistant_msg.provider_specific_fields["reasoningContentBlocks"]) == 2
+
+
+def test_agentcore_json_response_with_reasoning():
+    """
+    Unit test for JSON response parsing with a reasoningContent block embedded in the message content.
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+
+    config = AmazonAgentCoreConfig()
+
+    # Create mock JSON response with reasoning in content blocks
+    mock_response = Mock(spec=httpx.Response)
+    mock_response.headers = {"content-type": "application/json"}
+    mock_response.json.return_value = {
+        "result": {
+            "role": "assistant",
+            "content": [
+                {"reasoningContent": {"reasoningText": {"text": "Reasoning step 1", "signature": "json_sig"}}},
+                {"text": "Final answer from JSON response."}
+            ]
+        }
+    }
+
+    parsed = config._get_parsed_response(mock_response)
+
+    # Verify content extraction
+    assert parsed["content"] == "Final answer from JSON response."
+
+    # Verify reasoning blocks extracted from content (the reasoningContent object is added directly)
+    assert parsed["reasoning_content_blocks"] is not None
+    assert len(parsed["reasoning_content_blocks"]) == 1
+    assert parsed["reasoning_content_blocks"][0]["reasoningText"]["text"] == "Reasoning step 1"
+    assert parsed["reasoning_content_blocks"][0]["reasoningText"]["signature"] == "json_sig"
+
+
+def test_agentcore_extract_reasoning_from_agentcore_nested_format():
+    """
+    _extract_reasoning_from_event on the AgentCore nested reasoningContent format.
+
+    Our Strands agent emits reasoning as:
+    {"event": {"contentBlockDelta": {"delta": {"reasoningContent": {"text": "..."}}}}}
+
+    Two other shapes exist and are not the one exercised here:
+    - Strands top-level: {"reasoning": true, "reasoningText": "..."}
+    - Bedrock Converse flat: {"event": {"contentBlockDelta": {"delta": {"reasoningText": "..."}}}}
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    # Delta whose reasoning text sits one level deeper, under reasoningContent
+    nested_event = {
+        "event": {
+            "contentBlockDelta": {
+                "delta": {
+                    "reasoningContent": {
+                        "text": "Let me analyze this step by step..."
+                    }
+                },
+                "contentBlockIndex": 0,
+            }
+        }
+    }
+
+    extracted = agentcore_cfg._extract_reasoning_from_event(nested_event)
+
+    assert extracted is not None
+    assert "reasoningText" in extracted
+    assert extracted["reasoningText"]["text"] == "Let me analyze this step by step..."
+
+
+def test_agentcore_extract_reasoning_agentcore_format_with_signature():
+    """
+    _extract_reasoning_from_event on the nested reasoningContent format when a signature is present.
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    signed_nested_event = {
+        "event": {
+            "contentBlockDelta": {
+                "delta": {
+                    "reasoningContent": {
+                        "text": "Considering the options...",
+                        "signature": "nested_sig_abc"
+                    }
+                }
+            }
+        }
+    }
+
+    extracted = agentcore_cfg._extract_reasoning_from_event(signed_nested_event)
+
+    assert extracted is not None
+    assert extracted["reasoningText"]["text"] == "Considering the options..."
+    assert extracted["reasoningText"]["signature"] == "nested_sig_abc"
+
+
+def test_agentcore_sse_iterator_streams_reasoning_content():
+    """
+    AgentCoreSSEStreamIterator fed with nested reasoningContent events.
+
+    Expects the emitted chunks to expose the reasoning text as
+    reasoning_content on the Delta, followed by the text chunk and a finish chunk.
+    """
+    from litellm.llms.bedrock.chat.agentcore.sse_iterator import AgentCoreSSEStreamIterator
+
+    # Raw SSE lines: two reasoning deltas, one text delta, one usage event
+    sse_lines = [
+        'data: {"event":{"contentBlockDelta":{"delta":{"reasoningContent":{"text":"Thinking..."}},"contentBlockIndex":0}}}',
+        'data: {"event":{"contentBlockDelta":{"delta":{"reasoningContent":{"text":" more thoughts"}},"contentBlockIndex":0}}}',
+        'data: {"event":{"contentBlockDelta":{"delta":{"text":"Final answer."}},"contentBlockIndex":0}}',
+        'data: {"event":{"metadata":{"usage":{"inputTokens":10,"outputTokens":5,"totalTokens":15}}}}',
+    ]
+
+    fake_response = Mock(spec=httpx.Response)
+    fake_response.iter_lines.return_value = iter(sse_lines)
+
+    stream = AgentCoreSSEStreamIterator(response=fake_response, model="test-model")
+    iter(stream)  # Set up synchronous iteration
+
+    emitted = list(stream)
+
+    # Four chunks in total: reasoning, reasoning, text, finish
+    assert len(emitted) == 4
+
+    # Chunks 0 and 1 carry the reasoning text
+    assert hasattr(emitted[0].choices[0].delta, "reasoning_content")
+    assert emitted[0].choices[0].delta.reasoning_content == "Thinking..."
+
+    assert hasattr(emitted[1].choices[0].delta, "reasoning_content")
+    assert emitted[1].choices[0].delta.reasoning_content == " more thoughts"
+
+    # Chunk 2 carries the ordinary answer text
+    assert emitted[2].choices[0].delta.content == "Final answer."
+
+    # Chunk 3 marks the end of the stream
+    assert emitted[3].choices[0].finish_reason == "stop"
+
+
+def test_agentcore_sse_iterator_streams_reasoning_text_flat():
+    """
+    AgentCoreSSEStreamIterator fed with the flat reasoningText delta format.
+    """
+    from litellm.llms.bedrock.chat.agentcore.sse_iterator import AgentCoreSSEStreamIterator
+
+    sse_lines = [
+        'data: {"event":{"contentBlockDelta":{"delta":{"reasoningText":"Flat thinking..."}}}}',
+        'data: {"event":{"contentBlockDelta":{"delta":{"text":"Done."}}}}',
+        'data: {"event":{"metadata":{"usage":{"inputTokens":5,"outputTokens":3,"totalTokens":8}}}}',
+    ]
+
+    fake_response = Mock(spec=httpx.Response)
+    fake_response.iter_lines.return_value = iter(sse_lines)
+
+    stream = AgentCoreSSEStreamIterator(response=fake_response, model="test-model")
+    iter(stream)
+
+    emitted = list(stream)
+
+    assert len(emitted) == 3
+    assert hasattr(emitted[0].choices[0].delta, "reasoning_content")
+    assert emitted[0].choices[0].delta.reasoning_content == "Flat thinking..."
+    assert emitted[1].choices[0].delta.content == "Done."
+    assert emitted[2].choices[0].finish_reason == "stop"
+
+
+def test_agentcore_parse_sse_response_with_agentcore_reasoning_format():
+    """
+    Non-streaming SSE parsing when reasoning arrives in the nested reasoningContent format.
+    """
+    from litellm.llms.bedrock.chat.agentcore.transformation import AmazonAgentCoreConfig
+
+    agentcore_cfg = AmazonAgentCoreConfig()
+
+    # Buffered stream using the nested reasoningContent format (what our agent emits)
+    sse_body = """data: {"event":{"contentBlockDelta":{"delta":{"reasoningContent":{"text":"Step 1: analyze..."}},"contentBlockIndex":0}}}
+
+data: {"event":{"contentBlockDelta":{"delta":{"reasoningContent":{"text":"Step 2: decide..."}},"contentBlockIndex":0}}}
+
+data: {"event":{"contentBlockDelta":{"delta":{"text":"Here is the answer."}},"contentBlockIndex":0}}
+
+data: {"event":{"metadata":{"usage":{"inputTokens":30,"outputTokens":20,"totalTokens":50}}}}
+
+data: {"message":{"role":"assistant","content":[{"text":"Here is the answer."}]}}
+"""
+
+    fake_response = Mock(spec=httpx.Response)
+    fake_response.headers = {"content-type": "text/event-stream"}
+    fake_response.text = sse_body
+
+    outcome = agentcore_cfg._get_parsed_response(fake_response)
+
+    assert outcome["content"] == "Here is the answer."
+    assert outcome["usage"]["inputTokens"] == 30
+
+    # Both nested reasoning deltas are kept, in order
+    assert outcome["reasoning_content_blocks"] is not None
+    assert len(outcome["reasoning_content_blocks"]) == 2
+    assert outcome["reasoning_content_blocks"][0]["reasoningText"]["text"] == "Step 1: analyze..."
+    assert outcome["reasoning_content_blocks"][1]["reasoningText"]["text"] == "Step 2: decide..."
diff --git a/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py b/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py
index ceb231eb4cb9..28b3ba0a1792 100644
--- a/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py
+++ b/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py
@@ -1,9 +1,14 @@
 
+from unittest.mock import AsyncMock, MagicMock, patch
+
 import pytest
-from unittest.mock import MagicMock, AsyncMock, patch
-from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import _base_vertex_proxy_route
+
+from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import (
+    _base_vertex_proxy_route,
+)
 from litellm.types.router import DeploymentTypedDict
 
+
 @pytest.mark.asyncio
 async def test_vertex_passthrough_load_balancing():
     """
@@ -220,3 +225,225 @@ async def test_async_get_available_deployment_for_pass_through():
     assert deployment is not None
     assert deployment["litellm_params"]["use_in_pass_through"] is True
 
+
+@pytest.mark.asyncio
+async def test_vertex_passthrough_forwards_anthropic_beta_header():
+    """
+    Test that _prepare_vertex_auth_headers keeps anthropic-beta and content-type from
+    the incoming request, drops its other headers, and sets the Vertex Authorization.
+
+    This test validates the fix for the issue where the 1M context window header
+    (anthropic-beta: context-1m-2025-08-07) was being dropped when forwarding
+    requests to Vertex AI.
+    """
+    from starlette.datastructures import Headers
+
+    from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
+    from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import (
+        _prepare_vertex_auth_headers,
+    )
+
+    # Create a mock request with anthropic-beta header
+    mock_request = MagicMock()
+    mock_request.headers = Headers({
+        "authorization": "Bearer old-token",
+        "anthropic-beta": "context-1m-2025-08-07",
+        "content-type": "application/json",
+        "user-agent": "test-client",
+        "content-length": "1234",  # Should be removed
+        "host": "localhost:4000",  # Should be removed
+    })
+
+    # Create mock vertex credentials
+    mock_vertex_credentials = MagicMock()
+    mock_vertex_credentials.vertex_project = "test-project"
+    mock_vertex_credentials.vertex_location = "us-central1"
+    mock_vertex_credentials.vertex_credentials = "test-credentials"
+
+    # Create mock handler
+    mock_handler = MagicMock()
+    mock_handler.update_base_target_url_with_credential_location.return_value = (
+        "https://us-central1-aiplatform.googleapis.com"
+    )
+
+    with patch.object(
+        VertexBase,
+        "_ensure_access_token_async",
+        new_callable=AsyncMock,
+        return_value=("test-auth-header", "test-project"),
+    ), patch.object(
+        VertexBase,
+        "_get_token_and_url",
+        return_value=("new-access-token", None),
+    ):
+
+        # Call the function; only headers and headers_passed_through are asserted on
+        (
+            headers,
+            _base_target_url,
+            headers_passed_through,
+            _vertex_project,
+            _vertex_location,
+        ) = await _prepare_vertex_auth_headers(
+            request=mock_request,
+            vertex_credentials=mock_vertex_credentials,
+            router_credentials=None,
+            vertex_project="test-project",
+            vertex_location="us-central1",
+            base_target_url="https://us-central1-aiplatform.googleapis.com",
+            get_vertex_pass_through_handler=mock_handler,
+        )
+
+        # Verify that allowlisted headers are preserved
+        assert "anthropic-beta" in headers
+        assert headers["anthropic-beta"] == "context-1m-2025-08-07"
+        assert "content-type" in headers
+        assert headers["content-type"] == "application/json"
+
+        # Verify that the Authorization header is set with vendor credentials
+        assert "Authorization" in headers
+        assert headers["Authorization"] == "Bearer new-access-token"
+
+        # Verify that non-allowlisted headers are NOT forwarded (security)
+        # Only anthropic-beta, content-type, and Authorization should be present
+        assert "authorization" not in headers  # lowercase auth token not forwarded
+        assert "user-agent" not in headers     # not in allowlist
+        assert "content-length" not in headers  # not in allowlist
+        assert "host" not in headers            # not in allowlist
+
+        # Verify that headers_passed_through is False (since we have credentials)
+        assert headers_passed_through is False
+
+
+@pytest.mark.asyncio
+async def test_vertex_passthrough_does_not_forward_litellm_auth_token():
+    """
+    The caller's LiteLLM authorization header must never be sent on to Vertex AI.
+
+    Regression guard for the case where both the LiteLLM auth token (lowercase
+    'authorization') and the Vertex AI token (uppercase 'Authorization') were
+    sent together, causing 401 errors on the vendor side.
+
+    Incoming request carries:
+      - authorization: Bearer <litellm_token>  (must NOT be forwarded)
+
+    Outgoing headers may only contain:
+      - Authorization: Bearer <vertex_token>  (vendor credentials)
+    """
+    from starlette.datastructures import Headers
+
+    from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
+    from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import (
+        _prepare_vertex_auth_headers,
+    )
+
+    # Incoming request whose headers hold nothing but LiteLLM auth tokens
+    litellm_auth_headers = Headers({
+        "authorization": "Bearer sk-litellm-secret-key",  # must NOT be forwarded
+        "Authorization": "Bearer sk-litellm-secret-key-uppercase",  # NOTE(review): key may be lower-cased by Headers - confirm
+    })
+    incoming_request = MagicMock(headers=litellm_auth_headers)
+
+    # Mock vertex credentials (project, location, credentials)
+    fake_credentials = MagicMock(
+        vertex_project="test-project",
+        vertex_location="us-central1",
+        vertex_credentials="test-credentials",
+    )
+
+    # Mock handler whose URL update hands back the same base_url passed in below
+    base_url = "https://us-central1-aiplatform.googleapis.com"
+    url_handler = MagicMock()
+    url_handler.update_base_target_url_with_credential_location.return_value = base_url
+
+    # Replace both VertexBase token helpers with canned return values
+    ensure_token_patch = patch.object(
+        VertexBase,
+        "_ensure_access_token_async",
+        new_callable=AsyncMock,
+        return_value=("test-auth-header", "test-project"),
+    )
+    token_and_url_patch = patch.object(
+        VertexBase,
+        "_get_token_and_url",
+        return_value=("vertex-access-token", None),
+    )
+
+    with ensure_token_patch, token_and_url_patch:
+        headers, _, _, _, _ = await _prepare_vertex_auth_headers(
+            request=incoming_request,
+            vertex_credentials=fake_credentials,
+            router_credentials=None,
+            vertex_project="test-project",
+            vertex_location="us-central1",
+            base_target_url=base_url,
+            get_vertex_pass_through_handler=url_handler,
+        )
+
+        # The Authorization header must carry the Vertex token
+        assert headers["Authorization"] == "Bearer vertex-access-token"
+
+        # Neither LiteLLM value may show up, as a lowercase key or as the Authorization value
+        assert "authorization" not in headers
+        for forbidden in (
+            "Bearer sk-litellm-secret-key",
+            "Bearer sk-litellm-secret-key-uppercase",
+        ):
+            assert headers.get("Authorization") != forbidden
+
+        # The request held only auth headers, so no allowlisted header is available
+        # to forward: the output must consist of the Authorization key alone
+        assert set(headers.keys()) == {"Authorization"}
+
+
+def test_forward_headers_from_request_x_pass_prefix():
+    """
+    Test that headers with 'x-pass-' prefix are forwarded with the prefix stripped.
+
+    This allows users to force-forward arbitrary headers to the vendor API:
+    - 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value'
+    - 'x-pass-custom-header: value' becomes 'custom-header: value'
+
+    This is tested on BasePassthroughUtils.forward_headers_from_request which is used
+    by all pass-through endpoints (not just Vertex AI).
+    """
+    from litellm.passthrough.utils import BasePassthroughUtils
+
+    # Simulate incoming request headers
+    request_headers = {
+        "x-pass-anthropic-beta": "context-1m-2025-08-07",
+        "x-pass-custom-header": "custom-value",
+        "x-pass-another-header": "another-value",
+        "authorization": "Bearer sk-litellm-key",
+        "x-litellm-api-key": "sk-1234",
+        "content-type": "application/json",
+    }
+
+    # Start with empty headers dict (simulating custom headers from endpoint config)
+    headers = {}
+
+    # Call the method with forward_headers=False (default behavior)
+    # x-pass- headers should still be forwarded
+    result = BasePassthroughUtils.forward_headers_from_request(
+        request_headers=request_headers,
+        headers=headers,
+        forward_headers=False,
+    )
+
+    # Verify x-pass- prefixed headers are forwarded with prefix stripped
+    assert "anthropic-beta" in result
+    assert result["anthropic-beta"] == "context-1m-2025-08-07"
+    assert "custom-header" in result
+    assert result["custom-header"] == "custom-value"
+    assert "another-header" in result
+    assert result["another-header"] == "another-value"
+
+    # Verify other headers are NOT forwarded (since forward_headers=False)
+    assert "authorization" not in result
+    assert "x-litellm-api-key" not in result
+    assert "content-type" not in result
+
+    # Verify NO x-pass- prefixed header survives in output (only stripped versions)
+    leaked = [name for name in result if name.lower().startswith("x-pass-")]
+    assert leaked == [], f"x-pass- prefixed headers leaked into output: {leaked}"
+