elastic · Jan-Kazlouski-elastic · Dec 5, 2025 · Dec 10, 2025 · Dec 11, 2025 · DonalEvans
diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json
diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json
diff --git a/output/schema/schema.json b/output/schema/schema.json
diff --git a/output/typescript/types.ts b/output/typescript/types.ts
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -3,7 +3,7 @@
     "transform-to-openapi": "npm run transform-to-openapi --prefix compiler --"
   },
   "dependencies": {
-    "@redocly/cli": "^1.34.5"
+    "@redocly/cli": "^1.34.6"
   },
   "version": "overlay"
 }
@@ -19,7 +19,7 @@
             "task_type": {
               "type": "enum",
               "description": "The task type",
-              "options": ["completion", "text_embedding"]
+              "options": ["completion", "chat_completion", "text_embedding"]
             },
             "azureopenai_inference_id": {
               "type": "string",

diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts
@@ -802,7 +802,7 @@ export class AzureOpenAIServiceSettings {
    * This setting helps to minimize the number of rate limit errors returned from Azure.
    * The `azureopenai` service sets a default number of requests allowed per minute depending on the task type.
    * For `text_embedding`, it is set to `1440`.
-   * For `completion`, it is set to `120`.
+   * For `completion` and `chat_completion`, it is set to `120`.
    * @ext_doc_id azureopenai-quota-limits
    */
   rate_limit?: RateLimitSetting
@@ -824,6 +824,7 @@ export class AzureOpenAITaskSettings {
 
 export enum AzureOpenAITaskType {
   completion,
+  chat_completion,
   text_embedding
 }
 

diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts
@@ -425,7 +425,7 @@ export class RateLimitSetting {
    * * `anthropic` service: `50`
    * * `azureaistudio` service: `240`
    * * `azureopenai` service and task type `text_embedding`: `1440`
-   * * `azureopenai` service and task type `completion`: `120`
+   * * `azureopenai` service and task types `completion` or `chat_completion`: `120`
    * * `cohere` service: `10000`
    * * `contextualai` service: `1000`
    * * `elastic` service and task type `chat_completion`: `240`

diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts
@@ -70,7 +70,8 @@ export enum TaskTypeAzureAIStudio {
 
 export enum TaskTypeAzureOpenAI {
   text_embedding,
-  completion
+  completion,
+  chat_completion
 }
 
 export enum TaskTypeCohere {

diff --git a/specification/inference/put/PutRequest.ts b/specification/inference/put/PutRequest.ts
@@ -37,7 +37,7 @@ import { TaskType } from '@inference/_types/TaskType'
  * * Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)
  * * Anthropic (`completion`)
  * * Azure AI Studio (`completion`, `rerank`, `text_embedding`)
- * * Azure OpenAI (`completion`, `text_embedding`)
+ * * Azure OpenAI (`chat_completion`, `completion`, `text_embedding`)
  * * Cohere (`completion`, `rerank`, `text_embedding`)
  * * DeepSeek (`chat_completion`, `completion`)
  * * Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)

diff --git a/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts b/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts
@@ -76,7 +76,7 @@ export interface Request extends RequestBase {
     /**
      * The chunking configuration object.
      * Applies only to the `text_embedding` task type.
-     * Not applicable to the `completion` task type.
+     * Not applicable to the `completion` and `chat_completion` task types.
      * @ext_doc_id inference-chunking
      */
     chunking_settings?: InferenceChunkingSettings

diff --git a/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample3.yaml b/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample3.yaml
@@ -0,0 +1,14 @@
+summary: A chat completion task
+description: Run `PUT _inference/chat_completion/azure_openai_chat_completion` to create an inference endpoint that performs a `chat_completion` task.
+method_request: 'PUT _inference/chat_completion/azure_openai_chat_completion'
+# type: "request"
+value: |-
+  {
+      "service": "azureopenai",
+      "service_settings": {
+          "api_key": "Api-Key",
+          "resource_name": "Resource-name",
+          "deployment_id": "Deployment-id",
+          "api_version": "2024-02-01"
+      }
+  }
diff --git a/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample1.yaml b/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample1.yaml
@@ -0,0 +1,25 @@
+summary: A text embedding task
+description: A successful response when creating an Azure OpenAI `text_embedding` inference endpoint.
+# type: response
+# response_code:
+value: |-
+  {
+    "inference_id": "azure_openai_embeddings",
+    "task_type": "text_embedding",
+    "service": "azureopenai",
+    "service_settings": {
+      "resource_name": "Resource-name",
+      "deployment_id": "Deployment-id",
+      "api_version": "2024-02-01",
+      "rate_limit": {
+        "requests_per_minute": 1440
+      },
+      "dimensions": 1536,
+      "similarity": "dot_product"
+    },
+    "chunking_settings": {
+      "strategy": "sentence",
+      "max_chunk_size": 250,
+      "sentence_overlap": 1
+    }
+  }
diff --git a/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample2.yaml b/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample2.yaml
@@ -0,0 +1,18 @@
+summary: A completion task
+description: A successful response when creating an Azure OpenAI `completion` inference endpoint.
+# type: response
+# response_code:
+value: |-
+  {
+    "inference_id": "azure_openai_completion",
+    "task_type": "completion",
+    "service": "azureopenai",
+    "service_settings": {
+      "resource_name": "Resource-name",
+      "deployment_id": "Deployment-id",
+      "api_version": "2024-02-01",
+      "rate_limit": {
+        "requests_per_minute": 120
+      }
+    }
+  }
diff --git a/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample3.yaml b/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample3.yaml
@@ -0,0 +1,18 @@
+summary: A chat completion task
+description: A successful response when creating an Azure OpenAI `chat_completion` inference endpoint.
+# type: response
+# response_code:
+value: |-
+  {
+    "inference_id": "azure_openai_chat_completion",
+    "task_type": "chat_completion",
+    "service": "azureopenai",
+    "service_settings": {
+      "resource_name": "Resource-name",
+      "deployment_id": "Deployment-id",
+      "api_version": "2024-02-01",
+      "rate_limit": {
+        "requests_per_minute": 120
+      }
+    }
+  }