diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 8cfff46c33..27de3ffac8 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -20780,7 +20780,7 @@ "inference" ], "summary": "Create an inference endpoint", - "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Groq (`chat_completion`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, 
`text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", + "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `chat_completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Groq (`chat_completion`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", "operationId": "inference-put", "parameters": [ { @@ -20903,7 +20903,7 @@ "inference" ], "summary": "Create an inference endpoint", - "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an 
alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Groq (`chat_completion`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", + "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, 
Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `chat_completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Groq (`chat_completion`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", "operationId": "inference-put-1", "parameters": [ { @@ -21823,7 +21823,7 @@ "externalDocs": { "url": 
"https://www.elastic.co/docs/explore-analyze/elastic-inference/inference-api#infer-chunking-config" }, - "description": "The chunking configuration object.\nApplies only to the `text_embedding` task type.\nNot applicable to the `completion` task type.", + "description": "The chunking configuration object.\nApplies only to the `text_embedding` task type.\nNot applicable to the `completion` and `chat_completion` task types.", "allOf": [ { "$ref": "#/components/schemas/inference._types.InferenceChunkingSettings" @@ -21870,6 +21870,11 @@ "summary": "A completion task", "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + }, + "PutAzureOpenAiRequestExample3": { + "summary": "A chat completion task", + "description": "Run `PUT _inference/chat_completion/azure_openai_chat_completion` to create an inference endpoint that performs a `chat_completion` task.", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" } } } @@ -21883,6 +21888,23 @@ "application/json": { "schema": { "$ref": "#/components/schemas/inference._types.InferenceEndpointInfoAzureOpenAI" + }, + "examples": { + "PutAzureOpenAiResponseExample1": { + "summary": "A text embedding task", + "description": "A successful response when creating an Azure OpenAI `text_embedding` inference endpoint.", + "value": "{\n \"inference_id\": \"azure_openai_embeddings\",\n \"task_type\": \"text_embedding\",\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n 
\"api_version\": \"2024-02-01\",\n    \"rate_limit\": {\n      \"requests_per_minute\": 1440\n    },\n    \"dimensions\": 1536,\n    \"similarity\": \"dot_product\"\n  },\n  \"chunking_settings\": {\n    \"strategy\": \"sentence\",\n    \"max_chunk_size\": 250,\n    \"sentence_overlap\": 1\n  }\n}" + }, + "PutAzureOpenAiResponseExample2": { + "summary": "A completion task", + "description": "A successful response when creating an Azure OpenAI `completion` inference endpoint.", + "value": "{\n  \"inference_id\": \"azure_openai_completion\",\n  \"task_type\": \"completion\",\n  \"service\": \"azureopenai\",\n  \"service_settings\": {\n    \"resource_name\": \"Resource-name\",\n    \"deployment_id\": \"Deployment-id\",\n    \"api_version\": \"2024-02-01\",\n    \"rate_limit\": {\n      \"requests_per_minute\": 120\n    }\n  }\n}" + }, + "PutAzureOpenAiResponseExample3": { + "summary": "A chat completion task", + "description": "A successful response when creating an Azure OpenAI `chat_completion` inference endpoint.", + "value": "{\n  \"inference_id\": \"azure_openai_chat_completion\",\n  \"task_type\": \"chat_completion\",\n  \"service\": \"azureopenai\",\n  \"service_settings\": {\n    \"resource_name\": \"Resource-name\",\n    \"deployment_id\": \"Deployment-id\",\n    \"api_version\": \"2024-02-01\",\n    \"rate_limit\": {\n      \"requests_per_minute\": 120\n    }\n  }\n}" + } } } } @@ -102650,7 +102672,7 @@ "type": "object", "properties": { "requests_per_minute": { - "description": "The number of requests allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task type `completion`: `120`\n* `cohere` service: `10000`\n* `contextualai` service: `1000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* 
`hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `openshift_ai` service: `3000`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`", + "description": "The number of requests allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task types `completion` or `chat_completion`: `120`\n* `cohere` service: `10000`\n* `contextualai` service: `1000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `openshift_ai` service: `3000`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`", "type": "number" } } @@ -103304,6 +103326,7 @@ "type": "string", "enum": [ "completion", + "chat_completion", "text_embedding" ] }, @@ -103345,7 +103368,7 @@ "externalDocs": { "url": "https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits" }, - "description": "This setting helps to minimize the number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion`, it is set to `120`.", + "description": "This setting helps to minimize the number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on 
the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion` and `chat_completion`, it is set to `120`.", "allOf": [ { "$ref": "#/components/schemas/inference._types.RateLimitSetting" @@ -103407,7 +103430,8 @@ "type": "string", "enum": [ "text_embedding", - "completion" + "completion", + "chat_completion" ] }, "inference._types.CohereTaskType": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 399828704a..1d8da1d9da 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -11783,7 +11783,7 @@ "inference" ], "summary": "Create an inference endpoint", - "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Groq (`chat_completion`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", + "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP 
models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `chat_completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Groq (`chat_completion`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", "operationId": "inference-put", "parameters": [ { @@ -11906,7 +11906,7 @@ "inference" ], "summary": "Create an inference endpoint", - "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, 
Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Groq (`chat_completion`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", + "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models 
(ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `chat_completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Groq (`chat_completion`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)\n\n## Required authorization\n\n* Cluster privileges: 
`manage_inference`\n", "operationId": "inference-put-1", "parameters": [ { @@ -12826,7 +12826,7 @@ "externalDocs": { "url": "https://www.elastic.co/docs/explore-analyze/elastic-inference/inference-api#infer-chunking-config" }, - "description": "The chunking configuration object.\nApplies only to the `text_embedding` task type.\nNot applicable to the `completion` task type.", + "description": "The chunking configuration object.\nApplies only to the `text_embedding` task type.\nNot applicable to the `completion` and `chat_completion` task types.", "allOf": [ { "$ref": "#/components/schemas/inference._types.InferenceChunkingSettings" @@ -12873,6 +12873,11 @@ "summary": "A completion task", "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + }, + "PutAzureOpenAiRequestExample3": { + "summary": "A chat completion task", + "description": "Run `PUT _inference/chat_completion/azure_openai_chat_completion` to create an inference endpoint that performs a `chat_completion` task.", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" } } } @@ -12886,6 +12891,23 @@ "application/json": { "schema": { "$ref": "#/components/schemas/inference._types.InferenceEndpointInfoAzureOpenAI" + }, + "examples": { + "PutAzureOpenAiResponseExample1": { + "summary": "A text embedding task", + "description": "A successful response when creating an Azure OpenAI `text_embedding` inference endpoint.", + "value": "{\n \"inference_id\": \"azure_openai_embeddings\",\n \"task_type\": \"text_embedding\",\n \"service\": 
\"azureopenai\",\n  \"service_settings\": {\n    \"resource_name\": \"Resource-name\",\n    \"deployment_id\": \"Deployment-id\",\n    \"api_version\": \"2024-02-01\",\n    \"rate_limit\": {\n      \"requests_per_minute\": 1440\n    },\n    \"dimensions\": 1536,\n    \"similarity\": \"dot_product\"\n  },\n  \"chunking_settings\": {\n    \"strategy\": \"sentence\",\n    \"max_chunk_size\": 250,\n    \"sentence_overlap\": 1\n  }\n}" + }, + "PutAzureOpenAiResponseExample2": { + "summary": "A completion task", + "description": "A successful response when creating an Azure OpenAI `completion` inference endpoint.", + "value": "{\n  \"inference_id\": \"azure_openai_completion\",\n  \"task_type\": \"completion\",\n  \"service\": \"azureopenai\",\n  \"service_settings\": {\n    \"resource_name\": \"Resource-name\",\n    \"deployment_id\": \"Deployment-id\",\n    \"api_version\": \"2024-02-01\",\n    \"rate_limit\": {\n      \"requests_per_minute\": 120\n    }\n  }\n}" + }, + "PutAzureOpenAiResponseExample3": { + "summary": "A chat completion task", + "description": "A successful response when creating an Azure OpenAI `chat_completion` inference endpoint.", + "value": "{\n  \"inference_id\": \"azure_openai_chat_completion\",\n  \"task_type\": \"chat_completion\",\n  \"service\": \"azureopenai\",\n  \"service_settings\": {\n    \"resource_name\": \"Resource-name\",\n    \"deployment_id\": \"Deployment-id\",\n    \"api_version\": \"2024-02-01\",\n    \"rate_limit\": {\n      \"requests_per_minute\": 120\n    }\n  }\n}" + } } } } @@ -66566,7 +66588,7 @@ "type": "object", "properties": { "requests_per_minute": { - "description": "The number of requests allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task type `completion`: `120`\n* `cohere` service: `10000`\n* `contextualai` service: `1000`\n* `elastic` 
service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `openshift_ai` service: `3000`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`", + "description": "The number of requests allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task types `completion` or `chat_completion`: `120`\n* `cohere` service: `10000`\n* `contextualai` service: `1000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `openshift_ai` service: `3000`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`", "type": "number" } } @@ -67220,6 +67242,7 @@ "type": "string", "enum": [ "completion", + "chat_completion", "text_embedding" ] }, @@ -67261,7 +67284,7 @@ "externalDocs": { "url": "https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits" }, - "description": "This setting helps to minimize the number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion`, it is set to `120`.", + "description": "This setting helps to minimize the number of rate limit 
errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion` and `chat_completion`, it is set to `120`.", "allOf": [ { "$ref": "#/components/schemas/inference._types.RateLimitSetting" @@ -67323,7 +67346,8 @@ "type": "string", "enum": [ "text_embedding", - "completion" + "completion", + "chat_completion" ] }, "inference._types.CohereTaskType": { diff --git a/output/schema/schema.json b/output/schema/schema.json index c864610e70..79748d3866 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -10289,7 +10289,7 @@ "visibility": "public" } }, - "description": "Create an inference endpoint.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Groq (`chat_completion`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)", + "description": "Create an inference endpoint.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model 
APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `chat_completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Groq (`chat_completion`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)", "docId": "inference-api-put", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put", "extPreviousVersionDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/8.18/put-inference-api.html", @@ -176930,7 +176930,7 @@ } }, { - "description": "This setting helps to minimize the number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor 
`text_embedding`, it is set to `1440`.\nFor `completion`, it is set to `120`.", + "description": "This setting helps to minimize the number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion` and `chat_completion`, it is set to `120`.", "extDocId": "azureopenai-quota-limits", "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits", "name": "rate_limit", @@ -176971,7 +176971,7 @@ "name": "AzureOpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L830-L832" + "specLocation": "inference/_types/CommonTypes.ts#L831-L833" }, { "kind": "interface", @@ -177001,6 +177001,9 @@ { "name": "completion" }, + { + "name": "chat_completion" + }, { "name": "text_embedding" } @@ -177009,7 +177012,7 @@ "name": "AzureOpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L825-L828" + "specLocation": "inference/_types/CommonTypes.ts#L825-L829" }, { "kind": "enum", @@ -177034,7 +177037,7 @@ "name": "CohereEmbeddingType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L887-L893" + "specLocation": "inference/_types/CommonTypes.ts#L888-L894" }, { "kind": "enum", @@ -177056,7 +177059,7 @@ "name": "CohereInputType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L895-L900" + "specLocation": "inference/_types/CommonTypes.ts#L896-L901" }, { "kind": "interface", @@ -177129,7 +177132,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L834-L875" + "specLocation": "inference/_types/CommonTypes.ts#L835-L876" }, { "kind": "enum", @@ -177142,7 +177145,7 @@ "name": "CohereServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L883-L885" + "specLocation": 
"inference/_types/CommonTypes.ts#L884-L886" }, { "kind": "enum", @@ -177161,7 +177164,7 @@ "name": "CohereSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L902-L906" + "specLocation": "inference/_types/CommonTypes.ts#L903-L907" }, { "kind": "interface", @@ -177219,7 +177222,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L914-L946" + "specLocation": "inference/_types/CommonTypes.ts#L915-L947" }, { "kind": "enum", @@ -177238,7 +177241,7 @@ "name": "CohereTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L877-L881" + "specLocation": "inference/_types/CommonTypes.ts#L878-L882" }, { "kind": "enum", @@ -177257,7 +177260,7 @@ "name": "CohereTruncateType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L908-L912" + "specLocation": "inference/_types/CommonTypes.ts#L909-L913" }, { "kind": "interface", @@ -177566,7 +177569,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1225-L1248" + "specLocation": "inference/_types/CommonTypes.ts#L1226-L1249" }, { "kind": "enum", @@ -177579,7 +177582,7 @@ "name": "ContextualAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1221-L1223" + "specLocation": "inference/_types/CommonTypes.ts#L1222-L1224" }, { "kind": "interface", @@ -177626,7 +177629,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1250-L1268" + "specLocation": "inference/_types/CommonTypes.ts#L1251-L1269" }, { "kind": "interface", @@ -177648,7 +177651,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1025-L1036" + "specLocation": "inference/_types/CommonTypes.ts#L1026-L1037" }, { "kind": "interface", @@ -177666,7 +177669,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1038-L1192" + "specLocation": "inference/_types/CommonTypes.ts#L1039-L1193" }, { "kind": "interface", @@ -177758,7 +177761,7 @@ } } ], - 
"specLocation": "inference/_types/CommonTypes.ts#L948-L1023" + "specLocation": "inference/_types/CommonTypes.ts#L949-L1024" }, { "kind": "enum", @@ -177771,7 +177774,7 @@ "name": "CustomServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1201-L1203" + "specLocation": "inference/_types/CommonTypes.ts#L1202-L1204" }, { "kind": "interface", @@ -177789,7 +177792,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1205-L1219" + "specLocation": "inference/_types/CommonTypes.ts#L1206-L1220" }, { "kind": "enum", @@ -177811,7 +177814,7 @@ "name": "CustomTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1194-L1199" + "specLocation": "inference/_types/CommonTypes.ts#L1195-L1200" }, { "kind": "interface", @@ -177859,7 +177862,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1270-L1292" + "specLocation": "inference/_types/CommonTypes.ts#L1271-L1293" }, { "kind": "enum", @@ -177872,7 +177875,7 @@ "name": "DeepSeekServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1294-L1296" + "specLocation": "inference/_types/CommonTypes.ts#L1295-L1297" }, { "kind": "interface", @@ -178057,7 +178060,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1298-L1354" + "specLocation": "inference/_types/CommonTypes.ts#L1299-L1355" }, { "kind": "enum", @@ -178070,7 +178073,7 @@ "name": "ElasticsearchServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1370-L1372" + "specLocation": "inference/_types/CommonTypes.ts#L1371-L1373" }, { "kind": "interface", @@ -178093,7 +178096,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1356-L1362" + "specLocation": "inference/_types/CommonTypes.ts#L1357-L1363" }, { "kind": "enum", @@ -178112,7 +178115,7 @@ "name": "ElasticsearchTaskType", "namespace": "inference._types" }, - "specLocation": 
"inference/_types/CommonTypes.ts#L1364-L1368" + "specLocation": "inference/_types/CommonTypes.ts#L1365-L1369" }, { "kind": "interface", @@ -178158,7 +178161,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1374-L1400" + "specLocation": "inference/_types/CommonTypes.ts#L1375-L1401" }, { "kind": "enum", @@ -178171,7 +178174,7 @@ "name": "ElserServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1406-L1408" + "specLocation": "inference/_types/CommonTypes.ts#L1407-L1409" }, { "kind": "enum", @@ -178184,7 +178187,7 @@ "name": "ElserTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1402-L1404" + "specLocation": "inference/_types/CommonTypes.ts#L1403-L1405" }, { "kind": "enum", @@ -178197,7 +178200,7 @@ "name": "GoogleAiServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1433-L1435" + "specLocation": "inference/_types/CommonTypes.ts#L1434-L1436" }, { "kind": "interface", @@ -178245,7 +178248,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1410-L1426" + "specLocation": "inference/_types/CommonTypes.ts#L1411-L1427" }, { "kind": "enum", @@ -178261,7 +178264,7 @@ "name": "GoogleAiStudioTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1428-L1431" + "specLocation": "inference/_types/CommonTypes.ts#L1429-L1432" }, { "kind": "enum", @@ -178289,7 +178292,7 @@ "name": "GoogleModelGardenProvider", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1506-L1513" + "specLocation": "inference/_types/CommonTypes.ts#L1507-L1514" }, { "kind": "interface", @@ -178413,7 +178416,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1437-L1504" + "specLocation": "inference/_types/CommonTypes.ts#L1438-L1505" }, { "kind": "enum", @@ -178426,7 +178429,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - 
"specLocation": "inference/_types/CommonTypes.ts#L1554-L1556" + "specLocation": "inference/_types/CommonTypes.ts#L1555-L1557" }, { "kind": "interface", @@ -178488,7 +178491,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1515-L1538" + "specLocation": "inference/_types/CommonTypes.ts#L1516-L1539" }, { "kind": "enum", @@ -178510,7 +178513,7 @@ "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1547-L1552" + "specLocation": "inference/_types/CommonTypes.ts#L1548-L1553" }, { "kind": "interface", @@ -178560,7 +178563,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1558-L1582" + "specLocation": "inference/_types/CommonTypes.ts#L1559-L1583" }, { "kind": "enum", @@ -178573,7 +178576,7 @@ "name": "GroqServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1588-L1590" + "specLocation": "inference/_types/CommonTypes.ts#L1589-L1591" }, { "kind": "enum", @@ -178586,7 +178589,7 @@ "name": "GroqTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1584-L1586" + "specLocation": "inference/_types/CommonTypes.ts#L1585-L1587" }, { "kind": "interface", @@ -178648,7 +178651,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1592-L1624" + "specLocation": "inference/_types/CommonTypes.ts#L1593-L1625" }, { "kind": "enum", @@ -178661,7 +178664,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1645-L1647" + "specLocation": "inference/_types/CommonTypes.ts#L1646-L1648" }, { "kind": "interface", @@ -178695,7 +178698,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1626-L1636" + "specLocation": "inference/_types/CommonTypes.ts#L1627-L1637" }, { "kind": "enum", @@ -178717,7 +178720,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": 
"inference/_types/CommonTypes.ts#L1638-L1643" + "specLocation": "inference/_types/CommonTypes.ts#L1639-L1644" }, { "kind": "interface", @@ -180029,7 +180032,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1649-L1678" + "specLocation": "inference/_types/CommonTypes.ts#L1650-L1679" }, { "kind": "enum", @@ -180042,7 +180045,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1708-L1710" + "specLocation": "inference/_types/CommonTypes.ts#L1709-L1711" }, { "kind": "enum", @@ -180061,7 +180064,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1712-L1716" + "specLocation": "inference/_types/CommonTypes.ts#L1713-L1717" }, { "kind": "interface", @@ -180107,7 +180110,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1680-L1701" + "specLocation": "inference/_types/CommonTypes.ts#L1681-L1702" }, { "kind": "enum", @@ -180123,7 +180126,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1703-L1706" + "specLocation": "inference/_types/CommonTypes.ts#L1704-L1707" }, { "kind": "enum", @@ -180145,7 +180148,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1718-L1723" + "specLocation": "inference/_types/CommonTypes.ts#L1719-L1724" }, { "kind": "interface", @@ -180217,7 +180220,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1725-L1755" + "specLocation": "inference/_types/CommonTypes.ts#L1726-L1756" }, { "kind": "enum", @@ -180230,7 +180233,7 @@ "name": "LlamaServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1763-L1765" + "specLocation": "inference/_types/CommonTypes.ts#L1764-L1766" }, { "kind": "enum", @@ -180249,7 +180252,7 @@ "name": "LlamaSimilarityType", "namespace": "inference._types" }, - 
"specLocation": "inference/_types/CommonTypes.ts#L1767-L1771" + "specLocation": "inference/_types/CommonTypes.ts#L1768-L1772" }, { "kind": "enum", @@ -180268,7 +180271,7 @@ "name": "LlamaTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1757-L1761" + "specLocation": "inference/_types/CommonTypes.ts#L1758-L1762" }, { "kind": "interface", @@ -180426,7 +180429,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1773-L1800" + "specLocation": "inference/_types/CommonTypes.ts#L1774-L1801" }, { "kind": "enum", @@ -180439,7 +180442,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1808-L1810" + "specLocation": "inference/_types/CommonTypes.ts#L1809-L1811" }, { "kind": "enum", @@ -180458,7 +180461,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1802-L1806" + "specLocation": "inference/_types/CommonTypes.ts#L1803-L1807" }, { "kind": "interface", @@ -180545,7 +180548,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1812-L1854" + "specLocation": "inference/_types/CommonTypes.ts#L1813-L1855" }, { "kind": "enum", @@ -180558,7 +180561,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1881-L1883" + "specLocation": "inference/_types/CommonTypes.ts#L1882-L1884" }, { "kind": "interface", @@ -180588,7 +180591,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1856-L1873" + "specLocation": "inference/_types/CommonTypes.ts#L1857-L1874" }, { "kind": "enum", @@ -180607,7 +180610,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1875-L1879" + "specLocation": "inference/_types/CommonTypes.ts#L1876-L1880" }, { "kind": "interface", @@ -180689,7 +180692,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1885-L1917" + 
"specLocation": "inference/_types/CommonTypes.ts#L1886-L1918" }, { "kind": "enum", @@ -180702,7 +180705,7 @@ "name": "OpenShiftAiServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1926-L1928" + "specLocation": "inference/_types/CommonTypes.ts#L1927-L1929" }, { "kind": "enum", @@ -180721,7 +180724,7 @@ "name": "OpenShiftAiSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1930-L1934" + "specLocation": "inference/_types/CommonTypes.ts#L1931-L1935" }, { "kind": "interface", @@ -180755,7 +180758,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1936-L1945" + "specLocation": "inference/_types/CommonTypes.ts#L1937-L1946" }, { "kind": "enum", @@ -180777,7 +180780,7 @@ "name": "OpenShiftAiTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1919-L1924" + "specLocation": "inference/_types/CommonTypes.ts#L1920-L1925" }, { "kind": "interface", @@ -180832,7 +180835,7 @@ }, "properties": [ { - "description": "The number of requests allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task type `completion`: `120`\n* `cohere` service: `10000`\n* `contextualai` service: `1000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `openshift_ai` service: `3000`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`", + "description": "The number of requests 
allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task types `completion` or `chat_completion`: `120`\n* `cohere` service: `10000`\n* `contextualai` service: `1000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `openshift_ai` service: `3000`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`", "name": "requests_per_minute", "required": false, "type": { @@ -181225,13 +181228,16 @@ }, { "name": "completion" + }, + { + "name": "chat_completion" } ], "name": { "name": "TaskTypeAzureOpenAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L71-L74" + "specLocation": "inference/_types/TaskType.ts#L71-L75" }, { "kind": "enum", @@ -181250,7 +181256,7 @@ "name": "TaskTypeCohere", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L76-L80" + "specLocation": "inference/_types/TaskType.ts#L77-L81" }, { "kind": "enum", @@ -181263,7 +181269,7 @@ "name": "TaskTypeContextualAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L82-L84" + "specLocation": "inference/_types/TaskType.ts#L83-L85" }, { "kind": "enum", @@ -181285,7 +181291,7 @@ "name": "TaskTypeCustom", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L86-L91" + "specLocation": "inference/_types/TaskType.ts#L87-L92" }, { "kind": "enum", @@ -181301,7 +181307,7 @@ "name": "TaskTypeDeepSeek", "namespace": 
"inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L93-L96" + "specLocation": "inference/_types/TaskType.ts#L94-L97" }, { "kind": "enum", @@ -181314,7 +181320,7 @@ "name": "TaskTypeELSER", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L104-L106" + "specLocation": "inference/_types/TaskType.ts#L105-L107" }, { "kind": "enum", @@ -181333,7 +181339,7 @@ "name": "TaskTypeElasticsearch", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L98-L102" + "specLocation": "inference/_types/TaskType.ts#L99-L103" }, { "kind": "enum", @@ -181349,7 +181355,7 @@ "name": "TaskTypeGoogleAIStudio", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L108-L111" + "specLocation": "inference/_types/TaskType.ts#L109-L112" }, { "kind": "enum", @@ -181371,7 +181377,7 @@ "name": "TaskTypeGoogleVertexAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L113-L118" + "specLocation": "inference/_types/TaskType.ts#L114-L119" }, { "kind": "enum", @@ -181384,7 +181390,7 @@ "name": "TaskTypeGroq", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L120-L122" + "specLocation": "inference/_types/TaskType.ts#L121-L123" }, { "kind": "enum", @@ -181406,7 +181412,7 @@ "name": "TaskTypeHuggingFace", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L124-L129" + "specLocation": "inference/_types/TaskType.ts#L125-L130" }, { "kind": "enum", @@ -181441,7 +181447,7 @@ "name": "TaskTypeLlama", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L131-L135" + "specLocation": "inference/_types/TaskType.ts#L132-L136" }, { "kind": "enum", @@ -181460,7 +181466,7 @@ "name": "TaskTypeMistral", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L137-L141" + "specLocation": "inference/_types/TaskType.ts#L138-L142" }, { "kind": "enum", @@ 
-181479,7 +181485,7 @@ "name": "TaskTypeOpenAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L143-L147" + "specLocation": "inference/_types/TaskType.ts#L144-L148" }, { "kind": "enum", @@ -181501,7 +181507,7 @@ "name": "TaskTypeOpenShiftAi", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L149-L154" + "specLocation": "inference/_types/TaskType.ts#L150-L155" }, { "kind": "enum", @@ -181517,7 +181523,7 @@ "name": "TaskTypeVoyageAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L156-L159" + "specLocation": "inference/_types/TaskType.ts#L157-L160" }, { "kind": "enum", @@ -181536,7 +181542,7 @@ "name": "TaskTypeWatsonx", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L161-L165" + "specLocation": "inference/_types/TaskType.ts#L162-L166" }, { "kind": "interface", @@ -181658,7 +181664,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1540-L1545" + "specLocation": "inference/_types/CommonTypes.ts#L1541-L1546" }, { "kind": "interface", @@ -181804,7 +181810,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1947-L1978" + "specLocation": "inference/_types/CommonTypes.ts#L1948-L1979" }, { "kind": "enum", @@ -181817,7 +181823,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L2011-L2013" + "specLocation": "inference/_types/CommonTypes.ts#L2012-L2014" }, { "kind": "interface", @@ -181877,7 +181883,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1980-L2004" + "specLocation": "inference/_types/CommonTypes.ts#L1981-L2005" }, { "kind": "enum", @@ -181893,7 +181899,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L2006-L2009" + "specLocation": "inference/_types/CommonTypes.ts#L2007-L2010" }, { "kind": "interface", @@ -181981,7 +181987,7 @@ } } ], - 
"specLocation": "inference/_types/CommonTypes.ts#L2015-L2053" + "specLocation": "inference/_types/CommonTypes.ts#L2016-L2054" }, { "kind": "enum", @@ -181994,7 +182000,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L2061-L2063" + "specLocation": "inference/_types/CommonTypes.ts#L2062-L2064" }, { "kind": "enum", @@ -182013,7 +182019,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L2055-L2059" + "specLocation": "inference/_types/CommonTypes.ts#L2056-L2060" }, { "kind": "request", @@ -182740,7 +182746,7 @@ } } }, - "description": "Create an inference endpoint.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Groq (`chat_completion`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)", + "description": "Create an inference endpoint.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model 
APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `chat_completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Groq (`chat_completion`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)", "examples": { "InferencePutExample1": { "alternatives": [ @@ -184162,7 +184168,7 @@ "kind": "properties", "properties": [ { - "description": "The chunking configuration object.\nApplies only to the `text_embedding` task type.\nNot applicable to the `completion` task type.", + "description": "The chunking configuration object.\nApplies only to the `text_embedding` task type.\nNot applicable to the `completion` and `chat_completion` task types.", "extDocId": "inference-chunking", "extDocUrl": 
"https://www.elastic.co/docs/explore-analyze/elastic-inference/inference-api#infer-chunking-config", "name": "chunking_settings", @@ -184278,6 +184284,12 @@ "method_request": "PUT _inference/completion/azure_openai_completion", "summary": "A completion task", "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + }, + "PutAzureOpenAiRequestExample3": { + "description": "Run `PUT _inference/chat_completion/azure_openai_chat_completion` to create an inference endpoint that performs a `chat_completion` task.", + "method_request": "PUT _inference/chat_completion/azure_openai_chat_completion", + "summary": "A chat completion task", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" } }, "inherits": { @@ -184346,6 +184358,23 @@ } } }, + "examples": { + "PutAzureOpenAiResponseExample1": { + "description": "A successful response when creating an Azure OpenAI `text_embedding` inference endpoint.", + "summary": "A text embedding task", + "value": "{\n \"inference_id\": \"azure_openai_embeddings\",\n \"task_type\": \"text_embedding\",\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\",\n \"rate_limit\": {\n \"requests_per_minute\": 1140\n },\n \"dimensions\": 1536,\n \"similarity\": \"dot_product\"\n },\n \"chunking_settings\": {\n \"strategy\": \"sentence\",\n \"max_chunk_size\": 250,\n \"sentence_overlap\": 1\n }\n}" + }, + "PutAzureOpenAiResponseExample2": { + "description": "A successful response when creating an Azure OpenAI `completion` inference endpoint.", + "summary": "A completion task", + "value": "{\n 
\"inference_id\": \"azure_openai_completion\",\n \"task_type\": \"completion\",\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\",\n \"rate_limit\": {\n \"requests_per_minute\": 120\n }\n }\n}" + }, + "PutAzureOpenAiResponseExample3": { + "description": "A successful response when creating an Azure OpenAI `chat_completion` inference endpoint.", + "summary": "A chat completion task", + "value": "{\n \"inference_id\": \"azure_openai_chat_completion\",\n \"task_type\": \"chat_completion\",\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\",\n \"rate_limit\": {\n \"requests_per_minute\": 120\n }\n }\n}" + } + }, "name": { "name": "Response", "namespace": "inference.put_azureopenai" diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 0590943900..63deee4206 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -14404,7 +14404,7 @@ export interface InferenceAzureOpenAITaskSettings { user?: string } -export type InferenceAzureOpenAITaskType = 'completion' | 'text_embedding' +export type InferenceAzureOpenAITaskType = 'completion' | 'chat_completion' | 'text_embedding' export type InferenceCohereEmbeddingType = 'binary' | 'bit' | 'byte' | 'float' | 'int8' @@ -14911,7 +14911,7 @@ export type InferenceTaskTypeAnthropic = 'completion' export type InferenceTaskTypeAzureAIStudio = 'text_embedding' | 'completion' | 'rerank' -export type InferenceTaskTypeAzureOpenAI = 'text_embedding' | 'completion' +export type InferenceTaskTypeAzureOpenAI = 'text_embedding' | 'completion' | 'chat_completion' export type InferenceTaskTypeCohere = 'text_embedding' | 'rerank' | 'completion' diff --git a/package-lock.json b/package-lock.json index c7b190e242..b6b31da2c0 100644 --- a/package-lock.json +++ 
b/package-lock.json @@ -7,7 +7,7 @@ "": { "version": "overlay", "dependencies": { - "@redocly/cli": "^1.34.5" + "@redocly/cli": "^1.34.6" } }, "node_modules/@babel/code-frame": { @@ -434,9 +434,9 @@ } }, "node_modules/@redocly/cli": { - "version": "1.34.5", - "resolved": "https://registry.npmjs.org/@redocly/cli/-/cli-1.34.5.tgz", - "integrity": "sha512-5IEwxs7SGP5KEXjBKLU8Ffdz9by/KqNSeBk6YUVQaGxMXK//uYlTJIPntgUXbo1KAGG2d2q2XF8y4iFz6qNeiw==", + "version": "1.34.6", + "resolved": "https://registry.npmjs.org/@redocly/cli/-/cli-1.34.6.tgz", + "integrity": "sha512-V03jtLOXLm6+wpTuFNw9+eLHE6R3wywZo4Clt9XMPnulafbJcpCFz+J0e5/4Cw4zZB087xjU7WvRdI/bZ+pHtA==", "license": "MIT", "dependencies": { "@opentelemetry/api": "1.9.0", @@ -445,8 +445,8 @@ "@opentelemetry/sdk-trace-node": "1.26.0", "@opentelemetry/semantic-conventions": "1.27.0", "@redocly/config": "^0.22.0", - "@redocly/openapi-core": "1.34.5", - "@redocly/respect-core": "1.34.5", + "@redocly/openapi-core": "1.34.6", + "@redocly/respect-core": "1.34.6", "abort-controller": "^3.0.0", "chokidar": "^3.5.1", "colorette": "^1.2.0", @@ -458,8 +458,8 @@ "handlebars": "^4.7.6", "mobx": "^6.0.4", "pluralize": "^8.0.0", - "react": "^17.0.0 || ^18.2.0 || ^19.0.0", - "react-dom": "^17.0.0 || ^18.2.0 || ^19.0.0", + "react": "^17.0.0 || ^18.2.0 || ^19.2.1", + "react-dom": "^17.0.0 || ^18.2.0 || ^19.2.1", "redoc": "2.5.0", "semver": "^7.5.2", "simple-websocket": "^9.0.0", @@ -482,9 +482,9 @@ "license": "MIT" }, "node_modules/@redocly/openapi-core": { - "version": "1.34.5", - "resolved": "https://registry.npmjs.org/@redocly/openapi-core/-/openapi-core-1.34.5.tgz", - "integrity": "sha512-0EbE8LRbkogtcCXU7liAyC00n9uNG9hJ+eMyHFdUsy9lB/WGqnEBgwjA9q2cyzAVcdTkQqTBBU1XePNnN3OijA==", + "version": "1.34.6", + "resolved": "https://registry.npmjs.org/@redocly/openapi-core/-/openapi-core-1.34.6.tgz", + "integrity": "sha512-2+O+riuIUgVSuLl3Lyh5AplWZyVMNuG2F98/o6NrutKJfW4/GTZdPpZlIphS0HGgcOHgmWcCSHj+dWFlZaGSHw==", "license": "MIT", "dependencies": { 
"@redocly/ajv": "^8.11.2", @@ -503,14 +503,14 @@ } }, "node_modules/@redocly/respect-core": { - "version": "1.34.5", - "resolved": "https://registry.npmjs.org/@redocly/respect-core/-/respect-core-1.34.5.tgz", - "integrity": "sha512-GheC/g/QFztPe9UA9LamooSplQuy9pe0Yr8XGTqkz0ahivLDl7svoy/LSQNn1QH3XGtLKwFYMfTwFR2TAYyh5Q==", + "version": "1.34.6", + "resolved": "https://registry.npmjs.org/@redocly/respect-core/-/respect-core-1.34.6.tgz", + "integrity": "sha512-nXFBRctoB4CPCLR2it2WxDsuAE/nLd4EnW9mQ+IUKrIFAjMv1O6rgggxkgdlyKUyenYkajJIHSKwVbRS6FwlEQ==", "license": "MIT", "dependencies": { "@faker-js/faker": "^7.6.0", "@redocly/ajv": "8.11.2", - "@redocly/openapi-core": "1.34.5", + "@redocly/openapi-core": "1.34.6", "better-ajv-errors": "^1.2.0", "colorette": "^2.0.20", "concat-stream": "^2.0.0", @@ -2274,24 +2274,24 @@ } }, "node_modules/react": { - "version": "19.2.0", - "resolved": "https://registry.npmjs.org/react/-/react-19.2.0.tgz", - "integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==", + "version": "19.2.1", + "resolved": "https://registry.npmjs.org/react/-/react-19.2.1.tgz", + "integrity": "sha512-DGrYcCWK7tvYMnWh79yrPHt+vdx9tY+1gPZa7nJQtO/p8bLTDaHp4dzwEhQB7pZ4Xe3ok4XKuEPrVuc+wlpkmw==", "license": "MIT", "engines": { "node": ">=0.10.0" } }, "node_modules/react-dom": { - "version": "19.2.0", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.0.tgz", - "integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==", + "version": "19.2.1", + "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.1.tgz", + "integrity": "sha512-ibrK8llX2a4eOskq1mXKu/TGZj9qzomO+sNfO98M6d9zIPOEhlBkMkBUBLd1vgS0gQsLDBzA+8jJBVXDnfHmJg==", "license": "MIT", "dependencies": { "scheduler": "^0.27.0" }, "peerDependencies": { - "react": "^19.2.0" + "react": "^19.2.1" } }, "node_modules/react-is": { diff --git a/package.json b/package.json index 
a5f1a5e0a7..ea76d681d6 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "transform-to-openapi": "npm run transform-to-openapi --prefix compiler --" }, "dependencies": { - "@redocly/cli": "^1.34.5" + "@redocly/cli": "^1.34.6" }, "version": "overlay" } diff --git a/specification/_json_spec/inference.put_azureopenai.json b/specification/_json_spec/inference.put_azureopenai.json index e185e27a98..e87f01db7b 100644 --- a/specification/_json_spec/inference.put_azureopenai.json +++ b/specification/_json_spec/inference.put_azureopenai.json @@ -19,7 +19,7 @@ "task_type": { "type": "enum", "description": "The task type", - "options": ["completion", "text_embedding"] + "options": ["completion", "chat_completion", "text_embedding"] }, "azureopenai_inference_id": { "type": "string", diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 69ee9a00da..e6e742a0dc 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -802,7 +802,7 @@ export class AzureOpenAIServiceSettings { * This setting helps to minimize the number of rate limit errors returned from Azure. * The `azureopenai` service sets a default number of requests allowed per minute depending on the task type. * For `text_embedding`, it is set to `1440`. - * For `completion`, it is set to `120`. + * For `completion` and `chat_completion`, it is set to `120`. 
* @ext_doc_id azureopenai-quota-limits */ rate_limit?: RateLimitSetting @@ -824,6 +824,7 @@ export class AzureOpenAITaskSettings { export enum AzureOpenAITaskType { completion, + chat_completion, text_embedding } diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts index f5f759a894..66f16009e0 100644 --- a/specification/inference/_types/Services.ts +++ b/specification/inference/_types/Services.ts @@ -425,7 +425,7 @@ export class RateLimitSetting { * * `anthropic` service: `50` * * `azureaistudio` service: `240` * * `azureopenai` service and task type `text_embedding`: `1440` - * * `azureopenai` service and task type `completion`: `120` + * * `azureopenai` service and task types `completion` or `chat_completion`: `120` * * `cohere` service: `10000` * * `contextualai` service: `1000` * * `elastic` service and task type `chat_completion`: `240` diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts index 44ddf99444..4650cb12d5 100644 --- a/specification/inference/_types/TaskType.ts +++ b/specification/inference/_types/TaskType.ts @@ -70,7 +70,8 @@ export enum TaskTypeAzureAIStudio { export enum TaskTypeAzureOpenAI { text_embedding, - completion + completion, + chat_completion } export enum TaskTypeCohere { diff --git a/specification/inference/put/PutRequest.ts b/specification/inference/put/PutRequest.ts index c231a0e441..dd206e6feb 100644 --- a/specification/inference/put/PutRequest.ts +++ b/specification/inference/put/PutRequest.ts @@ -37,7 +37,7 @@ import { TaskType } from '@inference/_types/TaskType' * * Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`) * * Anthropic (`completion`) * * Azure AI Studio (`completion`, `rerank`, `text_embedding`) - * * Azure OpenAI (`completion`, `text_embedding`) + * * Azure OpenAI (`completion`, `chat_completion`, `text_embedding`) * * Cohere (`completion`, `rerank`, `text_embedding`) * * 
DeepSeek (`chat_completion`, `completion`) * * Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland) diff --git a/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts b/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts index 320adbc473..e26f288864 100644 --- a/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts +++ b/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts @@ -76,7 +76,7 @@ export interface Request extends RequestBase { /** * The chunking configuration object. * Applies only to the `text_embedding` task type. - * Not applicable to the `completion` task type. + * Not applicable to the `completion` and `chat_completion` task types. * @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample3.yaml b/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample3.yaml new file mode 100644 index 0000000000..4233b09808 --- /dev/null +++ b/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample3.yaml @@ -0,0 +1,14 @@ +summary: A chat completion task +description: Run `PUT _inference/chat_completion/azure_openai_chat_completion` to create an inference endpoint that performs a `chat_completion` task. 
+method_request: 'PUT _inference/chat_completion/azure_openai_chat_completion' +# type: "request" +value: |- + { + "service": "azureopenai", + "service_settings": { + "api_key": "Api-Key", + "resource_name": "Resource-name", + "deployment_id": "Deployment-id", + "api_version": "2024-02-01" + } + } diff --git a/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample1.yaml b/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample1.yaml new file mode 100644 index 0000000000..4c00b0ac87 --- /dev/null +++ b/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample1.yaml @@ -0,0 +1,25 @@ +summary: A text embedding task +description: A successful response when creating an Azure OpenAI `text_embedding` inference endpoint. +# type: response +# response_code: +value: |- + { + "inference_id": "azure_openai_embeddings", + "task_type": "text_embedding", + "service": "azureopenai", + "service_settings": { + "resource_name": "Resource-name", + "deployment_id": "Deployment-id", + "api_version": "2024-02-01", + "rate_limit": { + "requests_per_minute": 1140 + }, + "dimensions": 1536, + "similarity": "dot_product" + }, + "chunking_settings": { + "strategy": "sentence", + "max_chunk_size": 250, + "sentence_overlap": 1 + } + } diff --git a/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample2.yaml b/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample2.yaml new file mode 100644 index 0000000000..a8966ed93d --- /dev/null +++ b/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample2.yaml @@ -0,0 +1,18 @@ +summary: A completion task +description: A successful response when creating an Azure OpenAI `completion` inference endpoint. 
+# type: response +# response_code: +value: |- + { + "inference_id": "azure_openai_completion", + "task_type": "completion", + "service": "azureopenai", + "service_settings": { + "resource_name": "Resource-name", + "deployment_id": "Deployment-id", + "api_version": "2024-02-01", + "rate_limit": { + "requests_per_minute": 120 + } + } + } diff --git a/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample3.yaml b/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample3.yaml new file mode 100644 index 0000000000..d2926a07f2 --- /dev/null +++ b/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample3.yaml @@ -0,0 +1,18 @@ +summary: A chat completion task +description: A successful response when creating an Azure OpenAI `chat_completion` inference endpoint. +# type: response +# response_code: +value: |- + { + "inference_id": "azure_openai_chat_completion", + "task_type": "chat_completion", + "service": "azureopenai", + "service_settings": { + "resource_name": "Resource-name", + "deployment_id": "Deployment-id", + "api_version": "2024-02-01", + "rate_limit": { + "requests_per_minute": 120 + } + } + }