databricks.ModelServing
Explore with Pulumi AI
This resource allows you to manage Model Serving endpoints in Databricks.
If you replace
served_models
with served_entities
in an existing serving endpoint, the serving endpoint will briefly go into an update state (~30 seconds) and increment the config version.
Example Usage
import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";
const _this = new databricks.ModelServing("this", {
name: "ads-serving-endpoint",
config: {
servedEntities: [
{
name: "prod_model",
entityName: "ads-model",
entityVersion: "2",
workloadSize: "Small",
scaleToZeroEnabled: true,
},
{
name: "candidate_model",
entityName: "ads-model",
entityVersion: "4",
workloadSize: "Small",
scaleToZeroEnabled: false,
},
],
trafficConfig: {
routes: [
{
servedModelName: "prod_model",
trafficPercentage: 90,
},
{
servedModelName: "candidate_model",
trafficPercentage: 10,
},
],
},
},
});
import pulumi
import pulumi_databricks as databricks
this = databricks.ModelServing("this",
name="ads-serving-endpoint",
config={
"served_entities": [
{
"name": "prod_model",
"entity_name": "ads-model",
"entity_version": "2",
"workload_size": "Small",
"scale_to_zero_enabled": True,
},
{
"name": "candidate_model",
"entity_name": "ads-model",
"entity_version": "4",
"workload_size": "Small",
"scale_to_zero_enabled": False,
},
],
"traffic_config": {
"routes": [
{
"served_model_name": "prod_model",
"traffic_percentage": 90,
},
{
"served_model_name": "candidate_model",
"traffic_percentage": 10,
},
],
},
})
package main
import (
"github.com/pulumi/pulumi-databricks/sdk/go/databricks"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := databricks.NewModelServing(ctx, "this", &databricks.ModelServingArgs{
Name: pulumi.String("ads-serving-endpoint"),
Config: &databricks.ModelServingConfigArgs{
ServedEntities: databricks.ModelServingConfigServedEntityArray{
&databricks.ModelServingConfigServedEntityArgs{
Name: pulumi.String("prod_model"),
EntityName: pulumi.String("ads-model"),
EntityVersion: pulumi.String("2"),
WorkloadSize: pulumi.String("Small"),
ScaleToZeroEnabled: pulumi.Bool(true),
},
&databricks.ModelServingConfigServedEntityArgs{
Name: pulumi.String("candidate_model"),
EntityName: pulumi.String("ads-model"),
EntityVersion: pulumi.String("4"),
WorkloadSize: pulumi.String("Small"),
ScaleToZeroEnabled: pulumi.Bool(false),
},
},
TrafficConfig: &databricks.ModelServingConfigTrafficConfigArgs{
Routes: databricks.ModelServingConfigTrafficConfigRouteArray{
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("prod_model"),
TrafficPercentage: pulumi.Int(90),
},
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("candidate_model"),
TrafficPercentage: pulumi.Int(10),
},
},
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Databricks = Pulumi.Databricks;
return await Deployment.RunAsync(() =>
{
var @this = new Databricks.ModelServing("this", new()
{
Name = "ads-serving-endpoint",
Config = new Databricks.Inputs.ModelServingConfigArgs
{
ServedEntities = new[]
{
new Databricks.Inputs.ModelServingConfigServedEntityArgs
{
Name = "prod_model",
EntityName = "ads-model",
EntityVersion = "2",
WorkloadSize = "Small",
ScaleToZeroEnabled = true,
},
new Databricks.Inputs.ModelServingConfigServedEntityArgs
{
Name = "candidate_model",
EntityName = "ads-model",
EntityVersion = "4",
WorkloadSize = "Small",
ScaleToZeroEnabled = false,
},
},
TrafficConfig = new Databricks.Inputs.ModelServingConfigTrafficConfigArgs
{
Routes = new[]
{
new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
{
ServedModelName = "prod_model",
TrafficPercentage = 90,
},
new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
{
ServedModelName = "candidate_model",
TrafficPercentage = 10,
},
},
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.databricks.ModelServing;
import com.pulumi.databricks.ModelServingArgs;
import com.pulumi.databricks.inputs.ModelServingConfigArgs;
import com.pulumi.databricks.inputs.ModelServingConfigTrafficConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var this_ = new ModelServing("this", ModelServingArgs.builder()
.name("ads-serving-endpoint")
.config(ModelServingConfigArgs.builder()
.servedEntities(
ModelServingConfigServedEntityArgs.builder()
.name("prod_model")
.entityName("ads-model")
.entityVersion("2")
.workloadSize("Small")
.scaleToZeroEnabled(true)
.build(),
ModelServingConfigServedEntityArgs.builder()
.name("candidate_model")
.entityName("ads-model")
.entityVersion("4")
.workloadSize("Small")
.scaleToZeroEnabled(false)
.build())
.trafficConfig(ModelServingConfigTrafficConfigArgs.builder()
.routes(
ModelServingConfigTrafficConfigRouteArgs.builder()
.servedModelName("prod_model")
.trafficPercentage(90)
.build(),
ModelServingConfigTrafficConfigRouteArgs.builder()
.servedModelName("candidate_model")
.trafficPercentage(10)
.build())
.build())
.build())
.build());
}
}
resources:
this:
type: databricks:ModelServing
properties:
name: ads-serving-endpoint
config:
servedEntities:
- name: prod_model
entityName: ads-model
entityVersion: '2'
workloadSize: Small
scaleToZeroEnabled: true
- name: candidate_model
entityName: ads-model
entityVersion: '4'
workloadSize: Small
scaleToZeroEnabled: false
trafficConfig:
routes:
- servedModelName: prod_model
trafficPercentage: 90
- servedModelName: candidate_model
trafficPercentage: 10
Access Control
- databricks.Permissions can control which groups or individual users can Manage, Query or View individual serving endpoints.
Related Resources
The following resources are often used in the same context:
- databricks.RegisteredModel to create Models in Unity Catalog in Databricks.
- End to end workspace management guide.
- databricks.Directory to manage directories in Databricks Workspace.
- databricks.MlflowModel to create models in the workspace model registry in Databricks.
- databricks.Notebook to manage Databricks Notebooks.
- databricks.Notebook data to export a notebook from Databricks Workspace.
- databricks.Repo to manage Databricks Repos.
Create ModelServing Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new ModelServing(name: string, args: ModelServingArgs, opts?: CustomResourceOptions);
@overload
def ModelServing(resource_name: str,
args: ModelServingArgs,
opts: Optional[ResourceOptions] = None)
@overload
def ModelServing(resource_name: str,
opts: Optional[ResourceOptions] = None,
config: Optional[ModelServingConfigArgs] = None,
ai_gateway: Optional[ModelServingAiGatewayArgs] = None,
name: Optional[str] = None,
rate_limits: Optional[Sequence[ModelServingRateLimitArgs]] = None,
route_optimized: Optional[bool] = None,
tags: Optional[Sequence[ModelServingTagArgs]] = None)
func NewModelServing(ctx *Context, name string, args ModelServingArgs, opts ...ResourceOption) (*ModelServing, error)
public ModelServing(string name, ModelServingArgs args, CustomResourceOptions? opts = null)
public ModelServing(String name, ModelServingArgs args)
public ModelServing(String name, ModelServingArgs args, CustomResourceOptions options)
type: databricks:ModelServing
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
Constructor example
The following reference example uses placeholder values for all input properties.
var modelServingResource = new Databricks.ModelServing("modelServingResource", new()
{
Config = new Databricks.Inputs.ModelServingConfigArgs
{
AutoCaptureConfig = new Databricks.Inputs.ModelServingConfigAutoCaptureConfigArgs
{
CatalogName = "string",
Enabled = false,
SchemaName = "string",
TableNamePrefix = "string",
},
ServedEntities = new[]
{
new Databricks.Inputs.ModelServingConfigServedEntityArgs
{
EntityName = "string",
EntityVersion = "string",
EnvironmentVars =
{
{ "string", "string" },
},
ExternalModel = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelArgs
{
Name = "string",
Provider = "string",
Task = "string",
Ai21labsConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAi21labsConfigArgs
{
Ai21labsApiKey = "string",
Ai21labsApiKeyPlaintext = "string",
},
AmazonBedrockConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs
{
AwsRegion = "string",
BedrockProvider = "string",
AwsAccessKeyId = "string",
AwsAccessKeyIdPlaintext = "string",
AwsSecretAccessKey = "string",
AwsSecretAccessKeyPlaintext = "string",
},
AnthropicConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAnthropicConfigArgs
{
AnthropicApiKey = "string",
AnthropicApiKeyPlaintext = "string",
},
CohereConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelCohereConfigArgs
{
CohereApiBase = "string",
CohereApiKey = "string",
CohereApiKeyPlaintext = "string",
},
DatabricksModelServingConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs
{
DatabricksWorkspaceUrl = "string",
DatabricksApiToken = "string",
DatabricksApiTokenPlaintext = "string",
},
GoogleCloudVertexAiConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs
{
PrivateKey = "string",
PrivateKeyPlaintext = "string",
ProjectId = "string",
Region = "string",
},
OpenaiConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelOpenaiConfigArgs
{
MicrosoftEntraClientId = "string",
MicrosoftEntraClientSecret = "string",
MicrosoftEntraClientSecretPlaintext = "string",
MicrosoftEntraTenantId = "string",
OpenaiApiBase = "string",
OpenaiApiKey = "string",
OpenaiApiKeyPlaintext = "string",
OpenaiApiType = "string",
OpenaiApiVersion = "string",
OpenaiDeploymentName = "string",
OpenaiOrganization = "string",
},
PalmConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelPalmConfigArgs
{
PalmApiKey = "string",
PalmApiKeyPlaintext = "string",
},
},
InstanceProfileArn = "string",
MaxProvisionedThroughput = 0,
MinProvisionedThroughput = 0,
Name = "string",
ScaleToZeroEnabled = false,
WorkloadSize = "string",
WorkloadType = "string",
},
},
TrafficConfig = new Databricks.Inputs.ModelServingConfigTrafficConfigArgs
{
Routes = new[]
{
new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
{
ServedModelName = "string",
TrafficPercentage = 0,
},
},
},
},
AiGateway = new Databricks.Inputs.ModelServingAiGatewayArgs
{
Guardrails = new Databricks.Inputs.ModelServingAiGatewayGuardrailsArgs
{
Input = new Databricks.Inputs.ModelServingAiGatewayGuardrailsInputArgs
{
InvalidKeywords = new[]
{
"string",
},
Pii = new Databricks.Inputs.ModelServingAiGatewayGuardrailsInputPiiArgs
{
Behavior = "string",
},
Safety = false,
ValidTopics = new[]
{
"string",
},
},
Output = new Databricks.Inputs.ModelServingAiGatewayGuardrailsOutputArgs
{
InvalidKeywords = new[]
{
"string",
},
Pii = new Databricks.Inputs.ModelServingAiGatewayGuardrailsOutputPiiArgs
{
Behavior = "string",
},
Safety = false,
ValidTopics = new[]
{
"string",
},
},
},
InferenceTableConfig = new Databricks.Inputs.ModelServingAiGatewayInferenceTableConfigArgs
{
CatalogName = "string",
Enabled = false,
SchemaName = "string",
TableNamePrefix = "string",
},
RateLimits = new[]
{
new Databricks.Inputs.ModelServingAiGatewayRateLimitArgs
{
Calls = 0,
RenewalPeriod = "string",
Key = "string",
},
},
UsageTrackingConfig = new Databricks.Inputs.ModelServingAiGatewayUsageTrackingConfigArgs
{
Enabled = false,
},
},
Name = "string",
RateLimits = new[]
{
new Databricks.Inputs.ModelServingRateLimitArgs
{
Calls = 0,
RenewalPeriod = "string",
Key = "string",
},
},
RouteOptimized = false,
Tags = new[]
{
new Databricks.Inputs.ModelServingTagArgs
{
Key = "string",
Value = "string",
},
},
});
example, err := databricks.NewModelServing(ctx, "modelServingResource", &databricks.ModelServingArgs{
Config: &databricks.ModelServingConfigArgs{
AutoCaptureConfig: &databricks.ModelServingConfigAutoCaptureConfigArgs{
CatalogName: pulumi.String("string"),
Enabled: pulumi.Bool(false),
SchemaName: pulumi.String("string"),
TableNamePrefix: pulumi.String("string"),
},
ServedEntities: databricks.ModelServingConfigServedEntityArray{
&databricks.ModelServingConfigServedEntityArgs{
EntityName: pulumi.String("string"),
EntityVersion: pulumi.String("string"),
EnvironmentVars: pulumi.StringMap{
"string": pulumi.String("string"),
},
ExternalModel: &databricks.ModelServingConfigServedEntityExternalModelArgs{
Name: pulumi.String("string"),
Provider: pulumi.String("string"),
Task: pulumi.String("string"),
Ai21labsConfig: &databricks.ModelServingConfigServedEntityExternalModelAi21labsConfigArgs{
Ai21labsApiKey: pulumi.String("string"),
Ai21labsApiKeyPlaintext: pulumi.String("string"),
},
AmazonBedrockConfig: &databricks.ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs{
AwsRegion: pulumi.String("string"),
BedrockProvider: pulumi.String("string"),
AwsAccessKeyId: pulumi.String("string"),
AwsAccessKeyIdPlaintext: pulumi.String("string"),
AwsSecretAccessKey: pulumi.String("string"),
AwsSecretAccessKeyPlaintext: pulumi.String("string"),
},
AnthropicConfig: &databricks.ModelServingConfigServedEntityExternalModelAnthropicConfigArgs{
AnthropicApiKey: pulumi.String("string"),
AnthropicApiKeyPlaintext: pulumi.String("string"),
},
CohereConfig: &databricks.ModelServingConfigServedEntityExternalModelCohereConfigArgs{
CohereApiBase: pulumi.String("string"),
CohereApiKey: pulumi.String("string"),
CohereApiKeyPlaintext: pulumi.String("string"),
},
DatabricksModelServingConfig: &databricks.ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs{
DatabricksWorkspaceUrl: pulumi.String("string"),
DatabricksApiToken: pulumi.String("string"),
DatabricksApiTokenPlaintext: pulumi.String("string"),
},
GoogleCloudVertexAiConfig: &databricks.ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs{
PrivateKey: pulumi.String("string"),
PrivateKeyPlaintext: pulumi.String("string"),
ProjectId: pulumi.String("string"),
Region: pulumi.String("string"),
},
OpenaiConfig: &databricks.ModelServingConfigServedEntityExternalModelOpenaiConfigArgs{
MicrosoftEntraClientId: pulumi.String("string"),
MicrosoftEntraClientSecret: pulumi.String("string"),
MicrosoftEntraClientSecretPlaintext: pulumi.String("string"),
MicrosoftEntraTenantId: pulumi.String("string"),
OpenaiApiBase: pulumi.String("string"),
OpenaiApiKey: pulumi.String("string"),
OpenaiApiKeyPlaintext: pulumi.String("string"),
OpenaiApiType: pulumi.String("string"),
OpenaiApiVersion: pulumi.String("string"),
OpenaiDeploymentName: pulumi.String("string"),
OpenaiOrganization: pulumi.String("string"),
},
PalmConfig: &databricks.ModelServingConfigServedEntityExternalModelPalmConfigArgs{
PalmApiKey: pulumi.String("string"),
PalmApiKeyPlaintext: pulumi.String("string"),
},
},
InstanceProfileArn: pulumi.String("string"),
MaxProvisionedThroughput: pulumi.Int(0),
MinProvisionedThroughput: pulumi.Int(0),
Name: pulumi.String("string"),
ScaleToZeroEnabled: pulumi.Bool(false),
WorkloadSize: pulumi.String("string"),
WorkloadType: pulumi.String("string"),
},
},
TrafficConfig: &databricks.ModelServingConfigTrafficConfigArgs{
Routes: databricks.ModelServingConfigTrafficConfigRouteArray{
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("string"),
TrafficPercentage: pulumi.Int(0),
},
},
},
},
AiGateway: &databricks.ModelServingAiGatewayArgs{
Guardrails: &databricks.ModelServingAiGatewayGuardrailsArgs{
Input: &databricks.ModelServingAiGatewayGuardrailsInputTypeArgs{
InvalidKeywords: pulumi.StringArray{
pulumi.String("string"),
},
Pii: &databricks.ModelServingAiGatewayGuardrailsInputPiiArgs{
Behavior: pulumi.String("string"),
},
Safety: pulumi.Bool(false),
ValidTopics: pulumi.StringArray{
pulumi.String("string"),
},
},
Output: &databricks.ModelServingAiGatewayGuardrailsOutputTypeArgs{
InvalidKeywords: pulumi.StringArray{
pulumi.String("string"),
},
Pii: &databricks.ModelServingAiGatewayGuardrailsOutputPiiArgs{
Behavior: pulumi.String("string"),
},
Safety: pulumi.Bool(false),
ValidTopics: pulumi.StringArray{
pulumi.String("string"),
},
},
},
InferenceTableConfig: &databricks.ModelServingAiGatewayInferenceTableConfigArgs{
CatalogName: pulumi.String("string"),
Enabled: pulumi.Bool(false),
SchemaName: pulumi.String("string"),
TableNamePrefix: pulumi.String("string"),
},
RateLimits: databricks.ModelServingAiGatewayRateLimitArray{
&databricks.ModelServingAiGatewayRateLimitArgs{
Calls: pulumi.Int(0),
RenewalPeriod: pulumi.String("string"),
Key: pulumi.String("string"),
},
},
UsageTrackingConfig: &databricks.ModelServingAiGatewayUsageTrackingConfigArgs{
Enabled: pulumi.Bool(false),
},
},
Name: pulumi.String("string"),
RateLimits: databricks.ModelServingRateLimitArray{
&databricks.ModelServingRateLimitArgs{
Calls: pulumi.Int(0),
RenewalPeriod: pulumi.String("string"),
Key: pulumi.String("string"),
},
},
RouteOptimized: pulumi.Bool(false),
Tags: databricks.ModelServingTagArray{
&databricks.ModelServingTagArgs{
Key: pulumi.String("string"),
Value: pulumi.String("string"),
},
},
})
var modelServingResource = new ModelServing("modelServingResource", ModelServingArgs.builder()
.config(ModelServingConfigArgs.builder()
.autoCaptureConfig(ModelServingConfigAutoCaptureConfigArgs.builder()
.catalogName("string")
.enabled(false)
.schemaName("string")
.tableNamePrefix("string")
.build())
.servedEntities(ModelServingConfigServedEntityArgs.builder()
.entityName("string")
.entityVersion("string")
.environmentVars(Map.of("string", "string"))
.externalModel(ModelServingConfigServedEntityExternalModelArgs.builder()
.name("string")
.provider("string")
.task("string")
.ai21labsConfig(ModelServingConfigServedEntityExternalModelAi21labsConfigArgs.builder()
.ai21labsApiKey("string")
.ai21labsApiKeyPlaintext("string")
.build())
.amazonBedrockConfig(ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs.builder()
.awsRegion("string")
.bedrockProvider("string")
.awsAccessKeyId("string")
.awsAccessKeyIdPlaintext("string")
.awsSecretAccessKey("string")
.awsSecretAccessKeyPlaintext("string")
.build())
.anthropicConfig(ModelServingConfigServedEntityExternalModelAnthropicConfigArgs.builder()
.anthropicApiKey("string")
.anthropicApiKeyPlaintext("string")
.build())
.cohereConfig(ModelServingConfigServedEntityExternalModelCohereConfigArgs.builder()
.cohereApiBase("string")
.cohereApiKey("string")
.cohereApiKeyPlaintext("string")
.build())
.databricksModelServingConfig(ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs.builder()
.databricksWorkspaceUrl("string")
.databricksApiToken("string")
.databricksApiTokenPlaintext("string")
.build())
.googleCloudVertexAiConfig(ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs.builder()
.privateKey("string")
.privateKeyPlaintext("string")
.projectId("string")
.region("string")
.build())
.openaiConfig(ModelServingConfigServedEntityExternalModelOpenaiConfigArgs.builder()
.microsoftEntraClientId("string")
.microsoftEntraClientSecret("string")
.microsoftEntraClientSecretPlaintext("string")
.microsoftEntraTenantId("string")
.openaiApiBase("string")
.openaiApiKey("string")
.openaiApiKeyPlaintext("string")
.openaiApiType("string")
.openaiApiVersion("string")
.openaiDeploymentName("string")
.openaiOrganization("string")
.build())
.palmConfig(ModelServingConfigServedEntityExternalModelPalmConfigArgs.builder()
.palmApiKey("string")
.palmApiKeyPlaintext("string")
.build())
.build())
.instanceProfileArn("string")
.maxProvisionedThroughput(0)
.minProvisionedThroughput(0)
.name("string")
.scaleToZeroEnabled(false)
.workloadSize("string")
.workloadType("string")
.build())
.trafficConfig(ModelServingConfigTrafficConfigArgs.builder()
.routes(ModelServingConfigTrafficConfigRouteArgs.builder()
.servedModelName("string")
.trafficPercentage(0)
.build())
.build())
.build())
.aiGateway(ModelServingAiGatewayArgs.builder()
.guardrails(ModelServingAiGatewayGuardrailsArgs.builder()
.input(ModelServingAiGatewayGuardrailsInputArgs.builder()
.invalidKeywords("string")
.pii(ModelServingAiGatewayGuardrailsInputPiiArgs.builder()
.behavior("string")
.build())
.safety(false)
.validTopics("string")
.build())
.output(ModelServingAiGatewayGuardrailsOutputArgs.builder()
.invalidKeywords("string")
.pii(ModelServingAiGatewayGuardrailsOutputPiiArgs.builder()
.behavior("string")
.build())
.safety(false)
.validTopics("string")
.build())
.build())
.inferenceTableConfig(ModelServingAiGatewayInferenceTableConfigArgs.builder()
.catalogName("string")
.enabled(false)
.schemaName("string")
.tableNamePrefix("string")
.build())
.rateLimits(ModelServingAiGatewayRateLimitArgs.builder()
.calls(0)
.renewalPeriod("string")
.key("string")
.build())
.usageTrackingConfig(ModelServingAiGatewayUsageTrackingConfigArgs.builder()
.enabled(false)
.build())
.build())
.name("string")
.rateLimits(ModelServingRateLimitArgs.builder()
.calls(0)
.renewalPeriod("string")
.key("string")
.build())
.routeOptimized(false)
.tags(ModelServingTagArgs.builder()
.key("string")
.value("string")
.build())
.build());
model_serving_resource = databricks.ModelServing("modelServingResource",
config=databricks.ModelServingConfigArgs(
auto_capture_config=databricks.ModelServingConfigAutoCaptureConfigArgs(
catalog_name="string",
enabled=False,
schema_name="string",
table_name_prefix="string",
),
served_entities=[databricks.ModelServingConfigServedEntityArgs(
entity_name="string",
entity_version="string",
environment_vars={
"string": "string",
},
external_model=databricks.ModelServingConfigServedEntityExternalModelArgs(
name="string",
provider="string",
task="string",
ai21labs_config=databricks.ModelServingConfigServedEntityExternalModelAi21labsConfigArgs(
ai21labs_api_key="string",
ai21labs_api_key_plaintext="string",
),
amazon_bedrock_config=databricks.ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs(
aws_region="string",
bedrock_provider="string",
aws_access_key_id="string",
aws_access_key_id_plaintext="string",
aws_secret_access_key="string",
aws_secret_access_key_plaintext="string",
),
anthropic_config=databricks.ModelServingConfigServedEntityExternalModelAnthropicConfigArgs(
anthropic_api_key="string",
anthropic_api_key_plaintext="string",
),
cohere_config=databricks.ModelServingConfigServedEntityExternalModelCohereConfigArgs(
cohere_api_base="string",
cohere_api_key="string",
cohere_api_key_plaintext="string",
),
databricks_model_serving_config=databricks.ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs(
databricks_workspace_url="string",
databricks_api_token="string",
databricks_api_token_plaintext="string",
),
google_cloud_vertex_ai_config=databricks.ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs(
private_key="string",
private_key_plaintext="string",
project_id="string",
region="string",
),
openai_config=databricks.ModelServingConfigServedEntityExternalModelOpenaiConfigArgs(
microsoft_entra_client_id="string",
microsoft_entra_client_secret="string",
microsoft_entra_client_secret_plaintext="string",
microsoft_entra_tenant_id="string",
openai_api_base="string",
openai_api_key="string",
openai_api_key_plaintext="string",
openai_api_type="string",
openai_api_version="string",
openai_deployment_name="string",
openai_organization="string",
),
palm_config=databricks.ModelServingConfigServedEntityExternalModelPalmConfigArgs(
palm_api_key="string",
palm_api_key_plaintext="string",
),
),
instance_profile_arn="string",
max_provisioned_throughput=0,
min_provisioned_throughput=0,
name="string",
scale_to_zero_enabled=False,
workload_size="string",
workload_type="string",
)],
traffic_config=databricks.ModelServingConfigTrafficConfigArgs(
routes=[databricks.ModelServingConfigTrafficConfigRouteArgs(
served_model_name="string",
traffic_percentage=0,
)],
),
),
ai_gateway=databricks.ModelServingAiGatewayArgs(
guardrails=databricks.ModelServingAiGatewayGuardrailsArgs(
input=databricks.ModelServingAiGatewayGuardrailsInputArgs(
invalid_keywords=["string"],
pii=databricks.ModelServingAiGatewayGuardrailsInputPiiArgs(
behavior="string",
),
safety=False,
valid_topics=["string"],
),
output=databricks.ModelServingAiGatewayGuardrailsOutputArgs(
invalid_keywords=["string"],
pii=databricks.ModelServingAiGatewayGuardrailsOutputPiiArgs(
behavior="string",
),
safety=False,
valid_topics=["string"],
),
),
inference_table_config=databricks.ModelServingAiGatewayInferenceTableConfigArgs(
catalog_name="string",
enabled=False,
schema_name="string",
table_name_prefix="string",
),
rate_limits=[databricks.ModelServingAiGatewayRateLimitArgs(
calls=0,
renewal_period="string",
key="string",
)],
usage_tracking_config=databricks.ModelServingAiGatewayUsageTrackingConfigArgs(
enabled=False,
),
),
name="string",
rate_limits=[databricks.ModelServingRateLimitArgs(
calls=0,
renewal_period="string",
key="string",
)],
route_optimized=False,
tags=[databricks.ModelServingTagArgs(
key="string",
value="string",
)])
const modelServingResource = new databricks.ModelServing("modelServingResource", {
config: {
autoCaptureConfig: {
catalogName: "string",
enabled: false,
schemaName: "string",
tableNamePrefix: "string",
},
servedEntities: [{
entityName: "string",
entityVersion: "string",
environmentVars: {
string: "string",
},
externalModel: {
name: "string",
provider: "string",
task: "string",
ai21labsConfig: {
ai21labsApiKey: "string",
ai21labsApiKeyPlaintext: "string",
},
amazonBedrockConfig: {
awsRegion: "string",
bedrockProvider: "string",
awsAccessKeyId: "string",
awsAccessKeyIdPlaintext: "string",
awsSecretAccessKey: "string",
awsSecretAccessKeyPlaintext: "string",
},
anthropicConfig: {
anthropicApiKey: "string",
anthropicApiKeyPlaintext: "string",
},
cohereConfig: {
cohereApiBase: "string",
cohereApiKey: "string",
cohereApiKeyPlaintext: "string",
},
databricksModelServingConfig: {
databricksWorkspaceUrl: "string",
databricksApiToken: "string",
databricksApiTokenPlaintext: "string",
},
googleCloudVertexAiConfig: {
privateKey: "string",
privateKeyPlaintext: "string",
projectId: "string",
region: "string",
},
openaiConfig: {
microsoftEntraClientId: "string",
microsoftEntraClientSecret: "string",
microsoftEntraClientSecretPlaintext: "string",
microsoftEntraTenantId: "string",
openaiApiBase: "string",
openaiApiKey: "string",
openaiApiKeyPlaintext: "string",
openaiApiType: "string",
openaiApiVersion: "string",
openaiDeploymentName: "string",
openaiOrganization: "string",
},
palmConfig: {
palmApiKey: "string",
palmApiKeyPlaintext: "string",
},
},
instanceProfileArn: "string",
maxProvisionedThroughput: 0,
minProvisionedThroughput: 0,
name: "string",
scaleToZeroEnabled: false,
workloadSize: "string",
workloadType: "string",
}],
trafficConfig: {
routes: [{
servedModelName: "string",
trafficPercentage: 0,
}],
},
},
aiGateway: {
guardrails: {
input: {
invalidKeywords: ["string"],
pii: {
behavior: "string",
},
safety: false,
validTopics: ["string"],
},
output: {
invalidKeywords: ["string"],
pii: {
behavior: "string",
},
safety: false,
validTopics: ["string"],
},
},
inferenceTableConfig: {
catalogName: "string",
enabled: false,
schemaName: "string",
tableNamePrefix: "string",
},
rateLimits: [{
calls: 0,
renewalPeriod: "string",
key: "string",
}],
usageTrackingConfig: {
enabled: false,
},
},
name: "string",
rateLimits: [{
calls: 0,
renewalPeriod: "string",
key: "string",
}],
routeOptimized: false,
tags: [{
key: "string",
value: "string",
}],
});
type: databricks:ModelServing
properties:
aiGateway:
guardrails:
input:
invalidKeywords:
- string
pii:
behavior: string
safety: false
validTopics:
- string
output:
invalidKeywords:
- string
pii:
behavior: string
safety: false
validTopics:
- string
inferenceTableConfig:
catalogName: string
enabled: false
schemaName: string
tableNamePrefix: string
rateLimits:
- calls: 0
key: string
renewalPeriod: string
usageTrackingConfig:
enabled: false
config:
autoCaptureConfig:
catalogName: string
enabled: false
schemaName: string
tableNamePrefix: string
servedEntities:
- entityName: string
entityVersion: string
environmentVars:
string: string
externalModel:
ai21labsConfig:
ai21labsApiKey: string
ai21labsApiKeyPlaintext: string
amazonBedrockConfig:
awsAccessKeyId: string
awsAccessKeyIdPlaintext: string
awsRegion: string
awsSecretAccessKey: string
awsSecretAccessKeyPlaintext: string
bedrockProvider: string
anthropicConfig:
anthropicApiKey: string
anthropicApiKeyPlaintext: string
cohereConfig:
cohereApiBase: string
cohereApiKey: string
cohereApiKeyPlaintext: string
databricksModelServingConfig:
databricksApiToken: string
databricksApiTokenPlaintext: string
databricksWorkspaceUrl: string
googleCloudVertexAiConfig:
privateKey: string
privateKeyPlaintext: string
projectId: string
region: string
name: string
openaiConfig:
microsoftEntraClientId: string
microsoftEntraClientSecret: string
microsoftEntraClientSecretPlaintext: string
microsoftEntraTenantId: string
openaiApiBase: string
openaiApiKey: string
openaiApiKeyPlaintext: string
openaiApiType: string
openaiApiVersion: string
openaiDeploymentName: string
openaiOrganization: string
palmConfig:
palmApiKey: string
palmApiKeyPlaintext: string
provider: string
task: string
instanceProfileArn: string
maxProvisionedThroughput: 0
minProvisionedThroughput: 0
name: string
scaleToZeroEnabled: false
workloadSize: string
workloadType: string
trafficConfig:
routes:
- servedModelName: string
trafficPercentage: 0
name: string
rateLimits:
- calls: 0
key: string
renewalPeriod: string
routeOptimized: false
tags:
- key: string
value: string
ModelServing Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
The ModelServing resource accepts the following input properties:
- Config
Model
Serving Config - The model serving endpoint configuration.
- Ai
Gateway ModelServing Ai Gateway - Name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Rate
Limits List<ModelServing Rate Limit> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- Route
Optimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- Tags List<Model
Serving Tag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- Config
Model
Serving Config Args - The model serving endpoint configuration.
- Ai
Gateway ModelServing Ai Gateway Args - Name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Rate
Limits []ModelServing Rate Limit Args - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- Route
Optimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- []Model
Serving Tag Args - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config
Model
Serving Config - The model serving endpoint configuration.
- ai
Gateway ModelServing Ai Gateway - name String
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits List<ModelServing Rate Limit> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route
Optimized Boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- List<Model
Serving Tag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config
Model
Serving Config - The model serving endpoint configuration.
- ai
Gateway ModelServing Ai Gateway - name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits ModelServing Rate Limit[] - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route
Optimized boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- Model
Serving Tag[] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config
Model
Serving Config Args - The model serving endpoint configuration.
- ai_
gateway ModelServing Ai Gateway Args - name str
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate_
limits Sequence[ModelServing Rate Limit Args] - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route_
optimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- Sequence[Model
Serving Tag Args] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config Property Map
- The model serving endpoint configuration.
- ai
Gateway Property Map - name String
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits List<Property Map> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route
Optimized Boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- List<Property Map>
- Tags to be attached to the serving endpoint and automatically propagated to billing logs.
Outputs
All input properties are implicitly available as output properties. Additionally, the ModelServing resource produces the following output properties:
- Id string
- The provider-assigned unique ID for this managed resource.
- Serving
Endpoint stringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- Id string
- The provider-assigned unique ID for this managed resource.
- Serving
Endpoint stringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id String
- The provider-assigned unique ID for this managed resource.
- serving
Endpoint StringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id string
- The provider-assigned unique ID for this managed resource.
- serving
Endpoint stringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id str
- The provider-assigned unique ID for this managed resource.
- serving_
endpoint_ strid - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id String
- The provider-assigned unique ID for this managed resource.
- serving
Endpoint StringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
Look up Existing ModelServing Resource
Get an existing ModelServing resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: ModelServingState, opts?: CustomResourceOptions): ModelServing
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
ai_gateway: Optional[ModelServingAiGatewayArgs] = None,
config: Optional[ModelServingConfigArgs] = None,
name: Optional[str] = None,
rate_limits: Optional[Sequence[ModelServingRateLimitArgs]] = None,
route_optimized: Optional[bool] = None,
serving_endpoint_id: Optional[str] = None,
tags: Optional[Sequence[ModelServingTagArgs]] = None) -> ModelServing
func GetModelServing(ctx *Context, name string, id IDInput, state *ModelServingState, opts ...ResourceOption) (*ModelServing, error)
public static ModelServing Get(string name, Input<string> id, ModelServingState? state, CustomResourceOptions? opts = null)
public static ModelServing get(String name, Output<String> id, ModelServingState state, CustomResourceOptions options)
Resource lookup is not supported in YAML
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- Ai
Gateway ModelServing Ai Gateway - Config
Model
Serving Config - The model serving endpoint configuration.
- Name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Rate
Limits List<ModelServing Rate Limit> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- Route
Optimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- Serving
Endpoint stringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- List<Model
Serving Tag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- Ai
Gateway ModelServing Ai Gateway Args - Config
Model
Serving Config Args - The model serving endpoint configuration.
- Name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Rate
Limits []ModelServing Rate Limit Args - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- Route
Optimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- Serving
Endpoint stringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- []Model
Serving Tag Args - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- ai
Gateway ModelServing Ai Gateway - config
Model
Serving Config - The model serving endpoint configuration.
- name String
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits List<ModelServing Rate Limit> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route
Optimized Boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- serving
Endpoint StringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- List<Model
Serving Tag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- ai
Gateway ModelServing Ai Gateway - config
Model
Serving Config - The model serving endpoint configuration.
- name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits ModelServing Rate Limit[] - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route
Optimized boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- serving
Endpoint stringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- Model
Serving Tag[] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- ai_
gateway ModelServing Ai Gateway Args - config
Model
Serving Config Args - The model serving endpoint configuration.
- name str
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate_
limits Sequence[ModelServing Rate Limit Args] - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route_
optimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- serving_
endpoint_ strid - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- Sequence[Model
Serving Tag Args] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- ai
Gateway Property Map - config Property Map
- The model serving endpoint configuration.
- name String
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits List<Property Map> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route
Optimized Boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- serving
Endpoint StringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- List<Property Map>
- Tags to be attached to the serving endpoint and automatically propagated to billing logs.
Supporting Types
ModelServingAiGateway, ModelServingAiGatewayArgs
- Guardrails
Model
Serving Ai Gateway Guardrails - Inference
Table ModelConfig Serving Ai Gateway Inference Table Config - Rate
Limits List<ModelServing Ai Gateway Rate Limit> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- Usage
Tracking ModelConfig Serving Ai Gateway Usage Tracking Config
- Guardrails
Model
Serving Ai Gateway Guardrails - Inference
Table ModelConfig Serving Ai Gateway Inference Table Config - Rate
Limits []ModelServing Ai Gateway Rate Limit - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- Usage
Tracking ModelConfig Serving Ai Gateway Usage Tracking Config
- guardrails
Model
Serving Ai Gateway Guardrails - inference
Table ModelConfig Serving Ai Gateway Inference Table Config - rate
Limits List<ModelServing Ai Gateway Rate Limit> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- usage
Tracking ModelConfig Serving Ai Gateway Usage Tracking Config
- guardrails
Model
Serving Ai Gateway Guardrails - inference
Table ModelConfig Serving Ai Gateway Inference Table Config - rate
Limits ModelServing Ai Gateway Rate Limit[] - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- usage
Tracking ModelConfig Serving Ai Gateway Usage Tracking Config
- guardrails
Model
Serving Ai Gateway Guardrails - inference_
table_ Modelconfig Serving Ai Gateway Inference Table Config - rate_
limits Sequence[ModelServing Ai Gateway Rate Limit] - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- usage_
tracking_ Modelconfig Serving Ai Gateway Usage Tracking Config
- guardrails Property Map
- inference
Table Property MapConfig - rate
Limits List<Property Map> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- usage
Tracking Property MapConfig
ModelServingAiGatewayGuardrails, ModelServingAiGatewayGuardrailsArgs
ModelServingAiGatewayGuardrailsInput, ModelServingAiGatewayGuardrailsInputArgs
- Invalid
Keywords List<string> - Pii
Model
Serving Ai Gateway Guardrails Input Pii - Safety bool
- Valid
Topics List<string>
- Invalid
Keywords []string - Pii
Model
Serving Ai Gateway Guardrails Input Pii - Safety bool
- Valid
Topics []string
- invalid
Keywords List<String> - pii
Model
Serving Ai Gateway Guardrails Input Pii - safety Boolean
- valid
Topics List<String>
- invalid
Keywords string[] - pii
Model
Serving Ai Gateway Guardrails Input Pii - safety boolean
- valid
Topics string[]
- invalid_
keywords Sequence[str] - pii
Model
Serving Ai Gateway Guardrails Input Pii - safety bool
- valid_
topics Sequence[str]
- invalid
Keywords List<String> - pii Property Map
- safety Boolean
- valid
Topics List<String>
ModelServingAiGatewayGuardrailsInputPii, ModelServingAiGatewayGuardrailsInputPiiArgs
- Behavior string
- Behavior string
- behavior String
- behavior string
- behavior str
- behavior String
ModelServingAiGatewayGuardrailsOutput, ModelServingAiGatewayGuardrailsOutputArgs
- Invalid
Keywords List<string> - Pii
Model
Serving Ai Gateway Guardrails Output Pii - Safety bool
- Valid
Topics List<string>
- Invalid
Keywords []string - Pii
Model
Serving Ai Gateway Guardrails Output Pii - Safety bool
- Valid
Topics []string
- invalid
Keywords List<String> - pii
Model
Serving Ai Gateway Guardrails Output Pii - safety Boolean
- valid
Topics List<String>
- invalid
Keywords string[] - pii
Model
Serving Ai Gateway Guardrails Output Pii - safety boolean
- valid
Topics string[]
- invalid_
keywords Sequence[str] - pii
Model
Serving Ai Gateway Guardrails Output Pii - safety bool
- valid_
topics Sequence[str]
- invalid
Keywords List<String> - pii Property Map
- safety Boolean
- valid
Topics List<String>
ModelServingAiGatewayGuardrailsOutputPii, ModelServingAiGatewayGuardrailsOutputPiiArgs
- Behavior string
- Behavior string
- behavior String
- behavior string
- behavior str
- behavior String
ModelServingAiGatewayInferenceTableConfig, ModelServingAiGatewayInferenceTableConfigArgs
- Catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- Enabled bool
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- Schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- Table
Name stringPrefix - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- Catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- Enabled bool
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- Schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- Table
Name stringPrefix - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name String - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled Boolean
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- schema
Name String - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table
Name StringPrefix - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled boolean
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table
Name stringPrefix - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog_
name str - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled bool
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- schema_
name str - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table_
name_ strprefix - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name String - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled Boolean
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- schema
Name String - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table
Name StringPrefix - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
ModelServingAiGatewayRateLimit, ModelServingAiGatewayRateLimitArgs
- Calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- Renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - Key string
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- Calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- Renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - Key string
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls Integer
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period String - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key String
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls number
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key string
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal_
period str - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key str
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls Number
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period String - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key String
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
ModelServingAiGatewayUsageTrackingConfig, ModelServingAiGatewayUsageTrackingConfigArgs
- Enabled bool
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- Enabled bool
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- enabled Boolean
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- enabled boolean
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- enabled bool
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- enabled Boolean
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
ModelServingConfig, ModelServingConfigArgs
- Auto
Capture ModelConfig Serving Config Auto Capture Config - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- Served
Entities List<ModelServing Config Served Entity> - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- Served
Models List<ModelServing Config Served Model> - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- Traffic
Config ModelServing Config Traffic Config - A single block represents the traffic split configuration amongst the served models.
- Auto
Capture ModelConfig Serving Config Auto Capture Config - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- Served
Entities []ModelServing Config Served Entity - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- Served
Models []ModelServing Config Served Model - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- Traffic
Config ModelServing Config Traffic Config - A single block represents the traffic split configuration amongst the served models.
- auto
Capture ModelConfig Serving Config Auto Capture Config - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- served
Entities List<ModelServing Config Served Entity> - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- served
Models List<ModelServing Config Served Model> - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- traffic
Config ModelServing Config Traffic Config - A single block represents the traffic split configuration amongst the served models.
- auto
Capture ModelConfig Serving Config Auto Capture Config - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- served
Entities ModelServing Config Served Entity[] - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- served
Models ModelServing Config Served Model[] - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- traffic
Config ModelServing Config Traffic Config - A single block represents the traffic split configuration amongst the served models.
- auto_
capture_ Modelconfig Serving Config Auto Capture Config - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- served_
entities Sequence[ModelServing Config Served Entity] - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- served_
models Sequence[ModelServing Config Served Model] - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- traffic_
config ModelServing Config Traffic Config - A single block represents the traffic split configuration amongst the served models.
- auto
Capture Property MapConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- served
Entities List<Property Map> - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- served
Models List<Property Map> - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- traffic
Config Property Map - A single block represents the traffic split configuration amongst the served models.
ModelServingConfigAutoCaptureConfig, ModelServingConfigAutoCaptureConfigArgs
- Catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- Enabled bool
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- Schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- Table
Name stringPrefix - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- Catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- Enabled bool
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- Schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- Table
Name stringPrefix - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name String - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled Boolean
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- schema
Name String - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table
Name StringPrefix - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled boolean
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table
Name stringPrefix - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog_
name str - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled bool
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- schema_
name str - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table_
name_ strprefix - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name String - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled Boolean
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- schema
Name String - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table
Name StringPrefix - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
ModelServingConfigServedEntity, ModelServingConfigServedEntityArgs
- Entity
Name string - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - Entity
Version string - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - Environment
Vars Dictionary<string, string> - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- External
Model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. For an existing endpoint withexternal_model
, it cannot be updated to an endpoint without external_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - Instance
Profile stringArn - ARN of the instance profile that the served entity uses to access AWS resources.
- Max
Provisioned intThroughput - The maximum tokens per second that the endpoint can scale up to.
- Min
Provisioned intThroughput - The minimum tokens per second that the endpoint can scale down to.
- Name string
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - Scale
To boolZero Enabled - Whether the compute resources for the served entity should scale down to zero.
- Workload
Size string - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - Workload
Type string - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
- Entity
Name string - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - Entity
Version string - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - Environment
Vars map[string]string - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- External
Model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. For an existing endpoint withexternal_model
, it cannot be updated to an endpoint without external_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - Instance
Profile stringArn - ARN of the instance profile that the served entity uses to access AWS resources.
- Max
Provisioned intThroughput - The maximum tokens per second that the endpoint can scale up to.
- Min
Provisioned intThroughput - The minimum tokens per second that the endpoint can scale down to.
- Name string
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - Scale
To boolZero Enabled - Whether the compute resources for the served entity should scale down to zero.
- Workload
Size string - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - Workload
Type string - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
- entity
Name String - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - entity
Version String - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - environment
Vars Map<String,String> - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- external
Model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. For an existing endpoint withexternal_model
, it cannot be updated to an endpoint without external_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - instance
Profile StringArn - ARN of the instance profile that the served entity uses to access AWS resources.
- max
Provisioned IntegerThroughput - The maximum tokens per second that the endpoint can scale up to.
- min
Provisioned IntegerThroughput - The minimum tokens per second that the endpoint can scale down to.
- name String
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - scale
To BooleanZero Enabled - Whether the compute resources for the served entity should scale down to zero.
- workload
Size String - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload
Type String - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
- entity
Name string - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - entity
Version string - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - environment
Vars {[key: string]: string} - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- external
Model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. For an existing endpoint withexternal_model
, it cannot be updated to an endpoint without external_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - instance
Profile stringArn - ARN of the instance profile that the served entity uses to access AWS resources.
- max
Provisioned numberThroughput - The maximum tokens per second that the endpoint can scale up to.
- min
Provisioned numberThroughput - The minimum tokens per second that the endpoint can scale down to.
- name string
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - scale
To booleanZero Enabled - Whether the compute resources for the served entity should scale down to zero.
- workload
Size string - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload
Type string - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
- entity_
name str - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - entity_
version str - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - environment_
vars Mapping[str, str] - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- external_
model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. For an existing endpoint withexternal_model
, it cannot be updated to an endpoint without external_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - instance_
profile_ strarn - ARN of the instance profile that the served entity uses to access AWS resources.
- max_
provisioned_ intthroughput - The maximum tokens per second that the endpoint can scale up to.
- min_
provisioned_ intthroughput - The minimum tokens per second that the endpoint can scale down to.
- name str
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - scale_
to_ boolzero_ enabled - Whether the compute resources for the served entity should scale down to zero.
- workload_
size str - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload_
type str - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
- entity
Name String - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - entity
Version String - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - environment
Vars Map<String> - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- external
Model Property Map - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. For an existing endpoint withexternal_model
, it cannot be updated to an endpoint without external_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - instance
Profile StringArn - ARN of the instance profile that the served entity uses to access AWS resources.
- max
Provisioned NumberThroughput - The maximum tokens per second that the endpoint can scale up to.
- min
Provisioned NumberThroughput - The minimum tokens per second that the endpoint can scale down to.
- name String
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - scale
To BooleanZero Enabled - Whether the compute resources for the served entity should scale down to zero.
- workload
Size String - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload
Type String - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
ModelServingConfigServedEntityExternalModel, ModelServingConfigServedEntityExternalModelArgs
- Name string
- The name of the external model.
- Provider string
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
,anthropic
,amazon-bedrock
,cohere
,databricks-model-serving
,openai
, andpalm
. - Task string
- The task type of the external model.
- Ai21labs
Config ModelServing Config Served Entity External Model Ai21labs Config - AI21Labs Config
- Amazon
Bedrock ModelConfig Serving Config Served Entity External Model Amazon Bedrock Config - Amazon Bedrock Config
- Anthropic
Config ModelServing Config Served Entity External Model Anthropic Config - Anthropic Config
- Cohere
Config ModelServing Config Served Entity External Model Cohere Config - Cohere Config
- Databricks
Model ModelServing Config Serving Config Served Entity External Model Databricks Model Serving Config - Databricks Model Serving Config
- Google
Cloud ModelVertex Ai Config Serving Config Served Entity External Model Google Cloud Vertex Ai Config - Openai
Config ModelServing Config Served Entity External Model Openai Config - OpenAI Config
- Palm
Config ModelServing Config Served Entity External Model Palm Config - PaLM Config
- Name string
- The name of the external model.
- Provider string
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
,anthropic
,amazon-bedrock
,cohere
,databricks-model-serving
,openai
, andpalm
. - Task string
- The task type of the external model.
- Ai21labs
Config ModelServing Config Served Entity External Model Ai21labs Config - AI21Labs Config
- Amazon
Bedrock ModelConfig Serving Config Served Entity External Model Amazon Bedrock Config - Amazon Bedrock Config
- Anthropic
Config ModelServing Config Served Entity External Model Anthropic Config - Anthropic Config
- Cohere
Config ModelServing Config Served Entity External Model Cohere Config - Cohere Config
- Databricks
Model ModelServing Config Serving Config Served Entity External Model Databricks Model Serving Config - Databricks Model Serving Config
- Google
Cloud ModelVertex Ai Config Serving Config Served Entity External Model Google Cloud Vertex Ai Config - Openai
Config ModelServing Config Served Entity External Model Openai Config - OpenAI Config
- Palm
Config ModelServing Config Served Entity External Model Palm Config - PaLM Config
- name String
- The name of the external model.
- provider String
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
,anthropic
,amazon-bedrock
,cohere
,databricks-model-serving
,openai
, andpalm
. - task String
- The task type of the external model.
- ai21labs
Config ModelServing Config Served Entity External Model Ai21labs Config - AI21Labs Config
- amazon
Bedrock ModelConfig Serving Config Served Entity External Model Amazon Bedrock Config - Amazon Bedrock Config
- anthropic
Config ModelServing Config Served Entity External Model Anthropic Config - Anthropic Config
- cohere
Config ModelServing Config Served Entity External Model Cohere Config - Cohere Config
- databricks
Model ModelServing Config Serving Config Served Entity External Model Databricks Model Serving Config - Databricks Model Serving Config
- google
Cloud ModelVertex Ai Config Serving Config Served Entity External Model Google Cloud Vertex Ai Config - openai
Config ModelServing Config Served Entity External Model Openai Config - OpenAI Config
- palm
Config ModelServing Config Served Entity External Model Palm Config - PaLM Config
- name string
- The name of the external model.
- provider string
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
,anthropic
,amazon-bedrock
,cohere
,databricks-model-serving
,openai
, andpalm
. - task string
- The task type of the external model.
- ai21labs
Config ModelServing Config Served Entity External Model Ai21labs Config - AI21Labs Config
- amazon
Bedrock ModelConfig Serving Config Served Entity External Model Amazon Bedrock Config - Amazon Bedrock Config
- anthropic
Config ModelServing Config Served Entity External Model Anthropic Config - Anthropic Config
- cohere
Config ModelServing Config Served Entity External Model Cohere Config - Cohere Config
- databricks
Model ModelServing Config Serving Config Served Entity External Model Databricks Model Serving Config - Databricks Model Serving Config
- google
Cloud ModelVertex Ai Config Serving Config Served Entity External Model Google Cloud Vertex Ai Config - openai
Config ModelServing Config Served Entity External Model Openai Config - OpenAI Config
- palm
Config ModelServing Config Served Entity External Model Palm Config - PaLM Config
- name str
- The name of the external model.
- provider str
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
,anthropic
,amazon-bedrock
,cohere
,databricks-model-serving
,openai
, andpalm
. - task str
- The task type of the external model.
- ai21labs_
config ModelServing Config Served Entity External Model Ai21labs Config - AI21Labs Config
- amazon_
bedrock_ Modelconfig Serving Config Served Entity External Model Amazon Bedrock Config - Amazon Bedrock Config
- anthropic_
config ModelServing Config Served Entity External Model Anthropic Config - Anthropic Config
- cohere_
config ModelServing Config Served Entity External Model Cohere Config - Cohere Config
- databricks_
model_ Modelserving_ config Serving Config Served Entity External Model Databricks Model Serving Config - Databricks Model Serving Config
- google_
cloud_ Modelvertex_ ai_ config Serving Config Served Entity External Model Google Cloud Vertex Ai Config - openai_
config ModelServing Config Served Entity External Model Openai Config - OpenAI Config
- palm_
config ModelServing Config Served Entity External Model Palm Config - PaLM Config
- name String
- The name of the external model.
- provider String
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
,anthropic
,amazon-bedrock
,cohere
,databricks-model-serving
,openai
, andpalm
. - task String
- The task type of the external model.
- ai21labs
Config Property Map - AI21Labs Config
- amazon
Bedrock Property MapConfig - Amazon Bedrock Config
- anthropic
Config Property Map - Anthropic Config
- cohere
Config Property Map - Cohere Config
- databricks
Model Property MapServing Config - Databricks Model Serving Config
- google
Cloud Property MapVertex Ai Config - openai
Config Property Map - OpenAI Config
- palm
Config Property Map - PaLM Config
ModelServingConfigServedEntityExternalModelAi21labsConfig, ModelServingConfigServedEntityExternalModelAi21labsConfigArgs
- Ai21labs
Api stringKey - The Databricks secret key reference for an AI21Labs API key.
- Ai21labs
Api stringKey Plaintext
- Ai21labs
Api stringKey - The Databricks secret key reference for an AI21Labs API key.
- Ai21labs
Api stringKey Plaintext
- ai21labs
Api StringKey - The Databricks secret key reference for an AI21Labs API key.
- ai21labs
Api StringKey Plaintext
- ai21labs
Api stringKey - The Databricks secret key reference for an AI21Labs API key.
- ai21labs
Api stringKey Plaintext
- ai21labs_
api_ strkey - The Databricks secret key reference for an AI21Labs API key.
- ai21labs_
api_ strkey_ plaintext
- ai21labs
Api StringKey - The Databricks secret key reference for an AI21Labs API key.
- ai21labs
Api StringKey Plaintext
ModelServingConfigServedEntityExternalModelAmazonBedrockConfig, ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs
- Aws
Region string - The AWS region to use. Bedrock has to be enabled there.
- Bedrock
Provider string - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include:
Anthropic
,Cohere
,AI21Labs
,Amazon
. - Aws
Access stringKey Id - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- Aws
Access stringKey Id Plaintext - Aws
Secret stringAccess Key - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- Aws
Secret stringAccess Key Plaintext
- Aws
Region string - The AWS region to use. Bedrock has to be enabled there.
- Bedrock
Provider string - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include:
Anthropic
,Cohere
,AI21Labs
,Amazon
. - Aws
Access stringKey Id - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- Aws
Access stringKey Id Plaintext - Aws
Secret stringAccess Key - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- Aws
Secret stringAccess Key Plaintext
- aws
Region String - The AWS region to use. Bedrock has to be enabled there.
- bedrock
Provider String - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include:
Anthropic
,Cohere
,AI21Labs
,Amazon
. - aws
Access StringKey Id - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- aws
Access StringKey Id Plaintext - aws
Secret StringAccess Key - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- aws
Secret StringAccess Key Plaintext
- aws
Region string - The AWS region to use. Bedrock has to be enabled there.
- bedrock
Provider string - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include:
Anthropic
,Cohere
,AI21Labs
,Amazon
. - aws
Access stringKey Id - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- aws
Access stringKey Id Plaintext - aws
Secret stringAccess Key - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- aws
Secret stringAccess Key Plaintext
- aws_
region str - The AWS region to use. Bedrock has to be enabled there.
- bedrock_
provider str - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include:
Anthropic
,Cohere
,AI21Labs
,Amazon
. - aws_
access_ strkey_ id - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- aws_
access_ strkey_ id_ plaintext - aws_
secret_ straccess_ key - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- aws_
secret_ straccess_ key_ plaintext
- aws
Region String - The AWS region to use. Bedrock has to be enabled there.
- bedrock
Provider String - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include:
Anthropic
,Cohere
,AI21Labs
,Amazon
. - aws
Access StringKey Id - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- aws
Access StringKey Id Plaintext - aws
Secret StringAccess Key - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- aws
Secret StringAccess Key Plaintext
ModelServingConfigServedEntityExternalModelAnthropicConfig, ModelServingConfigServedEntityExternalModelAnthropicConfigArgs
- Anthropic
Api stringKey - The Databricks secret key reference for an Anthropic API key.
- Anthropic
Api stringKey Plaintext
- Anthropic
Api stringKey - The Databricks secret key reference for an Anthropic API key.
- Anthropic
Api stringKey Plaintext
- anthropic
Api StringKey - The Databricks secret key reference for an Anthropic API key.
- anthropic
Api StringKey Plaintext
- anthropic
Api stringKey - The Databricks secret key reference for an Anthropic API key.
- anthropic
Api stringKey Plaintext
- anthropic_
api_ strkey - The Databricks secret key reference for an Anthropic API key.
- anthropic_
api_ strkey_ plaintext
- anthropic
Api StringKey - The Databricks secret key reference for an Anthropic API key.
- anthropic
Api StringKey Plaintext
ModelServingConfigServedEntityExternalModelCohereConfig, ModelServingConfigServedEntityExternalModelCohereConfigArgs
- Cohere
Api stringBase - Cohere
Api stringKey - The Databricks secret key reference for a Cohere API key.
- Cohere
Api stringKey Plaintext
- Cohere
Api stringBase - Cohere
Api stringKey - The Databricks secret key reference for a Cohere API key.
- Cohere
Api stringKey Plaintext
- cohere
Api StringBase - cohere
Api StringKey - The Databricks secret key reference for a Cohere API key.
- cohere
Api StringKey Plaintext
- cohere
Api stringBase - cohere
Api stringKey - The Databricks secret key reference for a Cohere API key.
- cohere
Api stringKey Plaintext
- cohere_
api_ strbase - cohere_
api_ strkey - The Databricks secret key reference for a Cohere API key.
- cohere_
api_ strkey_ plaintext
- cohere
Api StringBase - cohere
Api StringKey - The Databricks secret key reference for a Cohere API key.
- cohere
Api StringKey Plaintext
ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig, ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs
- Databricks
Workspace stringUrl - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- Databricks
Api stringToken - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- Databricks
Api stringToken Plaintext
- Databricks
Workspace stringUrl - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- Databricks
Api stringToken - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- Databricks
Api stringToken Plaintext
- databricks
Workspace StringUrl - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- databricks
Api StringToken - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- databricks
Api StringToken Plaintext
- databricks
Workspace stringUrl - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- databricks
Api stringToken - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- databricks
Api stringToken Plaintext
- databricks_
workspace_ strurl - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- databricks_
api_ strtoken - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- databricks_
api_ strtoken_ plaintext
- databricks
Workspace StringUrl - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- databricks
Api StringToken - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- databricks
Api StringToken Plaintext
ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig, ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs
- Private
Key string - Private
Key stringPlaintext - Project
Id string - Region string
- Private
Key string - Private
Key stringPlaintext - Project
Id string - Region string
- private
Key String - private
Key StringPlaintext - project
Id String - region String
- private
Key string - private
Key stringPlaintext - project
Id string - region string
- private_
key str - private_
key_ strplaintext - project_
id str - region str
- private
Key String - private
Key StringPlaintext - project
Id String - region String
ModelServingConfigServedEntityExternalModelOpenaiConfig, ModelServingConfigServedEntityExternalModelOpenaiConfigArgs
- Microsoft
Entra stringClient Id - Microsoft
Entra stringClient Secret - Microsoft
Entra stringClient Secret Plaintext - Microsoft
Entra stringTenant Id - Openai
Api stringBase - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure.
- Openai
Api stringKey - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- Openai
Api stringKey Plaintext - Openai
Api stringType - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security access validation protocol. For access token validation, use azure. For authentication using Azure Active Directory (Azure AD) use, azuread.
- Openai
Api stringVersion - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to utilize, specified by a date.
- Openai
Deployment stringName - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- Openai
Organization string - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
- Microsoft
Entra stringClient Id - Microsoft
Entra stringClient Secret - Microsoft
Entra stringClient Secret Plaintext - Microsoft
Entra stringTenant Id - Openai
Api stringBase - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure.
- Openai
Api stringKey - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- Openai
Api stringKey Plaintext - Openai
Api stringType - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security access validation protocol. For access token validation, use azure. For authentication using Azure Active Directory (Azure AD) use, azuread.
- Openai
Api stringVersion - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to utilize, specified by a date.
- Openai
Deployment stringName - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- Openai
Organization string - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
- microsoft
Entra StringClient Id - microsoft
Entra StringClient Secret - microsoft
Entra StringClient Secret Plaintext - microsoft
Entra StringTenant Id - openai
Api StringBase - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure.
- openai
Api StringKey - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- openai
Api StringKey Plaintext - openai
Api StringType - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security access validation protocol. For access token validation, use azure. For authentication using Azure Active Directory (Azure AD) use, azuread.
- openai
Api StringVersion - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to utilize, specified by a date.
- openai
Deployment StringName - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- openai
Organization String - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
- microsoft
Entra stringClient Id - microsoft
Entra stringClient Secret - microsoft
Entra stringClient Secret Plaintext - microsoft
Entra stringTenant Id - openai
Api stringBase - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure.
- openai
Api stringKey - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- openai
Api stringKey Plaintext - openai
Api stringType - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security access validation protocol. For access token validation, use azure. For authentication using Azure Active Directory (Azure AD) use, azuread.
- openai
Api stringVersion - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to utilize, specified by a date.
- openai
Deployment stringName - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- openai
Organization string - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
- microsoft_
entra_ strclient_ id - microsoft_
entra_ strclient_ secret - microsoft_
entra_ strclient_ secret_ plaintext - microsoft_
entra_ strtenant_ id - openai_
api_ strbase - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure.
- openai_
api_ strkey - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- openai_
api_ strkey_ plaintext - openai_
api_ strtype - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security access validation protocol. For access token validation, use azure. For authentication using Azure Active Directory (Azure AD) use, azuread.
- openai_
api_ strversion - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to utilize, specified by a date.
- openai_
deployment_ strname - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- openai_
organization str - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
- microsoft
Entra StringClient Id - microsoft
Entra StringClient Secret - microsoft
Entra StringClient Secret Plaintext - microsoft
Entra StringTenant Id - openai
Api StringBase - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure.
- openai
Api StringKey - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- openai
Api StringKey Plaintext - openai
Api StringType - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security access validation protocol. For access token validation, use azure. For authentication using Azure Active Directory (Azure AD) use, azuread.
- openai
Api StringVersion - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to utilize, specified by a date.
- openai
Deployment StringName - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- openai
Organization String - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
ModelServingConfigServedEntityExternalModelPalmConfig, ModelServingConfigServedEntityExternalModelPalmConfigArgs
- Palm
Api stringKey - The Databricks secret key reference for a PaLM API key.
- Palm
Api stringKey Plaintext
- Palm
Api stringKey - The Databricks secret key reference for a PaLM API key.
- Palm
Api stringKey Plaintext
- palm
Api StringKey - The Databricks secret key reference for a PaLM API key.
- palm
Api StringKey Plaintext
- palm
Api stringKey - The Databricks secret key reference for a PaLM API key.
- palm
Api stringKey Plaintext
- palm_
api_ strkey - The Databricks secret key reference for a PaLM API key.
- palm_
api_ strkey_ plaintext
- palm
Api StringKey - The Databricks secret key reference for a PaLM API key.
- palm
Api StringKey Plaintext
ModelServingConfigServedModel, ModelServingConfigServedModelArgs
- Model
Name string - The name of the model in Databricks Model Registry to be served.
- Model
Version string - The version of the model in Databricks Model Registry to be served.
- Environment
Vars Dictionary<string, string> - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - Instance
Profile stringArn - ARN of the instance profile that the served model will use to access AWS resources.
- Max
Provisioned intThroughput - The maximum tokens per second that the endpoint can scale up to.
- Min
Provisioned intThroughput - The minimum tokens per second that the endpoint can scale down to.
- Name string
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - Scale
To boolZero Enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value istrue
. - Workload
Size string - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). - Workload
Type string - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value isCPU
.
- Model
Name string - The name of the model in Databricks Model Registry to be served.
- Model
Version string - The version of the model in Databricks Model Registry to be served.
- Environment
Vars map[string]string - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - Instance
Profile stringArn - ARN of the instance profile that the served model will use to access AWS resources.
- Max
Provisioned intThroughput - The maximum tokens per second that the endpoint can scale up to.
- Min
Provisioned intThroughput - The minimum tokens per second that the endpoint can scale down to.
- Name string
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - Scale
To boolZero Enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value istrue
. - Workload
Size string - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). - Workload
Type string - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value isCPU
.
- model
Name String - The name of the model in Databricks Model Registry to be served.
- model
Version String - The version of the model in Databricks Model Registry to be served.
- environment
Vars Map<String,String> - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - instance
Profile StringArn - ARN of the instance profile that the served model will use to access AWS resources.
- max
Provisioned IntegerThroughput - The maximum tokens per second that the endpoint can scale up to.
- min
Provisioned IntegerThroughput - The minimum tokens per second that the endpoint can scale down to.
- name String
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - scale
To BooleanZero Enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value istrue
. - workload
Size String - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). - workload
Type String - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value isCPU
.
- model
Name string - The name of the model in Databricks Model Registry to be served.
- model
Version string - The version of the model in Databricks Model Registry to be served.
- environment
Vars {[key: string]: string} - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - instance
Profile stringArn - ARN of the instance profile that the served model will use to access AWS resources.
- max
Provisioned numberThroughput - The maximum tokens per second that the endpoint can scale up to.
- min
Provisioned numberThroughput - The minimum tokens per second that the endpoint can scale down to.
- name string
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - scale
To booleanZero Enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value istrue
. - workload
Size string - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). - workload
Type string - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value isCPU
.
- model_
name str - The name of the model in Databricks Model Registry to be served.
- model_
version str - The version of the model in Databricks Model Registry to be served.
- environment_
vars Mapping[str, str] - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - instance_
profile_ strarn - ARN of the instance profile that the served model will use to access AWS resources.
- max_
provisioned_ intthroughput - The maximum tokens per second that the endpoint can scale up to.
- min_
provisioned_ intthroughput - The minimum tokens per second that the endpoint can scale down to.
- name str
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - scale_
to_ boolzero_ enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value istrue
. - workload_
size str - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). - workload_
type str - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value isCPU
.
- model
Name String - The name of the model in Databricks Model Registry to be served.
- model
Version String - The version of the model in Databricks Model Registry to be served.
- environment
Vars Map<String> - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - instance
Profile StringArn - ARN of the instance profile that the served model will use to access AWS resources.
- max
Provisioned NumberThroughput - The maximum tokens per second that the endpoint can scale up to.
- min
Provisioned NumberThroughput - The minimum tokens per second that the endpoint can scale down to.
- name String
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - scale
To BooleanZero Enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value istrue
. - workload
Size String - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). - workload
Type String - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value isCPU
.
ModelServingConfigTrafficConfig, ModelServingConfigTrafficConfigArgs
- Routes
List<Model
Serving Config Traffic Config Route> - Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a correspondingroutes
block.
- Routes
[]Model
Serving Config Traffic Config Route - Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a correspondingroutes
block.
- routes
List<Model
Serving Config Traffic Config Route> - Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a correspondingroutes
block.
- routes
Model
Serving Config Traffic Config Route[] - Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a correspondingroutes
block.
- routes
Sequence[Model
Serving Config Traffic Config Route] - Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a correspondingroutes
block.
- routes List<Property Map>
- Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a correspondingroutes
block.
ModelServingConfigTrafficConfigRoute, ModelServingConfigTrafficConfigRouteArgs
- Served
Model stringName - Traffic
Percentage int - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- Served
Model stringName - Traffic
Percentage int - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served
Model StringName - traffic
Percentage Integer - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served
Model stringName - traffic
Percentage number - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served_
model_ strname - traffic_
percentage int - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served
Model StringName - traffic
Percentage Number - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
ModelServingRateLimit, ModelServingRateLimitArgs
- Calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- Renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - Key string
- Key field for a serving endpoint rate limit. Currently, only
user
andendpoint
are supported, withendpoint
being the default if not specified.
- Calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- Renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - Key string
- Key field for a serving endpoint rate limit. Currently, only
user
andendpoint
are supported, withendpoint
being the default if not specified.
- calls Integer
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period String - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key String
- Key field for a serving endpoint rate limit. Currently, only
user
andendpoint
are supported, withendpoint
being the default if not specified.
- calls number
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key string
- Key field for a serving endpoint rate limit. Currently, only
user
andendpoint
are supported, withendpoint
being the default if not specified.
- calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal_
period str - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key str
- Key field for a serving endpoint rate limit. Currently, only
user
andendpoint
are supported, withendpoint
being the default if not specified.
- calls Number
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period String - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key String
- Key field for a serving endpoint rate limit. Currently, only
user
andendpoint
are supported, withendpoint
being the default if not specified.
ModelServingTag, ModelServingTagArgs
Import
The model serving resource can be imported using the name of the endpoint.
bash
$ pulumi import databricks:index/modelServing:ModelServing this <model-serving-endpoint-name>
To learn more about importing existing cloud resources, see Importing resources.
Package Details
- Repository
- databricks pulumi/pulumi-databricks
- License
- Apache-2.0
- Notes
- This Pulumi package is based on the
databricks
Terraform Provider.