1. Packages
  2. Databricks
  3. API Docs
  4. ModelServing
Databricks v1.51.0 published on Tuesday, Oct 8, 2024 by Pulumi

databricks.ModelServing

Explore with Pulumi AI

databricks logo
Databricks v1.51.0 published on Tuesday, Oct 8, 2024 by Pulumi

    This resource allows you to manage Model Serving endpoints in Databricks.

    If you replace served_models with served_entities in an existing serving endpoint, the serving endpoint will briefly enter an updating state (for roughly 30 seconds), and the config version will be incremented.

    Example Usage

    import * as pulumi from "@pulumi/pulumi";
    import * as databricks from "@pulumi/databricks";
    
    const _this = new databricks.ModelServing("this", {
        name: "ads-serving-endpoint",
        config: {
            servedEntities: [
                {
                    name: "prod_model",
                    entityName: "ads-model",
                    entityVersion: "2",
                    workloadSize: "Small",
                    scaleToZeroEnabled: true,
                },
                {
                    name: "candidate_model",
                    entityName: "ads-model",
                    entityVersion: "4",
                    workloadSize: "Small",
                    scaleToZeroEnabled: false,
                },
            ],
            trafficConfig: {
                routes: [
                    {
                        servedModelName: "prod_model",
                        trafficPercentage: 90,
                    },
                    {
                        servedModelName: "candidate_model",
                        trafficPercentage: 10,
                    },
                ],
            },
        },
    });
    
    import pulumi
    import pulumi_databricks as databricks
    
    this = databricks.ModelServing("this",
        name="ads-serving-endpoint",
        config={
            "served_entities": [
                {
                    "name": "prod_model",
                    "entity_name": "ads-model",
                    "entity_version": "2",
                    "workload_size": "Small",
                    "scale_to_zero_enabled": True,
                },
                {
                    "name": "candidate_model",
                    "entity_name": "ads-model",
                    "entity_version": "4",
                    "workload_size": "Small",
                    "scale_to_zero_enabled": False,
                },
            ],
            "traffic_config": {
                "routes": [
                    {
                        "served_model_name": "prod_model",
                        "traffic_percentage": 90,
                    },
                    {
                        "served_model_name": "candidate_model",
                        "traffic_percentage": 10,
                    },
                ],
            },
        })
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-databricks/sdk/go/databricks"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := databricks.NewModelServing(ctx, "this", &databricks.ModelServingArgs{
    			Name: pulumi.String("ads-serving-endpoint"),
    			Config: &databricks.ModelServingConfigArgs{
    				ServedEntities: databricks.ModelServingConfigServedEntityArray{
    					&databricks.ModelServingConfigServedEntityArgs{
    						Name:               pulumi.String("prod_model"),
    						EntityName:         pulumi.String("ads-model"),
    						EntityVersion:      pulumi.String("2"),
    						WorkloadSize:       pulumi.String("Small"),
    						ScaleToZeroEnabled: pulumi.Bool(true),
    					},
    					&databricks.ModelServingConfigServedEntityArgs{
    						Name:               pulumi.String("candidate_model"),
    						EntityName:         pulumi.String("ads-model"),
    						EntityVersion:      pulumi.String("4"),
    						WorkloadSize:       pulumi.String("Small"),
    						ScaleToZeroEnabled: pulumi.Bool(false),
    					},
    				},
    				TrafficConfig: &databricks.ModelServingConfigTrafficConfigArgs{
    					Routes: databricks.ModelServingConfigTrafficConfigRouteArray{
    						&databricks.ModelServingConfigTrafficConfigRouteArgs{
    							ServedModelName:   pulumi.String("prod_model"),
    							TrafficPercentage: pulumi.Int(90),
    						},
    						&databricks.ModelServingConfigTrafficConfigRouteArgs{
    							ServedModelName:   pulumi.String("candidate_model"),
    							TrafficPercentage: pulumi.Int(10),
    						},
    					},
    				},
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Databricks = Pulumi.Databricks;
    
    return await Deployment.RunAsync(() => 
    {
        var @this = new Databricks.ModelServing("this", new()
        {
            Name = "ads-serving-endpoint",
            Config = new Databricks.Inputs.ModelServingConfigArgs
            {
                ServedEntities = new[]
                {
                    new Databricks.Inputs.ModelServingConfigServedEntityArgs
                    {
                        Name = "prod_model",
                        EntityName = "ads-model",
                        EntityVersion = "2",
                        WorkloadSize = "Small",
                        ScaleToZeroEnabled = true,
                    },
                    new Databricks.Inputs.ModelServingConfigServedEntityArgs
                    {
                        Name = "candidate_model",
                        EntityName = "ads-model",
                        EntityVersion = "4",
                        WorkloadSize = "Small",
                        ScaleToZeroEnabled = false,
                    },
                },
                TrafficConfig = new Databricks.Inputs.ModelServingConfigTrafficConfigArgs
                {
                    Routes = new[]
                    {
                        new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
                        {
                            ServedModelName = "prod_model",
                            TrafficPercentage = 90,
                        },
                        new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
                        {
                            ServedModelName = "candidate_model",
                            TrafficPercentage = 10,
                        },
                    },
                },
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.databricks.ModelServing;
    import com.pulumi.databricks.ModelServingArgs;
    import com.pulumi.databricks.inputs.ModelServingConfigArgs;
    import com.pulumi.databricks.inputs.ModelServingConfigTrafficConfigArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var this_ = new ModelServing("this", ModelServingArgs.builder()
                .name("ads-serving-endpoint")
                .config(ModelServingConfigArgs.builder()
                    .servedEntities(                
                        ModelServingConfigServedEntityArgs.builder()
                            .name("prod_model")
                            .entityName("ads-model")
                            .entityVersion("2")
                            .workloadSize("Small")
                            .scaleToZeroEnabled(true)
                            .build(),
                        ModelServingConfigServedEntityArgs.builder()
                            .name("candidate_model")
                            .entityName("ads-model")
                            .entityVersion("4")
                            .workloadSize("Small")
                            .scaleToZeroEnabled(false)
                            .build())
                    .trafficConfig(ModelServingConfigTrafficConfigArgs.builder()
                        .routes(                    
                            ModelServingConfigTrafficConfigRouteArgs.builder()
                                .servedModelName("prod_model")
                                .trafficPercentage(90)
                                .build(),
                            ModelServingConfigTrafficConfigRouteArgs.builder()
                                .servedModelName("candidate_model")
                                .trafficPercentage(10)
                                .build())
                        .build())
                    .build())
                .build());
    
        }
    }
    
    resources:
      this:
        type: databricks:ModelServing
        properties:
          name: ads-serving-endpoint
          config:
            servedEntities:
              - name: prod_model
                entityName: ads-model
                entityVersion: '2'
                workloadSize: Small
                scaleToZeroEnabled: true
              - name: candidate_model
                entityName: ads-model
                entityVersion: '4'
                workloadSize: Small
                scaleToZeroEnabled: false
            trafficConfig:
              routes:
                - servedModelName: prod_model
                  trafficPercentage: 90
                - servedModelName: candidate_model
                  trafficPercentage: 10
    

    Access Control

    • databricks.Permissions can control which groups or individual users can Manage, Query, or View individual serving endpoints.

    Other resources, such as databricks.Permissions, are often used in the same context as this resource.

    Create ModelServing Resource

    Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.

    Constructor syntax

    new ModelServing(name: string, args: ModelServingArgs, opts?: CustomResourceOptions);
    @overload
    def ModelServing(resource_name: str,
                     args: ModelServingArgs,
                     opts: Optional[ResourceOptions] = None)
    
    @overload
    def ModelServing(resource_name: str,
                     opts: Optional[ResourceOptions] = None,
                     config: Optional[ModelServingConfigArgs] = None,
                     ai_gateway: Optional[ModelServingAiGatewayArgs] = None,
                     name: Optional[str] = None,
                     rate_limits: Optional[Sequence[ModelServingRateLimitArgs]] = None,
                     route_optimized: Optional[bool] = None,
                     tags: Optional[Sequence[ModelServingTagArgs]] = None)
    func NewModelServing(ctx *Context, name string, args ModelServingArgs, opts ...ResourceOption) (*ModelServing, error)
    public ModelServing(string name, ModelServingArgs args, CustomResourceOptions? opts = null)
    public ModelServing(String name, ModelServingArgs args)
    public ModelServing(String name, ModelServingArgs args, CustomResourceOptions options)
    
    type: databricks:ModelServing
    properties: # The arguments to resource properties.
    options: # Bag of options to control resource's behavior.
    
    

    Parameters

    name string
    The unique name of the resource.
    args ModelServingArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    resource_name str
    The unique name of the resource.
    args ModelServingArgs
    The arguments to resource properties.
    opts ResourceOptions
    Bag of options to control resource's behavior.
    ctx Context
    Context object for the current deployment.
    name string
    The unique name of the resource.
    args ModelServingArgs
    The arguments to resource properties.
    opts ResourceOption
    Bag of options to control resource's behavior.
    name string
    The unique name of the resource.
    args ModelServingArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    name String
    The unique name of the resource.
    args ModelServingArgs
    The arguments to resource properties.
    options CustomResourceOptions
    Bag of options to control resource's behavior.

    Constructor example

    The following reference example uses placeholder values for all input properties.

    var modelServingResource = new Databricks.ModelServing("modelServingResource", new()
    {
        Config = new Databricks.Inputs.ModelServingConfigArgs
        {
            AutoCaptureConfig = new Databricks.Inputs.ModelServingConfigAutoCaptureConfigArgs
            {
                CatalogName = "string",
                Enabled = false,
                SchemaName = "string",
                TableNamePrefix = "string",
            },
            ServedEntities = new[]
            {
                new Databricks.Inputs.ModelServingConfigServedEntityArgs
                {
                    EntityName = "string",
                    EntityVersion = "string",
                    EnvironmentVars = 
                    {
                        { "string", "string" },
                    },
                    ExternalModel = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelArgs
                    {
                        Name = "string",
                        Provider = "string",
                        Task = "string",
                        Ai21labsConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAi21labsConfigArgs
                        {
                            Ai21labsApiKey = "string",
                            Ai21labsApiKeyPlaintext = "string",
                        },
                        AmazonBedrockConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs
                        {
                            AwsRegion = "string",
                            BedrockProvider = "string",
                            AwsAccessKeyId = "string",
                            AwsAccessKeyIdPlaintext = "string",
                            AwsSecretAccessKey = "string",
                            AwsSecretAccessKeyPlaintext = "string",
                        },
                        AnthropicConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAnthropicConfigArgs
                        {
                            AnthropicApiKey = "string",
                            AnthropicApiKeyPlaintext = "string",
                        },
                        CohereConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelCohereConfigArgs
                        {
                            CohereApiBase = "string",
                            CohereApiKey = "string",
                            CohereApiKeyPlaintext = "string",
                        },
                        DatabricksModelServingConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs
                        {
                            DatabricksWorkspaceUrl = "string",
                            DatabricksApiToken = "string",
                            DatabricksApiTokenPlaintext = "string",
                        },
                        GoogleCloudVertexAiConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs
                        {
                            PrivateKey = "string",
                            PrivateKeyPlaintext = "string",
                            ProjectId = "string",
                            Region = "string",
                        },
                        OpenaiConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelOpenaiConfigArgs
                        {
                            MicrosoftEntraClientId = "string",
                            MicrosoftEntraClientSecret = "string",
                            MicrosoftEntraClientSecretPlaintext = "string",
                            MicrosoftEntraTenantId = "string",
                            OpenaiApiBase = "string",
                            OpenaiApiKey = "string",
                            OpenaiApiKeyPlaintext = "string",
                            OpenaiApiType = "string",
                            OpenaiApiVersion = "string",
                            OpenaiDeploymentName = "string",
                            OpenaiOrganization = "string",
                        },
                        PalmConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelPalmConfigArgs
                        {
                            PalmApiKey = "string",
                            PalmApiKeyPlaintext = "string",
                        },
                    },
                    InstanceProfileArn = "string",
                    MaxProvisionedThroughput = 0,
                    MinProvisionedThroughput = 0,
                    Name = "string",
                    ScaleToZeroEnabled = false,
                    WorkloadSize = "string",
                    WorkloadType = "string",
                },
            },
            TrafficConfig = new Databricks.Inputs.ModelServingConfigTrafficConfigArgs
            {
                Routes = new[]
                {
                    new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
                    {
                        ServedModelName = "string",
                        TrafficPercentage = 0,
                    },
                },
            },
        },
        AiGateway = new Databricks.Inputs.ModelServingAiGatewayArgs
        {
            Guardrails = new Databricks.Inputs.ModelServingAiGatewayGuardrailsArgs
            {
                Input = new Databricks.Inputs.ModelServingAiGatewayGuardrailsInputArgs
                {
                    InvalidKeywords = new[]
                    {
                        "string",
                    },
                    Pii = new Databricks.Inputs.ModelServingAiGatewayGuardrailsInputPiiArgs
                    {
                        Behavior = "string",
                    },
                    Safety = false,
                    ValidTopics = new[]
                    {
                        "string",
                    },
                },
                Output = new Databricks.Inputs.ModelServingAiGatewayGuardrailsOutputArgs
                {
                    InvalidKeywords = new[]
                    {
                        "string",
                    },
                    Pii = new Databricks.Inputs.ModelServingAiGatewayGuardrailsOutputPiiArgs
                    {
                        Behavior = "string",
                    },
                    Safety = false,
                    ValidTopics = new[]
                    {
                        "string",
                    },
                },
            },
            InferenceTableConfig = new Databricks.Inputs.ModelServingAiGatewayInferenceTableConfigArgs
            {
                CatalogName = "string",
                Enabled = false,
                SchemaName = "string",
                TableNamePrefix = "string",
            },
            RateLimits = new[]
            {
                new Databricks.Inputs.ModelServingAiGatewayRateLimitArgs
                {
                    Calls = 0,
                    RenewalPeriod = "string",
                    Key = "string",
                },
            },
            UsageTrackingConfig = new Databricks.Inputs.ModelServingAiGatewayUsageTrackingConfigArgs
            {
                Enabled = false,
            },
        },
        Name = "string",
        RateLimits = new[]
        {
            new Databricks.Inputs.ModelServingRateLimitArgs
            {
                Calls = 0,
                RenewalPeriod = "string",
                Key = "string",
            },
        },
        RouteOptimized = false,
        Tags = new[]
        {
            new Databricks.Inputs.ModelServingTagArgs
            {
                Key = "string",
                Value = "string",
            },
        },
    });
    
    example, err := databricks.NewModelServing(ctx, "modelServingResource", &databricks.ModelServingArgs{
    	Config: &databricks.ModelServingConfigArgs{
    		AutoCaptureConfig: &databricks.ModelServingConfigAutoCaptureConfigArgs{
    			CatalogName:     pulumi.String("string"),
    			Enabled:         pulumi.Bool(false),
    			SchemaName:      pulumi.String("string"),
    			TableNamePrefix: pulumi.String("string"),
    		},
    		ServedEntities: databricks.ModelServingConfigServedEntityArray{
    			&databricks.ModelServingConfigServedEntityArgs{
    				EntityName:    pulumi.String("string"),
    				EntityVersion: pulumi.String("string"),
    				EnvironmentVars: pulumi.StringMap{
    					"string": pulumi.String("string"),
    				},
    				ExternalModel: &databricks.ModelServingConfigServedEntityExternalModelArgs{
    					Name:     pulumi.String("string"),
    					Provider: pulumi.String("string"),
    					Task:     pulumi.String("string"),
    					Ai21labsConfig: &databricks.ModelServingConfigServedEntityExternalModelAi21labsConfigArgs{
    						Ai21labsApiKey:          pulumi.String("string"),
    						Ai21labsApiKeyPlaintext: pulumi.String("string"),
    					},
    					AmazonBedrockConfig: &databricks.ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs{
    						AwsRegion:                   pulumi.String("string"),
    						BedrockProvider:             pulumi.String("string"),
    						AwsAccessKeyId:              pulumi.String("string"),
    						AwsAccessKeyIdPlaintext:     pulumi.String("string"),
    						AwsSecretAccessKey:          pulumi.String("string"),
    						AwsSecretAccessKeyPlaintext: pulumi.String("string"),
    					},
    					AnthropicConfig: &databricks.ModelServingConfigServedEntityExternalModelAnthropicConfigArgs{
    						AnthropicApiKey:          pulumi.String("string"),
    						AnthropicApiKeyPlaintext: pulumi.String("string"),
    					},
    					CohereConfig: &databricks.ModelServingConfigServedEntityExternalModelCohereConfigArgs{
    						CohereApiBase:         pulumi.String("string"),
    						CohereApiKey:          pulumi.String("string"),
    						CohereApiKeyPlaintext: pulumi.String("string"),
    					},
    					DatabricksModelServingConfig: &databricks.ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs{
    						DatabricksWorkspaceUrl:      pulumi.String("string"),
    						DatabricksApiToken:          pulumi.String("string"),
    						DatabricksApiTokenPlaintext: pulumi.String("string"),
    					},
    					GoogleCloudVertexAiConfig: &databricks.ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs{
    						PrivateKey:          pulumi.String("string"),
    						PrivateKeyPlaintext: pulumi.String("string"),
    						ProjectId:           pulumi.String("string"),
    						Region:              pulumi.String("string"),
    					},
    					OpenaiConfig: &databricks.ModelServingConfigServedEntityExternalModelOpenaiConfigArgs{
    						MicrosoftEntraClientId:              pulumi.String("string"),
    						MicrosoftEntraClientSecret:          pulumi.String("string"),
    						MicrosoftEntraClientSecretPlaintext: pulumi.String("string"),
    						MicrosoftEntraTenantId:              pulumi.String("string"),
    						OpenaiApiBase:                       pulumi.String("string"),
    						OpenaiApiKey:                        pulumi.String("string"),
    						OpenaiApiKeyPlaintext:               pulumi.String("string"),
    						OpenaiApiType:                       pulumi.String("string"),
    						OpenaiApiVersion:                    pulumi.String("string"),
    						OpenaiDeploymentName:                pulumi.String("string"),
    						OpenaiOrganization:                  pulumi.String("string"),
    					},
    					PalmConfig: &databricks.ModelServingConfigServedEntityExternalModelPalmConfigArgs{
    						PalmApiKey:          pulumi.String("string"),
    						PalmApiKeyPlaintext: pulumi.String("string"),
    					},
    				},
    				InstanceProfileArn:       pulumi.String("string"),
    				MaxProvisionedThroughput: pulumi.Int(0),
    				MinProvisionedThroughput: pulumi.Int(0),
    				Name:                     pulumi.String("string"),
    				ScaleToZeroEnabled:       pulumi.Bool(false),
    				WorkloadSize:             pulumi.String("string"),
    				WorkloadType:             pulumi.String("string"),
    			},
    		},
    		TrafficConfig: &databricks.ModelServingConfigTrafficConfigArgs{
    			Routes: databricks.ModelServingConfigTrafficConfigRouteArray{
    				&databricks.ModelServingConfigTrafficConfigRouteArgs{
    					ServedModelName:   pulumi.String("string"),
    					TrafficPercentage: pulumi.Int(0),
    				},
    			},
    		},
    	},
    	AiGateway: &databricks.ModelServingAiGatewayArgs{
    		Guardrails: &databricks.ModelServingAiGatewayGuardrailsArgs{
    			Input: &databricks.ModelServingAiGatewayGuardrailsInputTypeArgs{
    				InvalidKeywords: pulumi.StringArray{
    					pulumi.String("string"),
    				},
    				Pii: &databricks.ModelServingAiGatewayGuardrailsInputPiiArgs{
    					Behavior: pulumi.String("string"),
    				},
    				Safety: pulumi.Bool(false),
    				ValidTopics: pulumi.StringArray{
    					pulumi.String("string"),
    				},
    			},
    			Output: &databricks.ModelServingAiGatewayGuardrailsOutputTypeArgs{
    				InvalidKeywords: pulumi.StringArray{
    					pulumi.String("string"),
    				},
    				Pii: &databricks.ModelServingAiGatewayGuardrailsOutputPiiArgs{
    					Behavior: pulumi.String("string"),
    				},
    				Safety: pulumi.Bool(false),
    				ValidTopics: pulumi.StringArray{
    					pulumi.String("string"),
    				},
    			},
    		},
    		InferenceTableConfig: &databricks.ModelServingAiGatewayInferenceTableConfigArgs{
    			CatalogName:     pulumi.String("string"),
    			Enabled:         pulumi.Bool(false),
    			SchemaName:      pulumi.String("string"),
    			TableNamePrefix: pulumi.String("string"),
    		},
    		RateLimits: databricks.ModelServingAiGatewayRateLimitArray{
    			&databricks.ModelServingAiGatewayRateLimitArgs{
    				Calls:         pulumi.Int(0),
    				RenewalPeriod: pulumi.String("string"),
    				Key:           pulumi.String("string"),
    			},
    		},
    		UsageTrackingConfig: &databricks.ModelServingAiGatewayUsageTrackingConfigArgs{
    			Enabled: pulumi.Bool(false),
    		},
    	},
    	Name: pulumi.String("string"),
    	RateLimits: databricks.ModelServingRateLimitArray{
    		&databricks.ModelServingRateLimitArgs{
    			Calls:         pulumi.Int(0),
    			RenewalPeriod: pulumi.String("string"),
    			Key:           pulumi.String("string"),
    		},
    	},
    	RouteOptimized: pulumi.Bool(false),
    	Tags: databricks.ModelServingTagArray{
    		&databricks.ModelServingTagArgs{
    			Key:   pulumi.String("string"),
    			Value: pulumi.String("string"),
    		},
    	},
    })
    
    var modelServingResource = new ModelServing("modelServingResource", ModelServingArgs.builder()
        .config(ModelServingConfigArgs.builder()
            .autoCaptureConfig(ModelServingConfigAutoCaptureConfigArgs.builder()
                .catalogName("string")
                .enabled(false)
                .schemaName("string")
                .tableNamePrefix("string")
                .build())
            .servedEntities(ModelServingConfigServedEntityArgs.builder()
                .entityName("string")
                .entityVersion("string")
                .environmentVars(Map.of("string", "string"))
                .externalModel(ModelServingConfigServedEntityExternalModelArgs.builder()
                    .name("string")
                    .provider("string")
                    .task("string")
                    .ai21labsConfig(ModelServingConfigServedEntityExternalModelAi21labsConfigArgs.builder()
                        .ai21labsApiKey("string")
                        .ai21labsApiKeyPlaintext("string")
                        .build())
                    .amazonBedrockConfig(ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs.builder()
                        .awsRegion("string")
                        .bedrockProvider("string")
                        .awsAccessKeyId("string")
                        .awsAccessKeyIdPlaintext("string")
                        .awsSecretAccessKey("string")
                        .awsSecretAccessKeyPlaintext("string")
                        .build())
                    .anthropicConfig(ModelServingConfigServedEntityExternalModelAnthropicConfigArgs.builder()
                        .anthropicApiKey("string")
                        .anthropicApiKeyPlaintext("string")
                        .build())
                    .cohereConfig(ModelServingConfigServedEntityExternalModelCohereConfigArgs.builder()
                        .cohereApiBase("string")
                        .cohereApiKey("string")
                        .cohereApiKeyPlaintext("string")
                        .build())
                    .databricksModelServingConfig(ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs.builder()
                        .databricksWorkspaceUrl("string")
                        .databricksApiToken("string")
                        .databricksApiTokenPlaintext("string")
                        .build())
                    .googleCloudVertexAiConfig(ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs.builder()
                        .privateKey("string")
                        .privateKeyPlaintext("string")
                        .projectId("string")
                        .region("string")
                        .build())
                    .openaiConfig(ModelServingConfigServedEntityExternalModelOpenaiConfigArgs.builder()
                        .microsoftEntraClientId("string")
                        .microsoftEntraClientSecret("string")
                        .microsoftEntraClientSecretPlaintext("string")
                        .microsoftEntraTenantId("string")
                        .openaiApiBase("string")
                        .openaiApiKey("string")
                        .openaiApiKeyPlaintext("string")
                        .openaiApiType("string")
                        .openaiApiVersion("string")
                        .openaiDeploymentName("string")
                        .openaiOrganization("string")
                        .build())
                    .palmConfig(ModelServingConfigServedEntityExternalModelPalmConfigArgs.builder()
                        .palmApiKey("string")
                        .palmApiKeyPlaintext("string")
                        .build())
                    .build())
                .instanceProfileArn("string")
                .maxProvisionedThroughput(0)
                .minProvisionedThroughput(0)
                .name("string")
                .scaleToZeroEnabled(false)
                .workloadSize("string")
                .workloadType("string")
                .build())
            .trafficConfig(ModelServingConfigTrafficConfigArgs.builder()
                .routes(ModelServingConfigTrafficConfigRouteArgs.builder()
                    .servedModelName("string")
                    .trafficPercentage(0)
                    .build())
                .build())
            .build())
        .aiGateway(ModelServingAiGatewayArgs.builder()
            .guardrails(ModelServingAiGatewayGuardrailsArgs.builder()
                .input(ModelServingAiGatewayGuardrailsInputArgs.builder()
                    .invalidKeywords("string")
                    .pii(ModelServingAiGatewayGuardrailsInputPiiArgs.builder()
                        .behavior("string")
                        .build())
                    .safety(false)
                    .validTopics("string")
                    .build())
                .output(ModelServingAiGatewayGuardrailsOutputArgs.builder()
                    .invalidKeywords("string")
                    .pii(ModelServingAiGatewayGuardrailsOutputPiiArgs.builder()
                        .behavior("string")
                        .build())
                    .safety(false)
                    .validTopics("string")
                    .build())
                .build())
            .inferenceTableConfig(ModelServingAiGatewayInferenceTableConfigArgs.builder()
                .catalogName("string")
                .enabled(false)
                .schemaName("string")
                .tableNamePrefix("string")
                .build())
            .rateLimits(ModelServingAiGatewayRateLimitArgs.builder()
                .calls(0)
                .renewalPeriod("string")
                .key("string")
                .build())
            .usageTrackingConfig(ModelServingAiGatewayUsageTrackingConfigArgs.builder()
                .enabled(false)
                .build())
            .build())
        .name("string")
        .rateLimits(ModelServingRateLimitArgs.builder()
            .calls(0)
            .renewalPeriod("string")
            .key("string")
            .build())
        .routeOptimized(false)
        .tags(ModelServingTagArgs.builder()
            .key("string")
            .value("string")
            .build())
        .build());
    
    model_serving_resource = databricks.ModelServing("modelServingResource",
        config=databricks.ModelServingConfigArgs(
            auto_capture_config=databricks.ModelServingConfigAutoCaptureConfigArgs(
                catalog_name="string",
                enabled=False,
                schema_name="string",
                table_name_prefix="string",
            ),
            served_entities=[databricks.ModelServingConfigServedEntityArgs(
                entity_name="string",
                entity_version="string",
                environment_vars={
                    "string": "string",
                },
                external_model=databricks.ModelServingConfigServedEntityExternalModelArgs(
                    name="string",
                    provider="string",
                    task="string",
                    ai21labs_config=databricks.ModelServingConfigServedEntityExternalModelAi21labsConfigArgs(
                        ai21labs_api_key="string",
                        ai21labs_api_key_plaintext="string",
                    ),
                    amazon_bedrock_config=databricks.ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs(
                        aws_region="string",
                        bedrock_provider="string",
                        aws_access_key_id="string",
                        aws_access_key_id_plaintext="string",
                        aws_secret_access_key="string",
                        aws_secret_access_key_plaintext="string",
                    ),
                    anthropic_config=databricks.ModelServingConfigServedEntityExternalModelAnthropicConfigArgs(
                        anthropic_api_key="string",
                        anthropic_api_key_plaintext="string",
                    ),
                    cohere_config=databricks.ModelServingConfigServedEntityExternalModelCohereConfigArgs(
                        cohere_api_base="string",
                        cohere_api_key="string",
                        cohere_api_key_plaintext="string",
                    ),
                    databricks_model_serving_config=databricks.ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs(
                        databricks_workspace_url="string",
                        databricks_api_token="string",
                        databricks_api_token_plaintext="string",
                    ),
                    google_cloud_vertex_ai_config=databricks.ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs(
                        private_key="string",
                        private_key_plaintext="string",
                        project_id="string",
                        region="string",
                    ),
                    openai_config=databricks.ModelServingConfigServedEntityExternalModelOpenaiConfigArgs(
                        microsoft_entra_client_id="string",
                        microsoft_entra_client_secret="string",
                        microsoft_entra_client_secret_plaintext="string",
                        microsoft_entra_tenant_id="string",
                        openai_api_base="string",
                        openai_api_key="string",
                        openai_api_key_plaintext="string",
                        openai_api_type="string",
                        openai_api_version="string",
                        openai_deployment_name="string",
                        openai_organization="string",
                    ),
                    palm_config=databricks.ModelServingConfigServedEntityExternalModelPalmConfigArgs(
                        palm_api_key="string",
                        palm_api_key_plaintext="string",
                    ),
                ),
                instance_profile_arn="string",
                max_provisioned_throughput=0,
                min_provisioned_throughput=0,
                name="string",
                scale_to_zero_enabled=False,
                workload_size="string",
                workload_type="string",
            )],
            traffic_config=databricks.ModelServingConfigTrafficConfigArgs(
                routes=[databricks.ModelServingConfigTrafficConfigRouteArgs(
                    served_model_name="string",
                    traffic_percentage=0,
                )],
            ),
        ),
        ai_gateway=databricks.ModelServingAiGatewayArgs(
            guardrails=databricks.ModelServingAiGatewayGuardrailsArgs(
                input=databricks.ModelServingAiGatewayGuardrailsInputArgs(
                    invalid_keywords=["string"],
                    pii=databricks.ModelServingAiGatewayGuardrailsInputPiiArgs(
                        behavior="string",
                    ),
                    safety=False,
                    valid_topics=["string"],
                ),
                output=databricks.ModelServingAiGatewayGuardrailsOutputArgs(
                    invalid_keywords=["string"],
                    pii=databricks.ModelServingAiGatewayGuardrailsOutputPiiArgs(
                        behavior="string",
                    ),
                    safety=False,
                    valid_topics=["string"],
                ),
            ),
            inference_table_config=databricks.ModelServingAiGatewayInferenceTableConfigArgs(
                catalog_name="string",
                enabled=False,
                schema_name="string",
                table_name_prefix="string",
            ),
            rate_limits=[databricks.ModelServingAiGatewayRateLimitArgs(
                calls=0,
                renewal_period="string",
                key="string",
            )],
            usage_tracking_config=databricks.ModelServingAiGatewayUsageTrackingConfigArgs(
                enabled=False,
            ),
        ),
        name="string",
        rate_limits=[databricks.ModelServingRateLimitArgs(
            calls=0,
            renewal_period="string",
            key="string",
        )],
        route_optimized=False,
        tags=[databricks.ModelServingTagArgs(
            key="string",
            value="string",
        )])
    
    const modelServingResource = new databricks.ModelServing("modelServingResource", {
        config: {
            autoCaptureConfig: {
                catalogName: "string",
                enabled: false,
                schemaName: "string",
                tableNamePrefix: "string",
            },
            servedEntities: [{
                entityName: "string",
                entityVersion: "string",
                environmentVars: {
                    string: "string",
                },
                externalModel: {
                    name: "string",
                    provider: "string",
                    task: "string",
                    ai21labsConfig: {
                        ai21labsApiKey: "string",
                        ai21labsApiKeyPlaintext: "string",
                    },
                    amazonBedrockConfig: {
                        awsRegion: "string",
                        bedrockProvider: "string",
                        awsAccessKeyId: "string",
                        awsAccessKeyIdPlaintext: "string",
                        awsSecretAccessKey: "string",
                        awsSecretAccessKeyPlaintext: "string",
                    },
                    anthropicConfig: {
                        anthropicApiKey: "string",
                        anthropicApiKeyPlaintext: "string",
                    },
                    cohereConfig: {
                        cohereApiBase: "string",
                        cohereApiKey: "string",
                        cohereApiKeyPlaintext: "string",
                    },
                    databricksModelServingConfig: {
                        databricksWorkspaceUrl: "string",
                        databricksApiToken: "string",
                        databricksApiTokenPlaintext: "string",
                    },
                    googleCloudVertexAiConfig: {
                        privateKey: "string",
                        privateKeyPlaintext: "string",
                        projectId: "string",
                        region: "string",
                    },
                    openaiConfig: {
                        microsoftEntraClientId: "string",
                        microsoftEntraClientSecret: "string",
                        microsoftEntraClientSecretPlaintext: "string",
                        microsoftEntraTenantId: "string",
                        openaiApiBase: "string",
                        openaiApiKey: "string",
                        openaiApiKeyPlaintext: "string",
                        openaiApiType: "string",
                        openaiApiVersion: "string",
                        openaiDeploymentName: "string",
                        openaiOrganization: "string",
                    },
                    palmConfig: {
                        palmApiKey: "string",
                        palmApiKeyPlaintext: "string",
                    },
                },
                instanceProfileArn: "string",
                maxProvisionedThroughput: 0,
                minProvisionedThroughput: 0,
                name: "string",
                scaleToZeroEnabled: false,
                workloadSize: "string",
                workloadType: "string",
            }],
            trafficConfig: {
                routes: [{
                    servedModelName: "string",
                    trafficPercentage: 0,
                }],
            },
        },
        aiGateway: {
            guardrails: {
                input: {
                    invalidKeywords: ["string"],
                    pii: {
                        behavior: "string",
                    },
                    safety: false,
                    validTopics: ["string"],
                },
                output: {
                    invalidKeywords: ["string"],
                    pii: {
                        behavior: "string",
                    },
                    safety: false,
                    validTopics: ["string"],
                },
            },
            inferenceTableConfig: {
                catalogName: "string",
                enabled: false,
                schemaName: "string",
                tableNamePrefix: "string",
            },
            rateLimits: [{
                calls: 0,
                renewalPeriod: "string",
                key: "string",
            }],
            usageTrackingConfig: {
                enabled: false,
            },
        },
        name: "string",
        rateLimits: [{
            calls: 0,
            renewalPeriod: "string",
            key: "string",
        }],
        routeOptimized: false,
        tags: [{
            key: "string",
            value: "string",
        }],
    });
    
    type: databricks:ModelServing
    properties:
        aiGateway:
            guardrails:
                input:
                    invalidKeywords:
                        - string
                    pii:
                        behavior: string
                    safety: false
                    validTopics:
                        - string
                output:
                    invalidKeywords:
                        - string
                    pii:
                        behavior: string
                    safety: false
                    validTopics:
                        - string
            inferenceTableConfig:
                catalogName: string
                enabled: false
                schemaName: string
                tableNamePrefix: string
            rateLimits:
                - calls: 0
                  key: string
                  renewalPeriod: string
            usageTrackingConfig:
                enabled: false
        config:
            autoCaptureConfig:
                catalogName: string
                enabled: false
                schemaName: string
                tableNamePrefix: string
            servedEntities:
                - entityName: string
                  entityVersion: string
                  environmentVars:
                    string: string
                  externalModel:
                    ai21labsConfig:
                        ai21labsApiKey: string
                        ai21labsApiKeyPlaintext: string
                    amazonBedrockConfig:
                        awsAccessKeyId: string
                        awsAccessKeyIdPlaintext: string
                        awsRegion: string
                        awsSecretAccessKey: string
                        awsSecretAccessKeyPlaintext: string
                        bedrockProvider: string
                    anthropicConfig:
                        anthropicApiKey: string
                        anthropicApiKeyPlaintext: string
                    cohereConfig:
                        cohereApiBase: string
                        cohereApiKey: string
                        cohereApiKeyPlaintext: string
                    databricksModelServingConfig:
                        databricksApiToken: string
                        databricksApiTokenPlaintext: string
                        databricksWorkspaceUrl: string
                    googleCloudVertexAiConfig:
                        privateKey: string
                        privateKeyPlaintext: string
                        projectId: string
                        region: string
                    name: string
                    openaiConfig:
                        microsoftEntraClientId: string
                        microsoftEntraClientSecret: string
                        microsoftEntraClientSecretPlaintext: string
                        microsoftEntraTenantId: string
                        openaiApiBase: string
                        openaiApiKey: string
                        openaiApiKeyPlaintext: string
                        openaiApiType: string
                        openaiApiVersion: string
                        openaiDeploymentName: string
                        openaiOrganization: string
                    palmConfig:
                        palmApiKey: string
                        palmApiKeyPlaintext: string
                    provider: string
                    task: string
                  instanceProfileArn: string
                  maxProvisionedThroughput: 0
                  minProvisionedThroughput: 0
                  name: string
                  scaleToZeroEnabled: false
                  workloadSize: string
                  workloadType: string
            trafficConfig:
                routes:
                    - servedModelName: string
                      trafficPercentage: 0
        name: string
        rateLimits:
            - calls: 0
              key: string
              renewalPeriod: string
        routeOptimized: false
        tags:
            - key: string
              value: string
    

    ModelServing Resource Properties

    To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

    Inputs

    The ModelServing resource accepts the following input properties:

    Config ModelServingConfig
    The model serving endpoint configuration.
    AiGateway ModelServingAiGateway
    Name string
    The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
    RateLimits List<ModelServingRateLimit>
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    RouteOptimized bool
    A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
    Tags List<ModelServingTag>
    Tags to be attached to the serving endpoint and automatically propagated to billing logs.
    Config ModelServingConfigArgs
    The model serving endpoint configuration.
    AiGateway ModelServingAiGatewayArgs
    Name string
    The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
    RateLimits []ModelServingRateLimitArgs
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    RouteOptimized bool
    A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
    Tags []ModelServingTagArgs
    Tags to be attached to the serving endpoint and automatically propagated to billing logs.
    config ModelServingConfig
    The model serving endpoint configuration.
    aiGateway ModelServingAiGateway
    name String
    The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
    rateLimits List<ModelServingRateLimit>
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    routeOptimized Boolean
    A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
    tags List<ModelServingTag>
    Tags to be attached to the serving endpoint and automatically propagated to billing logs.
    config ModelServingConfig
    The model serving endpoint configuration.
    aiGateway ModelServingAiGateway
    name string
    The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
    rateLimits ModelServingRateLimit[]
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    routeOptimized boolean
    A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
    tags ModelServingTag[]
    Tags to be attached to the serving endpoint and automatically propagated to billing logs.
    config ModelServingConfigArgs
    The model serving endpoint configuration.
    ai_gateway ModelServingAiGatewayArgs
    name str
    The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
    rate_limits Sequence[ModelServingRateLimitArgs]
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    route_optimized bool
    A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
    tags Sequence[ModelServingTagArgs]
    Tags to be attached to the serving endpoint and automatically propagated to billing logs.
    config Property Map
    The model serving endpoint configuration.
    aiGateway Property Map
    name String
    The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
    rateLimits List<Property Map>
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    routeOptimized Boolean
    A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
    tags List<Property Map>
    Tags to be attached to the serving endpoint and automatically propagated to billing logs.

    Outputs

    All input properties are implicitly available as output properties. Additionally, the ModelServing resource produces the following output properties:

    Id string
    The provider-assigned unique ID for this managed resource.
    ServingEndpointId string
    Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
    Id string
    The provider-assigned unique ID for this managed resource.
    ServingEndpointId string
    Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
    id String
    The provider-assigned unique ID for this managed resource.
    servingEndpointId String
    Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
    id string
    The provider-assigned unique ID for this managed resource.
    servingEndpointId string
    Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
    id str
    The provider-assigned unique ID for this managed resource.
    serving_endpoint_id str
    Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
    id String
    The provider-assigned unique ID for this managed resource.
    servingEndpointId String
    Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.

    Look up Existing ModelServing Resource

    Get an existing ModelServing resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

    public static get(name: string, id: Input<ID>, state?: ModelServingState, opts?: CustomResourceOptions): ModelServing
    @staticmethod
    def get(resource_name: str,
            id: str,
            opts: Optional[ResourceOptions] = None,
            ai_gateway: Optional[ModelServingAiGatewayArgs] = None,
            config: Optional[ModelServingConfigArgs] = None,
            name: Optional[str] = None,
            rate_limits: Optional[Sequence[ModelServingRateLimitArgs]] = None,
            route_optimized: Optional[bool] = None,
            serving_endpoint_id: Optional[str] = None,
            tags: Optional[Sequence[ModelServingTagArgs]] = None) -> ModelServing
    func GetModelServing(ctx *Context, name string, id IDInput, state *ModelServingState, opts ...ResourceOption) (*ModelServing, error)
    public static ModelServing Get(string name, Input<string> id, ModelServingState? state, CustomResourceOptions? opts = null)
    public static ModelServing get(String name, Output<String> id, ModelServingState state, CustomResourceOptions options)
    Resource lookup is not supported in YAML
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    resource_name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    The following state arguments are supported:
    AiGateway ModelServingAiGateway
    Config ModelServingConfig
    The model serving endpoint configuration.
    Name string
    The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
    RateLimits List<ModelServingRateLimit>
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    RouteOptimized bool
    A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
    ServingEndpointId string
    Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
    Tags List<ModelServingTag>
    Tags to be attached to the serving endpoint and automatically propagated to billing logs.
    AiGateway ModelServingAiGatewayArgs
    Config ModelServingConfigArgs
    The model serving endpoint configuration.
    Name string
    The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
    RateLimits []ModelServingRateLimitArgs
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    RouteOptimized bool
    A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
    ServingEndpointId string
    Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
    Tags []ModelServingTagArgs
    Tags to be attached to the serving endpoint and automatically propagated to billing logs.
    aiGateway ModelServingAiGateway
    config ModelServingConfig
    The model serving endpoint configuration.
    name String
    The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
    rateLimits List<ModelServingRateLimit>
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    routeOptimized Boolean
    A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
    servingEndpointId String
    Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
    tags List<ModelServingTag>
    Tags to be attached to the serving endpoint and automatically propagated to billing logs.
    aiGateway ModelServingAiGateway
    config ModelServingConfig
    The model serving endpoint configuration.
    name string
    The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
    rateLimits ModelServingRateLimit[]
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    routeOptimized boolean
    A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
    servingEndpointId string
    Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
    tags ModelServingTag[]
    Tags to be attached to the serving endpoint and automatically propagated to billing logs.
    ai_gateway ModelServingAiGatewayArgs
    config ModelServingConfigArgs
    The model serving endpoint configuration.
    name str
    The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
    rate_limits Sequence[ModelServingRateLimitArgs]
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    route_optimized bool
    A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
    serving_endpoint_id str
    Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
    tags Sequence[ModelServingTagArgs]
    Tags to be attached to the serving endpoint and automatically propagated to billing logs.
    aiGateway Property Map
    config Property Map
    The model serving endpoint configuration.
    name String
    The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
    rateLimits List<Property Map>
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    routeOptimized Boolean
    A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
    servingEndpointId String
    Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
    tags List<Property Map>
    Tags to be attached to the serving endpoint and automatically propagated to billing logs.

    Supporting Types

    ModelServingAiGateway, ModelServingAiGatewayArgs

    Guardrails ModelServingAiGatewayGuardrails
    InferenceTableConfig ModelServingAiGatewayInferenceTableConfig
    RateLimits List<ModelServingAiGatewayRateLimit>
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    UsageTrackingConfig ModelServingAiGatewayUsageTrackingConfig
    Guardrails ModelServingAiGatewayGuardrails
    InferenceTableConfig ModelServingAiGatewayInferenceTableConfig
    RateLimits []ModelServingAiGatewayRateLimit
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    UsageTrackingConfig ModelServingAiGatewayUsageTrackingConfig
    guardrails ModelServingAiGatewayGuardrails
    inferenceTableConfig ModelServingAiGatewayInferenceTableConfig
    rateLimits List<ModelServingAiGatewayRateLimit>
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    usageTrackingConfig ModelServingAiGatewayUsageTrackingConfig
    guardrails ModelServingAiGatewayGuardrails
    inferenceTableConfig ModelServingAiGatewayInferenceTableConfig
    rateLimits ModelServingAiGatewayRateLimit[]
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    usageTrackingConfig ModelServingAiGatewayUsageTrackingConfig
    guardrails Property Map
    inferenceTableConfig Property Map
    rateLimits List<Property Map>
    A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
    usageTrackingConfig Property Map

    ModelServingAiGatewayGuardrails, ModelServingAiGatewayGuardrailsArgs

    ModelServingAiGatewayGuardrailsInput, ModelServingAiGatewayGuardrailsInputArgs

    invalidKeywords List<String>
    pii Property Map
    safety Boolean
    validTopics List<String>

    ModelServingAiGatewayGuardrailsInputPii, ModelServingAiGatewayGuardrailsInputPiiArgs

    Behavior string
    Behavior string
    behavior String
    behavior string
    behavior String

    ModelServingAiGatewayGuardrailsOutput, ModelServingAiGatewayGuardrailsOutputArgs

    invalidKeywords List<String>
    pii Property Map
    safety Boolean
    validTopics List<String>

    ModelServingAiGatewayGuardrailsOutputPii, ModelServingAiGatewayGuardrailsOutputPiiArgs

    Behavior string
    Behavior string
    behavior String
    behavior string
    behavior String

    ModelServingAiGatewayInferenceTableConfig, ModelServingAiGatewayInferenceTableConfigArgs

    CatalogName string
    The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
    Enabled bool
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    SchemaName string
    The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
    TableNamePrefix string
    The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
    CatalogName string
    The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
    Enabled bool
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    SchemaName string
    The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
    TableNamePrefix string
    The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
    catalogName String
    The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
    enabled Boolean
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    schemaName String
    The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
    tableNamePrefix String
    The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
    catalogName string
    The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
    enabled boolean
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    schemaName string
    The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
    tableNamePrefix string
    The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
    catalog_name str
    The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
    enabled bool
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    schema_name str
    The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
    table_name_prefix str
    The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
    catalogName String
    The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
    enabled Boolean
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    schemaName String
    The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
    tableNamePrefix String
    The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.

    ModelServingAiGatewayRateLimit, ModelServingAiGatewayRateLimitArgs

    Calls int
    Used to specify how many calls are allowed for a key within the renewal_period.
    RenewalPeriod string
    Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
    Key string
    Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
    Calls int
    Used to specify how many calls are allowed for a key within the renewal_period.
    RenewalPeriod string
    Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
    Key string
    Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
    calls Integer
    Used to specify how many calls are allowed for a key within the renewal_period.
    renewalPeriod String
    Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
    key String
    Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
    calls number
    Used to specify how many calls are allowed for a key within the renewal_period.
    renewalPeriod string
    Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
    key string
    Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
    calls int
    Used to specify how many calls are allowed for a key within the renewal_period.
    renewal_period str
    Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
    key str
    Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
    calls Number
    Used to specify how many calls are allowed for a key within the renewal_period.
    renewalPeriod String
    Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
    key String
    Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.

    ModelServingAiGatewayUsageTrackingConfig, ModelServingAiGatewayUsageTrackingConfigArgs

    Enabled bool
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    Enabled bool
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    enabled Boolean
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    enabled boolean
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    enabled bool
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    enabled Boolean
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.

    ModelServingConfig, ModelServingConfigArgs

    AutoCaptureConfig ModelServingConfigAutoCaptureConfig
    Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
    ServedEntities List<ModelServingConfigServedEntity>
    A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
    ServedModels List<ModelServingConfigServedModel>
    Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.

    Deprecated: Please use 'config.served_entities' instead of 'config.served_models'.

    TrafficConfig ModelServingConfigTrafficConfig
    A single block represents the traffic split configuration amongst the served models.
    AutoCaptureConfig ModelServingConfigAutoCaptureConfig
    Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
    ServedEntities []ModelServingConfigServedEntity
    A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
    ServedModels []ModelServingConfigServedModel
    Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.

    Deprecated: Please use 'config.served_entities' instead of 'config.served_models'.

    TrafficConfig ModelServingConfigTrafficConfig
    A single block represents the traffic split configuration amongst the served models.
    autoCaptureConfig ModelServingConfigAutoCaptureConfig
    Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
    servedEntities List<ModelServingConfigServedEntity>
    A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
    servedModels List<ModelServingConfigServedModel>
    Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.

    Deprecated: Please use 'config.served_entities' instead of 'config.served_models'.

    trafficConfig ModelServingConfigTrafficConfig
    A single block represents the traffic split configuration amongst the served models.
    autoCaptureConfig ModelServingConfigAutoCaptureConfig
    Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
    servedEntities ModelServingConfigServedEntity[]
    A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
    servedModels ModelServingConfigServedModel[]
    Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.

    Deprecated: Please use 'config.served_entities' instead of 'config.served_models'.

    trafficConfig ModelServingConfigTrafficConfig
    A single block represents the traffic split configuration amongst the served models.
    auto_capture_config ModelServingConfigAutoCaptureConfig
    Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
    served_entities Sequence[ModelServingConfigServedEntity]
    A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
    served_models Sequence[ModelServingConfigServedModel]
    Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.

    Deprecated: Please use 'config.served_entities' instead of 'config.served_models'.

    traffic_config ModelServingConfigTrafficConfig
    A single block represents the traffic split configuration amongst the served models.
    autoCaptureConfig Property Map
    Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
    servedEntities List<Property Map>
    A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
    servedModels List<Property Map>
    Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.

    Deprecated: Please use 'config.served_entities' instead of 'config.served_models'.

    trafficConfig Property Map
    A single block represents the traffic split configuration amongst the served models.

    ModelServingConfigAutoCaptureConfig, ModelServingConfigAutoCaptureConfigArgs

    CatalogName string
    The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
    Enabled bool
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    SchemaName string
    The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
    TableNamePrefix string
    The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
    CatalogName string
    The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
    Enabled bool
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    SchemaName string
    The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
    TableNamePrefix string
    The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
    catalogName String
    The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
    enabled Boolean
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    schemaName String
    The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
    tableNamePrefix String
    The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
    catalogName string
    The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
    enabled boolean
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    schemaName string
    The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
    tableNamePrefix string
    The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
    catalog_name str
    The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
    enabled bool
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    schema_name str
    The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
    table_name_prefix str
    The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
    catalogName String
    The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
    enabled Boolean
    If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
    schemaName String
    The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
    tableNamePrefix String
    The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.

    ModelServingConfigServedEntity, ModelServingConfigServedEntityArgs

    EntityName string
    The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of catalog_name.schema_name.model_name.
    EntityVersion string
    The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC.
    EnvironmentVars Dictionary<string, string>
    An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: {"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
    ExternalModel ModelServingConfigServedEntityExternalModel
    The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. When an external_model is present, the served entities list can only have one served_entity object. For an existing endpoint with external_model, it can not be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later.
    InstanceProfileArn string
    ARN of the instance profile that the served entity uses to access AWS resources.
    MaxProvisionedThroughput int
    The maximum tokens per second that the endpoint can scale up to.
    MinProvisionedThroughput int
    The minimum tokens per second that the endpoint can scale down to.
    Name string
    The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version.
    ScaleToZeroEnabled bool
    Whether the compute resources for the served entity should scale down to zero.
    WorkloadSize string
    The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
    WorkloadType string
    The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is CPU. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available GPU types.
    EntityName string
    The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of catalog_name.schema_name.model_name.
    EntityVersion string
    The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC.
    EnvironmentVars map[string]string
    An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: {"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
    ExternalModel ModelServingConfigServedEntityExternalModel
    The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. When an external_model is present, the served entities list can only have one served_entity object. For an existing endpoint with external_model, it can not be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later.
    InstanceProfileArn string
    ARN of the instance profile that the served entity uses to access AWS resources.
    MaxProvisionedThroughput int
    The maximum tokens per second that the endpoint can scale up to.
    MinProvisionedThroughput int
    The minimum tokens per second that the endpoint can scale down to.
    Name string
    The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version.
    ScaleToZeroEnabled bool
    Whether the compute resources for the served entity should scale down to zero.
    WorkloadSize string
    The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
    WorkloadType string
    The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is CPU. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available GPU types.
    entityName String
    The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of catalog_name.schema_name.model_name.
    entityVersion String
    The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC.
    environmentVars Map<String,String>
    An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: {"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
    externalModel ModelServingConfigServedEntityExternalModel
    The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. When an external_model is present, the served entities list can only have one served_entity object. For an existing endpoint with external_model, it can not be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later.
    instanceProfileArn String
    ARN of the instance profile that the served entity uses to access AWS resources.
    maxProvisionedThroughput Integer
    The maximum tokens per second that the endpoint can scale up to.
    minProvisionedThroughput Integer
    The minimum tokens per second that the endpoint can scale down to.
    name String
    The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version.
    scaleToZeroEnabled Boolean
    Whether the compute resources for the served entity should scale down to zero.
    workloadSize String
    The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
    workloadType String
    The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is CPU. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available GPU types.
    entityName string
    The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of catalog_name.schema_name.model_name.
    entityVersion string
    The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC.
    environmentVars {[key: string]: string}
    An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: {"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
    externalModel ModelServingConfigServedEntityExternalModel
    The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. When an external_model is present, the served entities list can only have one served_entity object. For an existing endpoint with external_model, it can not be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later.
    instanceProfileArn string
    ARN of the instance profile that the served entity uses to access AWS resources.
    maxProvisionedThroughput number
    The maximum tokens per second that the endpoint can scale up to.
    minProvisionedThroughput number
    The minimum tokens per second that the endpoint can scale down to.
    name string
    The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version.
    scaleToZeroEnabled boolean
    Whether the compute resources for the served entity should scale down to zero.
    workloadSize string
    The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
    workloadType string
    The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is CPU. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available GPU types.
    entity_name str
    The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of catalog_name.schema_name.model_name.
    entity_version str
    The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC.
    environment_vars Mapping[str, str]
    An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: {"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
    external_model ModelServingConfigServedEntityExternalModel
    The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. When an external_model is present, the served entities list can only have one served_entity object. For an existing endpoint with external_model, it can not be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later.
    instance_profile_arn str
    ARN of the instance profile that the served entity uses to access AWS resources.
    max_provisioned_throughput int
    The maximum tokens per second that the endpoint can scale up to.
    min_provisioned_throughput int
    The minimum tokens per second that the endpoint can scale down to.
    name str
    The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version.
    scale_to_zero_enabled bool
    Whether the compute resources for the served entity should scale down to zero.
    workload_size str
    The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
    workload_type str
    The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is CPU. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available GPU types.
    entityName String
    The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of catalog_name.schema_name.model_name.
    entityVersion String
    The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC.
    environmentVars Map<String>
    An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: {"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
    externalModel Property Map
    The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. When an external_model is present, the served entities list can only have one served_entity object. For an existing endpoint with external_model, it can not be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later.
    instanceProfileArn String
    ARN of the instance profile that the served entity uses to access AWS resources.
    maxProvisionedThroughput Number
    The maximum tokens per second that the endpoint can scale up to.
    minProvisionedThroughput Number
    The minimum tokens per second that the endpoint can scale down to.
    name String
    The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version.
    scaleToZeroEnabled Boolean
    Whether the compute resources for the served entity should scale down to zero.
    workloadSize String
    The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
    workloadType String
    The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is CPU. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available GPU types.

    ModelServingConfigServedEntityExternalModel, ModelServingConfigServedEntityExternalModelArgs

    Name string
    The name of the external model.
    Provider string
    The name of the provider for the external model. Currently, the supported providers are ai21labs, anthropic, amazon-bedrock, cohere, databricks-model-serving, openai, and palm.
    Task string
    The task type of the external model.
    Ai21labsConfig ModelServingConfigServedEntityExternalModelAi21labsConfig
    AI21Labs Config
    AmazonBedrockConfig ModelServingConfigServedEntityExternalModelAmazonBedrockConfig
    Amazon Bedrock Config
    AnthropicConfig ModelServingConfigServedEntityExternalModelAnthropicConfig
    Anthropic Config
    CohereConfig ModelServingConfigServedEntityExternalModelCohereConfig
    Cohere Config
    DatabricksModelServingConfig ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig
    Databricks Model Serving Config
    GoogleCloudVertexAiConfig ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig
    OpenaiConfig ModelServingConfigServedEntityExternalModelOpenaiConfig
    OpenAI Config
    PalmConfig ModelServingConfigServedEntityExternalModelPalmConfig
    PaLM Config
    Name string
    The name of the external model.
    Provider string
    The name of the provider for the external model. Currently, the supported providers are ai21labs, anthropic, amazon-bedrock, cohere, databricks-model-serving, openai, and palm.
    Task string
    The task type of the external model.
    Ai21labsConfig ModelServingConfigServedEntityExternalModelAi21labsConfig
    AI21Labs Config
    AmazonBedrockConfig ModelServingConfigServedEntityExternalModelAmazonBedrockConfig
    Amazon Bedrock Config
    AnthropicConfig ModelServingConfigServedEntityExternalModelAnthropicConfig
    Anthropic Config
    CohereConfig ModelServingConfigServedEntityExternalModelCohereConfig
    Cohere Config
    DatabricksModelServingConfig ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig
    Databricks Model Serving Config
    GoogleCloudVertexAiConfig ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig
    OpenaiConfig ModelServingConfigServedEntityExternalModelOpenaiConfig
    OpenAI Config
    PalmConfig ModelServingConfigServedEntityExternalModelPalmConfig
    PaLM Config
    name String
    The name of the external model.
    provider String
    The name of the provider for the external model. Currently, the supported providers are ai21labs, anthropic, amazon-bedrock, cohere, databricks-model-serving, openai, and palm.
    task String
    The task type of the external model.
    ai21labsConfig ModelServingConfigServedEntityExternalModelAi21labsConfig
    AI21Labs Config
    amazonBedrockConfig ModelServingConfigServedEntityExternalModelAmazonBedrockConfig
    Amazon Bedrock Config
    anthropicConfig ModelServingConfigServedEntityExternalModelAnthropicConfig
    Anthropic Config
    cohereConfig ModelServingConfigServedEntityExternalModelCohereConfig
    Cohere Config
    databricksModelServingConfig ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig
    Databricks Model Serving Config
    googleCloudVertexAiConfig ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig
    openaiConfig ModelServingConfigServedEntityExternalModelOpenaiConfig
    OpenAI Config
    palmConfig ModelServingConfigServedEntityExternalModelPalmConfig
    PaLM Config
    name string
    The name of the external model.
    provider string
    The name of the provider for the external model. Currently, the supported providers are ai21labs, anthropic, amazon-bedrock, cohere, databricks-model-serving, openai, and palm.
    task string
    The task type of the external model.
    ai21labsConfig ModelServingConfigServedEntityExternalModelAi21labsConfig
    AI21Labs Config
    amazonBedrockConfig ModelServingConfigServedEntityExternalModelAmazonBedrockConfig
    Amazon Bedrock Config
    anthropicConfig ModelServingConfigServedEntityExternalModelAnthropicConfig
    Anthropic Config
    cohereConfig ModelServingConfigServedEntityExternalModelCohereConfig
    Cohere Config
    databricksModelServingConfig ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig
    Databricks Model Serving Config
    googleCloudVertexAiConfig ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig
    openaiConfig ModelServingConfigServedEntityExternalModelOpenaiConfig
    OpenAI Config
    palmConfig ModelServingConfigServedEntityExternalModelPalmConfig
    PaLM Config
    name str
    The name of the external model.
    provider str
    The name of the provider for the external model. Currently, the supported providers are ai21labs, anthropic, amazon-bedrock, cohere, databricks-model-serving, openai, and palm.
    task str
    The task type of the external model.
    ai21labs_config ModelServingConfigServedEntityExternalModelAi21labsConfig
    AI21Labs Config
    amazon_bedrock_config ModelServingConfigServedEntityExternalModelAmazonBedrockConfig
    Amazon Bedrock Config
    anthropic_config ModelServingConfigServedEntityExternalModelAnthropicConfig
    Anthropic Config
    cohere_config ModelServingConfigServedEntityExternalModelCohereConfig
    Cohere Config
    databricks_model_serving_config ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig
    Databricks Model Serving Config
    google_cloud_vertex_ai_config ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig
    openai_config ModelServingConfigServedEntityExternalModelOpenaiConfig
    OpenAI Config
    palm_config ModelServingConfigServedEntityExternalModelPalmConfig
    PaLM Config
    name String
    The name of the external model.
    provider String
    The name of the provider for the external model. Currently, the supported providers are ai21labs, anthropic, amazon-bedrock, cohere, databricks-model-serving, openai, and palm.
    task String
    The task type of the external model.
    ai21labsConfig Property Map
    AI21Labs Config
    amazonBedrockConfig Property Map
    Amazon Bedrock Config
    anthropicConfig Property Map
    Anthropic Config
    cohereConfig Property Map
    Cohere Config
    databricksModelServingConfig Property Map
    Databricks Model Serving Config
    googleCloudVertexAiConfig Property Map
    openaiConfig Property Map
    OpenAI Config
    palmConfig Property Map
    PaLM Config

    ModelServingConfigServedEntityExternalModelAi21labsConfig, ModelServingConfigServedEntityExternalModelAi21labsConfigArgs

    Ai21labsApiKey string
    The Databricks secret key reference for an AI21Labs API key.
    Ai21labsApiKeyPlaintext string
    Ai21labsApiKey string
    The Databricks secret key reference for an AI21Labs API key.
    Ai21labsApiKeyPlaintext string
    ai21labsApiKey String
    The Databricks secret key reference for an AI21Labs API key.
    ai21labsApiKeyPlaintext String
    ai21labsApiKey string
    The Databricks secret key reference for an AI21Labs API key.
    ai21labsApiKeyPlaintext string
    ai21labs_api_key str
    The Databricks secret key reference for an AI21Labs API key.
    ai21labs_api_key_plaintext str
    ai21labsApiKey String
    The Databricks secret key reference for an AI21Labs API key.
    ai21labsApiKeyPlaintext String

    ModelServingConfigServedEntityExternalModelAmazonBedrockConfig, ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs

    AwsRegion string
    The AWS region to use. Bedrock has to be enabled there.
    BedrockProvider string
    The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.
    AwsAccessKeyId string
    The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
    AwsAccessKeyIdPlaintext string
    AwsSecretAccessKey string
    The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
    AwsSecretAccessKeyPlaintext string
    AwsRegion string
    The AWS region to use. Bedrock has to be enabled there.
    BedrockProvider string
    The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.
    AwsAccessKeyId string
    The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
    AwsAccessKeyIdPlaintext string
    AwsSecretAccessKey string
    The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
    AwsSecretAccessKeyPlaintext string
    awsRegion String
    The AWS region to use. Bedrock has to be enabled there.
    bedrockProvider String
    The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.
    awsAccessKeyId String
    The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
    awsAccessKeyIdPlaintext String
    awsSecretAccessKey String
    The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
    awsSecretAccessKeyPlaintext String
    awsRegion string
    The AWS region to use. Bedrock has to be enabled there.
    bedrockProvider string
    The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.
    awsAccessKeyId string
    The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
    awsAccessKeyIdPlaintext string
    awsSecretAccessKey string
    The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
    awsSecretAccessKeyPlaintext string
    aws_region str
    The AWS region to use. Bedrock has to be enabled there.
    bedrock_provider str
    The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.
    aws_access_key_id str
    The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
    aws_access_key_id_plaintext str
    aws_secret_access_key str
    The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
    aws_secret_access_key_plaintext str
    awsRegion String
    The AWS region to use. Bedrock has to be enabled there.
    bedrockProvider String
    The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.
    awsAccessKeyId String
    The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
    awsAccessKeyIdPlaintext String
    awsSecretAccessKey String
    The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
    awsSecretAccessKeyPlaintext String

    ModelServingConfigServedEntityExternalModelAnthropicConfig, ModelServingConfigServedEntityExternalModelAnthropicConfigArgs

    AnthropicApiKey string
    The Databricks secret key reference for an Anthropic API key.
    AnthropicApiKeyPlaintext string
    AnthropicApiKey string
    The Databricks secret key reference for an Anthropic API key.
    AnthropicApiKeyPlaintext string
    anthropicApiKey String
    The Databricks secret key reference for an Anthropic API key.
    anthropicApiKeyPlaintext String
    anthropicApiKey string
    The Databricks secret key reference for an Anthropic API key.
    anthropicApiKeyPlaintext string
    anthropic_api_key str
    The Databricks secret key reference for an Anthropic API key.
    anthropic_api_key_plaintext str
    anthropicApiKey String
    The Databricks secret key reference for an Anthropic API key.
    anthropicApiKeyPlaintext String

    ModelServingConfigServedEntityExternalModelCohereConfig, ModelServingConfigServedEntityExternalModelCohereConfigArgs

    CohereApiBase string
    CohereApiKey string
    The Databricks secret key reference for a Cohere API key.
    CohereApiKeyPlaintext string
    CohereApiBase string
    CohereApiKey string
    The Databricks secret key reference for a Cohere API key.
    CohereApiKeyPlaintext string
    cohereApiBase String
    cohereApiKey String
    The Databricks secret key reference for a Cohere API key.
    cohereApiKeyPlaintext String
    cohereApiBase string
    cohereApiKey string
    The Databricks secret key reference for a Cohere API key.
    cohereApiKeyPlaintext string
    cohere_api_base str
    cohere_api_key str
    The Databricks secret key reference for a Cohere API key.
    cohere_api_key_plaintext str
    cohereApiBase String
    cohereApiKey String
    The Databricks secret key reference for a Cohere API key.
    cohereApiKeyPlaintext String

    ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig, ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs

    DatabricksWorkspaceUrl string
    The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
    DatabricksApiToken string
    The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
    DatabricksApiTokenPlaintext string
    DatabricksWorkspaceUrl string
    The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
    DatabricksApiToken string
    The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
    DatabricksApiTokenPlaintext string
    databricksWorkspaceUrl String
    The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
    databricksApiToken String
    The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
    databricksApiTokenPlaintext String
    databricksWorkspaceUrl string
    The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
    databricksApiToken string
    The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
    databricksApiTokenPlaintext string
    databricks_workspace_url str
    The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
    databricks_api_token str
    The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
    databricks_api_token_plaintext str
    databricksWorkspaceUrl String
    The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
    databricksApiToken String
    The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
    databricksApiTokenPlaintext String

    ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig, ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs

    ModelServingConfigServedEntityExternalModelOpenaiConfig, ModelServingConfigServedEntityExternalModelOpenaiConfigArgs

    MicrosoftEntraClientId string
    MicrosoftEntraClientSecret string
    MicrosoftEntraClientSecretPlaintext string
    MicrosoftEntraTenantId string
    OpenaiApiBase string
    This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure.
    OpenaiApiKey string
    The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
    OpenaiApiKeyPlaintext string
    OpenaiApiType string
    This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security access validation protocol. For access token validation, use azure. For authentication using Azure Active Directory (Azure AD), use azuread.
    OpenaiApiVersion string
    This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to utilize, specified by a date.
    OpenaiDeploymentName string
    This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
    OpenaiOrganization string
    This is an optional field to specify the organization in OpenAI or Azure OpenAI.
    MicrosoftEntraClientId string
    MicrosoftEntraClientSecret string
    MicrosoftEntraClientSecretPlaintext string
    MicrosoftEntraTenantId string
    OpenaiApiBase string
    This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure.
    OpenaiApiKey string
    The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
    OpenaiApiKeyPlaintext string
    OpenaiApiType string
    This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security access validation protocol. For access token validation, use azure. For authentication using Azure Active Directory (Azure AD), use azuread.
    OpenaiApiVersion string
    This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to utilize, specified by a date.
    OpenaiDeploymentName string
    This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
    OpenaiOrganization string
    This is an optional field to specify the organization in OpenAI or Azure OpenAI.
    microsoftEntraClientId String
    microsoftEntraClientSecret String
    microsoftEntraClientSecretPlaintext String
    microsoftEntraTenantId String
    openaiApiBase String
    This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure.
    openaiApiKey String
    The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
    openaiApiKeyPlaintext String
    openaiApiType String
    This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security access validation protocol. For access token validation, use azure. For authentication using Azure Active Directory (Azure AD), use azuread.
    openaiApiVersion String
    This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to utilize, specified by a date.
    openaiDeploymentName String
    This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
    openaiOrganization String
    This is an optional field to specify the organization in OpenAI or Azure OpenAI.
    microsoftEntraClientId string
    microsoftEntraClientSecret string
    microsoftEntraClientSecretPlaintext string
    microsoftEntraTenantId string
    openaiApiBase string
    This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure.
    openaiApiKey string
    The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
    openaiApiKeyPlaintext string
    openaiApiType string
    This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security access validation protocol. For access token validation, use azure. For authentication using Azure Active Directory (Azure AD), use azuread.
    openaiApiVersion string
    This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to utilize, specified by a date.
    openaiDeploymentName string
    This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
    openaiOrganization string
    This is an optional field to specify the organization in OpenAI or Azure OpenAI.
    microsoft_entra_client_id str
    microsoft_entra_client_secret str
    microsoft_entra_client_secret_plaintext str
    microsoft_entra_tenant_id str
    openai_api_base str
    This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure.
    openai_api_key str
    The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
    openai_api_key_plaintext str
    openai_api_type str
    This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security access validation protocol. For access token validation, use azure. For authentication using Azure Active Directory (Azure AD), use azuread.
    openai_api_version str
    This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to utilize, specified by a date.
    openai_deployment_name str
    This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
    openai_organization str
    This is an optional field to specify the organization in OpenAI or Azure OpenAI.
    microsoftEntraClientId String
    microsoftEntraClientSecret String
    microsoftEntraClientSecretPlaintext String
    microsoftEntraTenantId String
    openaiApiBase String
    This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure.
    openaiApiKey String
    The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
    openaiApiKeyPlaintext String
    openaiApiType String
    This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security access validation protocol. For access token validation, use azure. For authentication using Azure Active Directory (Azure AD), use azuread.
    openaiApiVersion String
    This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to utilize, specified by a date.
    openaiDeploymentName String
    This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
    openaiOrganization String
    This is an optional field to specify the organization in OpenAI or Azure OpenAI.

    ModelServingConfigServedEntityExternalModelPalmConfig, ModelServingConfigServedEntityExternalModelPalmConfigArgs

    PalmApiKey string
    The Databricks secret key reference for a PaLM API key.
    PalmApiKeyPlaintext string
    PalmApiKey string
    The Databricks secret key reference for a PaLM API key.
    PalmApiKeyPlaintext string
    palmApiKey String
    The Databricks secret key reference for a PaLM API key.
    palmApiKeyPlaintext String
    palmApiKey string
    The Databricks secret key reference for a PaLM API key.
    palmApiKeyPlaintext string
    palm_api_key str
    The Databricks secret key reference for a PaLM API key.
    palm_api_key_plaintext str
    palmApiKey String
    The Databricks secret key reference for a PaLM API key.
    palmApiKeyPlaintext String

    ModelServingConfigServedModel, ModelServingConfigServedModelArgs

    ModelName string
    The name of the model in Databricks Model Registry to be served.
    ModelVersion string
    The version of the model in Databricks Model Registry to be served.
    EnvironmentVars Dictionary<string, string>
    a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: {{secrets/secret_scope/secret_key}}.
    InstanceProfileArn string
    ARN of the instance profile that the served model will use to access AWS resources.
    MaxProvisionedThroughput int
    The maximum tokens per second that the endpoint can scale up to.
    MinProvisionedThroughput int
    The minimum tokens per second that the endpoint can scale down to.
    Name string
    The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
    ScaleToZeroEnabled bool
    Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
    WorkloadSize string
    The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency).
    WorkloadType string
    The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See documentation for all options. The default value is CPU.
    ModelName string
    The name of the model in Databricks Model Registry to be served.
    ModelVersion string
    The version of the model in Databricks Model Registry to be served.
    EnvironmentVars map[string]string
    A map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: {{secrets/secret_scope/secret_key}}.
    InstanceProfileArn string
    ARN of the instance profile that the served model will use to access AWS resources.
    MaxProvisionedThroughput int
    The maximum tokens per second that the endpoint can scale up to.
    MinProvisionedThroughput int
    The minimum tokens per second that the endpoint can scale down to.
    Name string
    The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
    ScaleToZeroEnabled bool
    Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
    WorkloadSize string
    The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency).
    WorkloadType string
    The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See documentation for all options. The default value is CPU.
    modelName String
    The name of the model in Databricks Model Registry to be served.
    modelVersion String
    The version of the model in Databricks Model Registry to be served.
    environmentVars Map<String,String>
    A map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: {{secrets/secret_scope/secret_key}}.
    instanceProfileArn String
    ARN of the instance profile that the served model will use to access AWS resources.
    maxProvisionedThroughput Integer
    The maximum tokens per second that the endpoint can scale up to.
    minProvisionedThroughput Integer
    The minimum tokens per second that the endpoint can scale down to.
    name String
    The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
    scaleToZeroEnabled Boolean
    Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
    workloadSize String
    The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency).
    workloadType String
    The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See documentation for all options. The default value is CPU.
    modelName string
    The name of the model in Databricks Model Registry to be served.
    modelVersion string
    The version of the model in Databricks Model Registry to be served.
    environmentVars {[key: string]: string}
    A map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: {{secrets/secret_scope/secret_key}}.
    instanceProfileArn string
    ARN of the instance profile that the served model will use to access AWS resources.
    maxProvisionedThroughput number
    The maximum tokens per second that the endpoint can scale up to.
    minProvisionedThroughput number
    The minimum tokens per second that the endpoint can scale down to.
    name string
    The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
    scaleToZeroEnabled boolean
    Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
    workloadSize string
    The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency).
    workloadType string
    The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See documentation for all options. The default value is CPU.
    model_name str
    The name of the model in Databricks Model Registry to be served.
    model_version str
    The version of the model in Databricks Model Registry to be served.
    environment_vars Mapping[str, str]
    A map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: {{secrets/secret_scope/secret_key}}.
    instance_profile_arn str
    ARN of the instance profile that the served model will use to access AWS resources.
    max_provisioned_throughput int
    The maximum tokens per second that the endpoint can scale up to.
    min_provisioned_throughput int
    The minimum tokens per second that the endpoint can scale down to.
    name str
    The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
    scale_to_zero_enabled bool
    Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
    workload_size str
    The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency).
    workload_type str
    The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See documentation for all options. The default value is CPU.
    modelName String
    The name of the model in Databricks Model Registry to be served.
    modelVersion String
    The version of the model in Databricks Model Registry to be served.
    environmentVars Map<String>
    A map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: {{secrets/secret_scope/secret_key}}.
    instanceProfileArn String
    ARN of the instance profile that the served model will use to access AWS resources.
    maxProvisionedThroughput Number
    The maximum tokens per second that the endpoint can scale up to.
    minProvisionedThroughput Number
    The minimum tokens per second that the endpoint can scale down to.
    name String
    The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
    scaleToZeroEnabled Boolean
    Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
    workloadSize String
    The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency).
    workloadType String
    The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See documentation for all options. The default value is CPU.

    ModelServingConfigTrafficConfig, ModelServingConfigTrafficConfigArgs

    Routes List<ModelServingConfigTrafficConfigRoute>
    Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
    Routes []ModelServingConfigTrafficConfigRoute
    Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
    routes List<ModelServingConfigTrafficConfigRoute>
    Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
    routes ModelServingConfigTrafficConfigRoute[]
    Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
    routes Sequence[ModelServingConfigTrafficConfigRoute]
    Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
    routes List<Property Map>
    Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.

    ModelServingConfigTrafficConfigRoute, ModelServingConfigTrafficConfigRouteArgs

    ServedModelName string
    TrafficPercentage int
    The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
    ServedModelName string
    TrafficPercentage int
    The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
    servedModelName String
    trafficPercentage Integer
    The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
    servedModelName string
    trafficPercentage number
    The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
    served_model_name str
    traffic_percentage int
    The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
    servedModelName String
    trafficPercentage Number
    The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.

    ModelServingRateLimit, ModelServingRateLimitArgs

    Calls int
    Used to specify how many calls are allowed for a key within the renewal_period.
    RenewalPeriod string
    Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
    Key string
    Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
    Calls int
    Used to specify how many calls are allowed for a key within the renewal_period.
    RenewalPeriod string
    Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
    Key string
    Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
    calls Integer
    Used to specify how many calls are allowed for a key within the renewal_period.
    renewalPeriod String
    Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
    key String
    Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
    calls number
    Used to specify how many calls are allowed for a key within the renewal_period.
    renewalPeriod string
    Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
    key string
    Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
    calls int
    Used to specify how many calls are allowed for a key within the renewal_period.
    renewal_period str
    Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
    key str
    Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
    calls Number
    Used to specify how many calls are allowed for a key within the renewal_period.
    renewalPeriod String
    Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
    key String
    Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.

    ModelServingTag, ModelServingTagArgs

    Key string
    The key field for a tag.
    Value string
    The value field for a tag.
    Key string
    The key field for a tag.
    Value string
    The value field for a tag.
    key String
    The key field for a tag.
    value String
    The value field for a tag.
    key string
    The key field for a tag.
    value string
    The value field for a tag.
    key str
    The key field for a tag.
    value str
    The value field for a tag.
    key String
    The key field for a tag.
    value String
    The value field for a tag.

    Import

    The model serving resource can be imported using the name of the endpoint.

    bash

    $ pulumi import databricks:index/modelServing:ModelServing this <model-serving-endpoint-name>
    

    To learn more about importing existing cloud resources, see Importing resources.

    Package Details

    Repository
    databricks pulumi/pulumi-databricks
    License
    Apache-2.0
    Notes
    This Pulumi package is based on the databricks Terraform Provider.
    databricks logo
    Databricks v1.51.0 published on Tuesday, Oct 8, 2024 by Pulumi