databricks logo
Databricks v1.14.0, May 23, 2023

databricks.ModelServing

Explore with Pulumi AI

This resource allows you to manage Model Serving endpoints in Databricks.

The following resources are often used in the same context:

Example Usage

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Databricks = Pulumi.Databricks;

return await Deployment.RunAsync(() => 
{
    // Two versions of the same registry model, served side by side on one endpoint.
    var prodModel = new Databricks.Inputs.ModelServingConfigServedModelArgs
    {
        ModelName = "ads-model",
        ModelVersion = "2",
        Name = "prod_model",
        ScaleToZeroEnabled = true,
        WorkloadSize = "Small",
    };
    var candidateModel = new Databricks.Inputs.ModelServingConfigServedModelArgs
    {
        ModelName = "ads-model",
        ModelVersion = "4",
        Name = "candidate_model",
        ScaleToZeroEnabled = false,
        WorkloadSize = "Small",
    };

    // Route 90% of endpoint traffic to prod and 10% to the candidate.
    var trafficConfig = new Databricks.Inputs.ModelServingConfigTrafficConfigArgs
    {
        Routes = new[]
        {
            new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
            {
                ServedModelName = "prod_model",
                TrafficPercentage = 90,
            },
            new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
            {
                ServedModelName = "candidate_model",
                TrafficPercentage = 10,
            },
        },
    };

    var @this = new Databricks.ModelServing("this", new()
    {
        Config = new Databricks.Inputs.ModelServingConfigArgs
        {
            ServedModels = new[] { prodModel, candidateModel },
            TrafficConfig = trafficConfig,
        },
    });

});
package main

import (
	"github.com/pulumi/pulumi-databricks/sdk/go/databricks"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := databricks.NewModelServing(ctx, "this", &databricks.ModelServingArgs{
			Config: &databricks.ModelServingConfigArgs{
				ServedModels: databricks.ModelServingConfigServedModelArray{
					&databricks.ModelServingConfigServedModelArgs{
						ModelName:          pulumi.String("ads-model"),
						ModelVersion:       pulumi.String("2"),
						Name:               pulumi.String("prod_model"),
						ScaleToZeroEnabled: pulumi.Bool(true),
						WorkloadSize:       pulumi.String("Small"),
					},
					&databricks.ModelServingConfigServedModelArgs{
						ModelName:          pulumi.String("ads-model"),
						ModelVersion:       pulumi.String("4"),
						Name:               pulumi.String("candidate_model"),
						ScaleToZeroEnabled: pulumi.Bool(false),
						WorkloadSize:       pulumi.String("Small"),
					},
				},
				TrafficConfig: &databricks.ModelServingConfigTrafficConfigArgs{
					Routes: databricks.ModelServingConfigTrafficConfigRouteArray{
						&databricks.ModelServingConfigTrafficConfigRouteArgs{
							ServedModelName:   pulumi.String("prod_model"),
							TrafficPercentage: pulumi.Int(90),
						},
						&databricks.ModelServingConfigTrafficConfigRouteArgs{
							ServedModelName:   pulumi.String("candidate_model"),
							TrafficPercentage: pulumi.Int(10),
						},
					},
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.databricks.ModelServing;
import com.pulumi.databricks.ModelServingArgs;
import com.pulumi.databricks.inputs.ModelServingConfigArgs;
// FIX: these two input types were used below but never imported,
// so the example did not compile as published.
import com.pulumi.databricks.inputs.ModelServingConfigServedModelArgs;
import com.pulumi.databricks.inputs.ModelServingConfigTrafficConfigArgs;
import com.pulumi.databricks.inputs.ModelServingConfigTrafficConfigRouteArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    // Creates a model serving endpoint that serves two versions of "ads-model"
    // and splits endpoint traffic 90/10 between them.
    public static void stack(Context ctx) {
        var this_ = new ModelServing("this", ModelServingArgs.builder()
            .config(ModelServingConfigArgs.builder()
                .servedModels(
                    ModelServingConfigServedModelArgs.builder()
                        .modelName("ads-model")
                        .modelVersion("2")
                        .name("prod_model")
                        .scaleToZeroEnabled(true)
                        .workloadSize("Small")
                        .build(),
                    ModelServingConfigServedModelArgs.builder()
                        .modelName("ads-model")
                        .modelVersion("4")
                        .name("candidate_model")
                        .scaleToZeroEnabled(false)
                        .workloadSize("Small")
                        .build())
                .trafficConfig(ModelServingConfigTrafficConfigArgs.builder()
                    .routes(
                        ModelServingConfigTrafficConfigRouteArgs.builder()
                            .servedModelName("prod_model")
                            .trafficPercentage(90)
                            .build(),
                        ModelServingConfigTrafficConfigRouteArgs.builder()
                            .servedModelName("candidate_model")
                            .trafficPercentage(10)
                            .build())
                    .build())
                .build())
            .build());

    }
}
import pulumi
import pulumi_databricks as databricks

# Two versions of the same registry model, served side by side on one endpoint.
prod_model = databricks.ModelServingConfigServedModelArgs(
    model_name="ads-model",
    model_version="2",
    name="prod_model",
    scale_to_zero_enabled=True,
    workload_size="Small",
)
candidate_model = databricks.ModelServingConfigServedModelArgs(
    model_name="ads-model",
    model_version="4",
    name="candidate_model",
    scale_to_zero_enabled=False,
    workload_size="Small",
)

# Route 90% of endpoint traffic to prod and 10% to the candidate.
traffic_config = databricks.ModelServingConfigTrafficConfigArgs(
    routes=[
        databricks.ModelServingConfigTrafficConfigRouteArgs(
            served_model_name="prod_model",
            traffic_percentage=90,
        ),
        databricks.ModelServingConfigTrafficConfigRouteArgs(
            served_model_name="candidate_model",
            traffic_percentage=10,
        ),
    ],
)

this = databricks.ModelServing(
    "this",
    config=databricks.ModelServingConfigArgs(
        served_models=[prod_model, candidate_model],
        traffic_config=traffic_config,
    ),
)
import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";

// Two versions of the same registry model, served side by side on one endpoint.
const servedModels = [
    {
        modelName: "ads-model",
        modelVersion: "2",
        name: "prod_model",
        scaleToZeroEnabled: true,
        workloadSize: "Small",
    },
    {
        modelName: "ads-model",
        modelVersion: "4",
        name: "candidate_model",
        scaleToZeroEnabled: false,
        workloadSize: "Small",
    },
];

// Route 90% of endpoint traffic to prod and 10% to the candidate.
const trafficConfig = {
    routes: [
        {
            servedModelName: "prod_model",
            trafficPercentage: 90,
        },
        {
            servedModelName: "candidate_model",
            trafficPercentage: 10,
        },
    ],
};

const _this = new databricks.ModelServing("this", {
    config: {
        servedModels: servedModels,
        trafficConfig: trafficConfig,
    },
});
resources:
  this:
    type: databricks:ModelServing
    properties:
      config:
        # Two versions of the same registry model, served side by side on one endpoint.
        servedModels:
          - modelName: ads-model
            modelVersion: '2'
            name: prod_model
            scaleToZeroEnabled: true
            workloadSize: Small
          - modelName: ads-model
            modelVersion: '4'
            name: candidate_model
            scaleToZeroEnabled: false
            workloadSize: Small
        # Route 90% of endpoint traffic to prod and 10% to the candidate.
        trafficConfig:
          routes:
            - servedModelName: prod_model
              trafficPercentage: 90
            - servedModelName: candidate_model
              trafficPercentage: 10

Create ModelServing Resource

new ModelServing(name: string, args: ModelServingArgs, opts?: CustomResourceOptions);
@overload
def ModelServing(resource_name: str,
                 opts: Optional[ResourceOptions] = None,
                 config: Optional[ModelServingConfigArgs] = None,
                 name: Optional[str] = None)
@overload
def ModelServing(resource_name: str,
                 args: ModelServingArgs,
                 opts: Optional[ResourceOptions] = None)
func NewModelServing(ctx *Context, name string, args ModelServingArgs, opts ...ResourceOption) (*ModelServing, error)
public ModelServing(string name, ModelServingArgs args, CustomResourceOptions? opts = null)
public ModelServing(String name, ModelServingArgs args)
public ModelServing(String name, ModelServingArgs args, CustomResourceOptions options)
type: databricks:ModelServing
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.

name string
The unique name of the resource.
args ModelServingArgs
The arguments to resource properties.
opts CustomResourceOptions
Bag of options to control resource's behavior.
resource_name str
The unique name of the resource.
args ModelServingArgs
The arguments to resource properties.
opts ResourceOptions
Bag of options to control resource's behavior.
ctx Context
Context object for the current deployment.
name string
The unique name of the resource.
args ModelServingArgs
The arguments to resource properties.
opts ResourceOption
Bag of options to control resource's behavior.
name string
The unique name of the resource.
args ModelServingArgs
The arguments to resource properties.
opts CustomResourceOptions
Bag of options to control resource's behavior.
name String
The unique name of the resource.
args ModelServingArgs
The arguments to resource properties.
options CustomResourceOptions
Bag of options to control resource's behavior.

ModelServing Resource Properties

To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

Inputs

The ModelServing resource accepts the following input properties:

Config ModelServingConfigArgs

The model serving endpoint configuration.

Name string

The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.

Config ModelServingConfigArgs

The model serving endpoint configuration.

Name string

The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.

config ModelServingConfigArgs

The model serving endpoint configuration.

name String

The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.

config ModelServingConfigArgs

The model serving endpoint configuration.

name string

The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.

config ModelServingConfigArgs

The model serving endpoint configuration.

name str

The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.

config Property Map

The model serving endpoint configuration.

name String

The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.

Outputs

All input properties are implicitly available as output properties. Additionally, the ModelServing resource produces the following output properties:

Id string

The provider-assigned unique ID for this managed resource.

Id string

The provider-assigned unique ID for this managed resource.

id String

The provider-assigned unique ID for this managed resource.

id string

The provider-assigned unique ID for this managed resource.

id str

The provider-assigned unique ID for this managed resource.

id String

The provider-assigned unique ID for this managed resource.

Look up Existing ModelServing Resource

Get an existing ModelServing resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

public static get(name: string, id: Input<ID>, state?: ModelServingState, opts?: CustomResourceOptions): ModelServing
@staticmethod
def get(resource_name: str,
        id: str,
        opts: Optional[ResourceOptions] = None,
        config: Optional[ModelServingConfigArgs] = None,
        name: Optional[str] = None) -> ModelServing
func GetModelServing(ctx *Context, name string, id IDInput, state *ModelServingState, opts ...ResourceOption) (*ModelServing, error)
public static ModelServing Get(string name, Input<string> id, ModelServingState? state, CustomResourceOptions? opts = null)
public static ModelServing get(String name, Output<String> id, ModelServingState state, CustomResourceOptions options)
Resource lookup is not supported in YAML
name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
resource_name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
The following state arguments are supported:
Config ModelServingConfigArgs

The model serving endpoint configuration.

Name string

The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.

Config ModelServingConfigArgs

The model serving endpoint configuration.

Name string

The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.

config ModelServingConfigArgs

The model serving endpoint configuration.

name String

The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.

config ModelServingConfigArgs

The model serving endpoint configuration.

name string

The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.

config ModelServingConfigArgs

The model serving endpoint configuration.

name str

The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.

config Property Map

The model serving endpoint configuration.

name String

The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.

Supporting Types

ModelServingConfig

ServedModels List<ModelServingConfigServedModel>

Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.

TrafficConfig ModelServingConfigTrafficConfig

A single block represents the traffic split configuration amongst the served models.

ServedModels []ModelServingConfigServedModel

Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.

TrafficConfig ModelServingConfigTrafficConfig

A single block represents the traffic split configuration amongst the served models.

servedModels List<ModelServingConfigServedModel>

Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.

trafficConfig ModelServingConfigTrafficConfig

A single block represents the traffic split configuration amongst the served models.

servedModels ModelServingConfigServedModel[]

Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.

trafficConfig ModelServingConfigTrafficConfig

A single block represents the traffic split configuration amongst the served models.

served_models Sequence[ModelServingConfigServedModel]

Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.

traffic_config ModelServingConfigTrafficConfig

A single block represents the traffic split configuration amongst the served models.

servedModels List<Property Map>

Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.

trafficConfig Property Map

A single block represents the traffic split configuration amongst the served models.

ModelServingConfigServedModel

ModelName string

The name of the model in Databricks Model Registry to be served.

ModelVersion string

The version of the model in Databricks Model Registry to be served.

WorkloadSize string

The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).

Name string

The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.

ScaleToZeroEnabled bool

Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.

ModelName string

The name of the model in Databricks Model Registry to be served.

ModelVersion string

The version of the model in Databricks Model Registry to be served.

WorkloadSize string

The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).

Name string

The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.

ScaleToZeroEnabled bool

Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.

modelName String

The name of the model in Databricks Model Registry to be served.

modelVersion String

The version of the model in Databricks Model Registry to be served.

workloadSize String

The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).

name String

The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.

scaleToZeroEnabled Boolean

Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.

modelName string

The name of the model in Databricks Model Registry to be served.

modelVersion string

The version of the model in Databricks Model Registry to be served.

workloadSize string

The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).

name string

The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.

scaleToZeroEnabled boolean

Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.

model_name str

The name of the model in Databricks Model Registry to be served.

model_version str

The version of the model in Databricks Model Registry to be served.

workload_size str

The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).

name str

The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.

scale_to_zero_enabled bool

Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.

modelName String

The name of the model in Databricks Model Registry to be served.

modelVersion String

The version of the model in Databricks Model Registry to be served.

workloadSize String

The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).

name String

The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.

scaleToZeroEnabled Boolean

Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.

ModelServingConfigTrafficConfig

Routes List<ModelServingConfigTrafficConfigRoute>

Each block represents a route that defines traffic to each served model. Each served_models block needs to have a corresponding routes block

Routes []ModelServingConfigTrafficConfigRoute

Each block represents a route that defines traffic to each served model. Each served_models block needs to have a corresponding routes block

routes List<ModelServingConfigTrafficConfigRoute>

Each block represents a route that defines traffic to each served model. Each served_models block needs to have a corresponding routes block

routes ModelServingConfigTrafficConfigRoute[]

Each block represents a route that defines traffic to each served model. Each served_models block needs to have a corresponding routes block

routes Sequence[ModelServingConfigTrafficConfigRoute]

Each block represents a route that defines traffic to each served model. Each served_models block needs to have a corresponding routes block

routes List<Property Map>

Each block represents a route that defines traffic to each served model. Each served_models block needs to have a corresponding routes block

ModelServingConfigTrafficConfigRoute

ServedModelName string

The name of the served model this route configures traffic for. This needs to match the name of a served_models block

TrafficPercentage int

The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.

ServedModelName string

The name of the served model this route configures traffic for. This needs to match the name of a served_models block

TrafficPercentage int

The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.

servedModelName String

The name of the served model this route configures traffic for. This needs to match the name of a served_models block

trafficPercentage Integer

The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.

servedModelName string

The name of the served model this route configures traffic for. This needs to match the name of a served_models block

trafficPercentage number

The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.

served_model_name str

The name of the served model this route configures traffic for. This needs to match the name of a served_models block

traffic_percentage int

The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.

servedModelName String

The name of the served model this route configures traffic for. This needs to match the name of a served_models block

trafficPercentage Number

The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.

Import

The model serving resource can be imported using the name of the endpoint (bash):

 $ pulumi import databricks:index/modelServing:ModelServing this <model-serving-endpoint-name>

Package Details

Repository
databricks pulumi/pulumi-databricks
License
Apache-2.0
Notes

This Pulumi package is based on the databricks Terraform Provider.