databricks.ModelServing
Explore with Pulumi AI
This resource allows you to manage Model Serving endpoints in Databricks.
Related Resources
The following resources are often used in the same context:
- End to end workspace management guide.
- databricks.Directory to manage directories in Databricks Workspace.
- databricks.MlflowModel to create MLflow models in Databricks.
- databricks.Notebook to manage Databricks Notebooks.
- databricks.getNotebook data source to export a notebook from Databricks Workspace.
- databricks.Repo to manage Databricks Repos.
Example Usage
// Example: a Databricks Model Serving endpoint that serves two versions of the
// same registered model and splits traffic 90/10 between them.
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Databricks = Pulumi.Databricks;

return await Deployment.RunAsync(() =>
{
    // Stable model version; allowed to scale down to zero when idle.
    var prodModel = new Databricks.Inputs.ModelServingConfigServedModelArgs
    {
        ModelName = "ads-model",
        ModelVersion = "2",
        Name = "prod_model",
        ScaleToZeroEnabled = true,
        WorkloadSize = "Small",
    };

    // Newer version under evaluation; kept warm (scale-to-zero disabled).
    var candidateModel = new Databricks.Inputs.ModelServingConfigServedModelArgs
    {
        ModelName = "ads-model",
        ModelVersion = "4",
        Name = "candidate_model",
        ScaleToZeroEnabled = false,
        WorkloadSize = "Small",
    };

    // Route 90% of endpoint traffic to prod and 10% to the candidate.
    var trafficConfig = new Databricks.Inputs.ModelServingConfigTrafficConfigArgs
    {
        Routes = new[]
        {
            new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
            {
                ServedModelName = "prod_model",
                TrafficPercentage = 90,
            },
            new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
            {
                ServedModelName = "candidate_model",
                TrafficPercentage = 10,
            },
        },
    };

    var endpoint = new Databricks.ModelServing("this", new()
    {
        Config = new Databricks.Inputs.ModelServingConfigArgs
        {
            ServedModels = new[] { prodModel, candidateModel },
            TrafficConfig = trafficConfig,
        },
    });
});
package main
import (
"github.com/pulumi/pulumi-databricks/sdk/go/databricks"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := databricks.NewModelServing(ctx, "this", &databricks.ModelServingArgs{
Config: &databricks.ModelServingConfigArgs{
ServedModels: databricks.ModelServingConfigServedModelArray{
&databricks.ModelServingConfigServedModelArgs{
ModelName: pulumi.String("ads-model"),
ModelVersion: pulumi.String("2"),
Name: pulumi.String("prod_model"),
ScaleToZeroEnabled: pulumi.Bool(true),
WorkloadSize: pulumi.String("Small"),
},
&databricks.ModelServingConfigServedModelArgs{
ModelName: pulumi.String("ads-model"),
ModelVersion: pulumi.String("4"),
Name: pulumi.String("candidate_model"),
ScaleToZeroEnabled: pulumi.Bool(false),
WorkloadSize: pulumi.String("Small"),
},
},
TrafficConfig: &databricks.ModelServingConfigTrafficConfigArgs{
Routes: databricks.ModelServingConfigTrafficConfigRouteArray{
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("prod_model"),
TrafficPercentage: pulumi.Int(90),
},
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("candidate_model"),
TrafficPercentage: pulumi.Int(10),
},
},
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.databricks.ModelServing;
import com.pulumi.databricks.ModelServingArgs;
import com.pulumi.databricks.inputs.ModelServingConfigArgs;
// FIX: the two Args classes below are used in stack() but were missing from the
// generated import list, so the example did not compile as published.
import com.pulumi.databricks.inputs.ModelServingConfigServedModelArgs;
import com.pulumi.databricks.inputs.ModelServingConfigTrafficConfigArgs;
import com.pulumi.databricks.inputs.ModelServingConfigTrafficConfigRouteArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Creates a Databricks Model Serving endpoint that serves versions 2 and 4 of
 * the "ads-model" registry model and splits traffic 90/10 between them.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var this_ = new ModelServing("this", ModelServingArgs.builder()
            .config(ModelServingConfigArgs.builder()
                // Two served models built from the same registered model.
                .servedModels(
                    ModelServingConfigServedModelArgs.builder()
                        .modelName("ads-model")
                        .modelVersion("2")
                        .name("prod_model")
                        .scaleToZeroEnabled(true)   // may scale down to zero when idle
                        .workloadSize("Small")
                        .build(),
                    ModelServingConfigServedModelArgs.builder()
                        .modelName("ads-model")
                        .modelVersion("4")
                        .name("candidate_model")
                        .scaleToZeroEnabled(false)  // kept warm while under evaluation
                        .workloadSize("Small")
                        .build())
                // Route 90% of endpoint traffic to prod and 10% to the candidate.
                .trafficConfig(ModelServingConfigTrafficConfigArgs.builder()
                    .routes(
                        ModelServingConfigTrafficConfigRouteArgs.builder()
                            .servedModelName("prod_model")
                            .trafficPercentage(90)
                            .build(),
                        ModelServingConfigTrafficConfigRouteArgs.builder()
                            .servedModelName("candidate_model")
                            .trafficPercentage(10)
                            .build())
                    .build())
                .build())
            .build());
    }
}
import pulumi
import pulumi_databricks as databricks

# Stable version 2 of the registered model; allowed to scale down to zero.
prod_model = databricks.ModelServingConfigServedModelArgs(
    model_name="ads-model",
    model_version="2",
    name="prod_model",
    scale_to_zero_enabled=True,
    workload_size="Small",
)

# Candidate version 4 of the same model; kept warm (no scale-to-zero).
candidate_model = databricks.ModelServingConfigServedModelArgs(
    model_name="ads-model",
    model_version="4",
    name="candidate_model",
    scale_to_zero_enabled=False,
    workload_size="Small",
)

# Split endpoint traffic 90/10 between the two served models.
traffic_config = databricks.ModelServingConfigTrafficConfigArgs(
    routes=[
        databricks.ModelServingConfigTrafficConfigRouteArgs(
            served_model_name="prod_model",
            traffic_percentage=90,
        ),
        databricks.ModelServingConfigTrafficConfigRouteArgs(
            served_model_name="candidate_model",
            traffic_percentage=10,
        ),
    ],
)

this = databricks.ModelServing(
    "this",
    config=databricks.ModelServingConfigArgs(
        served_models=[prod_model, candidate_model],
        traffic_config=traffic_config,
    ),
)
import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";

// Two versions of the same registered model served side by side:
// version 2 ("prod_model", scale-to-zero) and version 4 ("candidate_model", kept warm).
const servedModels = [
    {
        modelName: "ads-model",
        modelVersion: "2",
        name: "prod_model",
        scaleToZeroEnabled: true,
        workloadSize: "Small",
    },
    {
        modelName: "ads-model",
        modelVersion: "4",
        name: "candidate_model",
        scaleToZeroEnabled: false,
        workloadSize: "Small",
    },
];

// Route 90% of endpoint traffic to prod and 10% to the candidate.
const trafficConfig = {
    routes: [
        { servedModelName: "prod_model", trafficPercentage: 90 },
        { servedModelName: "candidate_model", trafficPercentage: 10 },
    ],
};

const _this = new databricks.ModelServing("this", {
    config: {
        servedModels: servedModels,
        trafficConfig: trafficConfig,
    },
});
# Databricks Model Serving endpoint serving versions 2 and 4 of "ads-model"
# with a 90/10 traffic split between the two served models.
# (Indentation reconstructed: the published listing had its YAML nesting
# flattened, which is invalid YAML.)
resources:
  this:
    type: databricks:ModelServing
    properties:
      config:
        servedModels:
          # Stable version; allowed to scale down to zero when idle.
          - modelName: ads-model
            modelVersion: '2'
            name: prod_model
            scaleToZeroEnabled: true
            workloadSize: Small
          # Candidate version under evaluation; kept warm.
          - modelName: ads-model
            modelVersion: '4'
            name: candidate_model
            scaleToZeroEnabled: false
            workloadSize: Small
        trafficConfig:
          routes:
            - servedModelName: prod_model
              trafficPercentage: 90
            - servedModelName: candidate_model
              trafficPercentage: 10
Create ModelServing Resource
new ModelServing(name: string, args: ModelServingArgs, opts?: CustomResourceOptions);
@overload
def ModelServing(resource_name: str,
opts: Optional[ResourceOptions] = None,
config: Optional[ModelServingConfigArgs] = None,
name: Optional[str] = None)
@overload
def ModelServing(resource_name: str,
args: ModelServingArgs,
opts: Optional[ResourceOptions] = None)
func NewModelServing(ctx *Context, name string, args ModelServingArgs, opts ...ResourceOption) (*ModelServing, error)
public ModelServing(string name, ModelServingArgs args, CustomResourceOptions? opts = null)
public ModelServing(String name, ModelServingArgs args)
public ModelServing(String name, ModelServingArgs args, CustomResourceOptions options)
type: databricks:ModelServing
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
ModelServing Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
The ModelServing resource accepts the following input properties:
- Config
Model
Serving Config Args The model serving endpoint configuration.
- Name string
The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Config
Model
Serving Config Args The model serving endpoint configuration.
- Name string
The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- config
Model
Serving Config Args The model serving endpoint configuration.
- name String
The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- config
Model
Serving Config Args The model serving endpoint configuration.
- name string
The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- config
Model
Serving Config Args The model serving endpoint configuration.
- name str
The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- config Property Map
The model serving endpoint configuration.
- name String
The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
Outputs
All input properties are implicitly available as output properties. Additionally, the ModelServing resource produces the following output properties:
- Id string
The provider-assigned unique ID for this managed resource.
- Id string
The provider-assigned unique ID for this managed resource.
- id String
The provider-assigned unique ID for this managed resource.
- id string
The provider-assigned unique ID for this managed resource.
- id str
The provider-assigned unique ID for this managed resource.
- id String
The provider-assigned unique ID for this managed resource.
Look up Existing ModelServing Resource
Get an existing ModelServing resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: ModelServingState, opts?: CustomResourceOptions): ModelServing
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
config: Optional[ModelServingConfigArgs] = None,
name: Optional[str] = None) -> ModelServing
func GetModelServing(ctx *Context, name string, id IDInput, state *ModelServingState, opts ...ResourceOption) (*ModelServing, error)
public static ModelServing Get(string name, Input<string> id, ModelServingState? state, CustomResourceOptions? opts = null)
public static ModelServing get(String name, Output<String> id, ModelServingState state, CustomResourceOptions options)
Resource lookup is not supported in YAML
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- Config
Model
Serving Config Args The model serving endpoint configuration.
- Name string
The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Config
Model
Serving Config Args The model serving endpoint configuration.
- Name string
The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- config
Model
Serving Config Args The model serving endpoint configuration.
- name String
The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- config
Model
Serving Config Args The model serving endpoint configuration.
- name string
The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- config
Model
Serving Config Args The model serving endpoint configuration.
- name str
The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- config Property Map
The model serving endpoint configuration.
- name String
The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
Supporting Types
ModelServingConfig
- Served
Models List<ModelServing Config Served Model> Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- Traffic
Config ModelServing Config Traffic Config A single block represents the traffic split configuration amongst the served models.
- Served
Models []ModelServing Config Served Model Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- Traffic
Config ModelServing Config Traffic Config A single block represents the traffic split configuration amongst the served models.
- served
Models List<ModelServing Config Served Model> Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- traffic
Config ModelServing Config Traffic Config A single block represents the traffic split configuration amongst the served models.
- served
Models ModelServing Config Served Model[] Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- traffic
Config ModelServing Config Traffic Config A single block represents the traffic split configuration amongst the served models.
- served_
models Sequence[ModelServing Config Served Model] Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- traffic_
config ModelServing Config Traffic Config A single block represents the traffic split configuration amongst the served models.
- served
Models List<Property Map> Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- traffic
Config Property Map A single block represents the traffic split configuration amongst the served models.
ModelServingConfigServedModel
- Model
Name string The name of the model in Databricks Model Registry to be served.
- Model
Version string The version of the model in Databricks Model Registry to be served.
- Workload
Size string The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
- Name string
The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
- ScaleToZeroEnabled bool
Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
- Model
Name string The name of the model in Databricks Model Registry to be served.
- Model
Version string The version of the model in Databricks Model Registry to be served.
- Workload
Size string The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
- Name string
The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores.- Scale
To boolZero Enabled Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is
true
.
- model
Name String The name of the model in Databricks Model Registry to be served.
- model
Version String The version of the model in Databricks Model Registry to be served.
- workload
Size String The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
- name String
The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores.- scale
To BooleanZero Enabled Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is
true
.
- model
Name string The name of the model in Databricks Model Registry to be served.
- model
Version string The version of the model in Databricks Model Registry to be served.
- workload
Size string The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
- name string
The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores.- scale
To booleanZero Enabled Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is
true
.
- model_
name str The name of the model in Databricks Model Registry to be served.
- model_
version str The version of the model in Databricks Model Registry to be served.
- workload_
size str The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
- name str
The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores.- scale_
to_ boolzero_ enabled Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is
true
.
- model
Name String The name of the model in Databricks Model Registry to be served.
- model
Version String The version of the model in Databricks Model Registry to be served.
- workload
Size String The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
- name String
The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores.- scale
To BooleanZero Enabled Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is
true
.
ModelServingConfigTrafficConfig
- Routes
List<Model
Serving Config Traffic Config Route> Each block represents a route that defines traffic to each served model. Each
served_models
block needs to have a correspondingroutes
block
- Routes
[]Model
Serving Config Traffic Config Route Each block represents a route that defines traffic to each served model. Each
served_models
block needs to have a correspondingroutes
block
- routes
List<Model
Serving Config Traffic Config Route> Each block represents a route that defines traffic to each served model. Each
served_models
block needs to have a correspondingroutes
block
- routes
Model
Serving Config Traffic Config Route[] Each block represents a route that defines traffic to each served model. Each
served_models
block needs to have a correspondingroutes
block
- routes
Sequence[Model
Serving Config Traffic Config Route] Each block represents a route that defines traffic to each served model. Each
served_models
block needs to have a correspondingroutes
block
- routes List<Property Map>
Each block represents a route that defines traffic to each served model. Each
served_models
block needs to have a correspondingroutes
block
ModelServingConfigTrafficConfigRoute
- ServedModelName string
The name of the served model this route configures traffic for. This needs to match the name of a served_models block.
- TrafficPercentage int
The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- Served
Model stringName The name of the served model this route configures traffic for. This needs to match the name of a
served_models
block- Traffic
Percentage int The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served
Model StringName The name of the served model this route configures traffic for. This needs to match the name of a
served_models
block- traffic
Percentage Integer The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served
Model stringName The name of the served model this route configures traffic for. This needs to match the name of a
served_models
block- traffic
Percentage number The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served_
model_ strname The name of the served model this route configures traffic for. This needs to match the name of a
served_models
block- traffic_
percentage int The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served
Model StringName The name of the served model this route configures traffic for. This needs to match the name of a
served_models
block- traffic
Percentage Number The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
Import
The model serving resource can be imported using the name of the endpoint:
$ pulumi import databricks:index/modelServing:ModelServing this <model-serving-endpoint-name>
Package Details
- Repository
- databricks pulumi/pulumi-databricks
- License
- Apache-2.0
- Notes
This Pulumi package is based on the
databricks
Terraform Provider.