aws.sagemaker.EndpointConfiguration
AWS v7.12.0 published on Thursday, Nov 20, 2025 by Pulumi

    Provides a SageMaker AI endpoint configuration resource.

    Example Usage

    Basic usage:

    import * as pulumi from "@pulumi/pulumi";
    import * as aws from "@pulumi/aws";
    
    // `m` is assumed to be an aws.sagemaker.Model defined earlier in the program.
    const ec = new aws.sagemaker.EndpointConfiguration("ec", {
        name: "my-endpoint-config",
        productionVariants: [{
            variantName: "variant-1",
            modelName: m.name,
            initialInstanceCount: 1,
            instanceType: "ml.t2.medium",
        }],
        tags: {
            Name: "foo",
        },
    });
    
    import pulumi
    import pulumi_aws as aws
    
    # `m` is assumed to be an aws.sagemaker.Model defined earlier in the program.
    ec = aws.sagemaker.EndpointConfiguration("ec",
        name="my-endpoint-config",
        production_variants=[{
            "variant_name": "variant-1",
            "model_name": m.name,
            "initial_instance_count": 1,
            "instance_type": "ml.t2.medium",
        }],
        tags={
            "Name": "foo",
        })
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-aws/sdk/v7/go/aws/sagemaker"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		// `m` is assumed to be a *sagemaker.Model created earlier in the program.
    		_, err := sagemaker.NewEndpointConfiguration(ctx, "ec", &sagemaker.EndpointConfigurationArgs{
    			Name: pulumi.String("my-endpoint-config"),
    			ProductionVariants: sagemaker.EndpointConfigurationProductionVariantArray{
    				&sagemaker.EndpointConfigurationProductionVariantArgs{
    					VariantName:          pulumi.String("variant-1"),
    					ModelName:            pulumi.Any(m.Name),
    					InitialInstanceCount: pulumi.Int(1),
    					InstanceType:         pulumi.String("ml.t2.medium"),
    				},
    			},
    			Tags: pulumi.StringMap{
    				"Name": pulumi.String("foo"),
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Aws = Pulumi.Aws;
    
    return await Deployment.RunAsync(() => 
    {
    // `m` is assumed to be an Aws.Sagemaker.Model defined earlier in the program.
    var ec = new Aws.Sagemaker.EndpointConfiguration("ec", new()
        {
            Name = "my-endpoint-config",
            ProductionVariants = new[]
            {
                new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantArgs
                {
                    VariantName = "variant-1",
                    ModelName = m.Name,
                    InitialInstanceCount = 1,
                    InstanceType = "ml.t2.medium",
                },
            },
            Tags = 
            {
                { "Name", "foo" },
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.aws.sagemaker.EndpointConfiguration;
    import com.pulumi.aws.sagemaker.EndpointConfigurationArgs;
    import com.pulumi.aws.sagemaker.inputs.EndpointConfigurationProductionVariantArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
        // `m` is assumed to be a com.pulumi.aws.sagemaker.Model defined earlier in the program.
        var ec = new EndpointConfiguration("ec", EndpointConfigurationArgs.builder()
                .name("my-endpoint-config")
                .productionVariants(EndpointConfigurationProductionVariantArgs.builder()
                    .variantName("variant-1")
                    .modelName(m.name())
                    .initialInstanceCount(1)
                    .instanceType("ml.t2.medium")
                    .build())
                .tags(Map.of("Name", "foo"))
                .build());
    
        }
    }
    
    # `m` is assumed to be an aws:sagemaker:Model resource defined elsewhere in this stack.
    resources:
      ec:
        type: aws:sagemaker:EndpointConfiguration
        properties:
          name: my-endpoint-config
          productionVariants:
            - variantName: variant-1
              modelName: ${m.name}
              initialInstanceCount: 1
              instanceType: ml.t2.medium
          tags:
            Name: foo
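
    The examples above reference a model `m` without defining it. As a hedged, minimal sketch, the Python program below shows how the pieces typically fit together; the IAM role ARN and inference image URI are placeholder values you would replace with your own.

    import pulumi
    import pulumi_aws as aws

    # Placeholders: substitute a real SageMaker execution role and inference image.
    model = aws.sagemaker.Model("m",
        execution_role_arn="arn:aws:iam::123456789012:role/sagemaker-execution-role",
        primary_container={
            "image": "123456789012.dkr.ecr.us-east-1.amazonaws.com/my-inference-image:latest",
        })

    ec = aws.sagemaker.EndpointConfiguration("ec",
        name="my-endpoint-config",
        production_variants=[{
            "variant_name": "variant-1",
            "model_name": model.name,
            "initial_instance_count": 1,
            "instance_type": "ml.t2.medium",
        }])

    # An endpoint configuration is typically consumed by an aws.sagemaker.Endpoint.
    endpoint = aws.sagemaker.Endpoint("e", endpoint_config_name=ec.name)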
    

    Create EndpointConfiguration Resource

    Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.

    Constructor syntax

    new EndpointConfiguration(name: string, args: EndpointConfigurationArgs, opts?: CustomResourceOptions);
    @overload
    def EndpointConfiguration(resource_name: str,
                              args: EndpointConfigurationArgs,
                              opts: Optional[ResourceOptions] = None)
    
    @overload
    def EndpointConfiguration(resource_name: str,
                              opts: Optional[ResourceOptions] = None,
                              production_variants: Optional[Sequence[EndpointConfigurationProductionVariantArgs]] = None,
                              async_inference_config: Optional[EndpointConfigurationAsyncInferenceConfigArgs] = None,
                              data_capture_config: Optional[EndpointConfigurationDataCaptureConfigArgs] = None,
                              execution_role_arn: Optional[str] = None,
                              kms_key_arn: Optional[str] = None,
                              name: Optional[str] = None,
                              name_prefix: Optional[str] = None,
                              region: Optional[str] = None,
                              shadow_production_variants: Optional[Sequence[EndpointConfigurationShadowProductionVariantArgs]] = None,
                              tags: Optional[Mapping[str, str]] = None)
    func NewEndpointConfiguration(ctx *Context, name string, args EndpointConfigurationArgs, opts ...ResourceOption) (*EndpointConfiguration, error)
    public EndpointConfiguration(string name, EndpointConfigurationArgs args, CustomResourceOptions? opts = null)
    public EndpointConfiguration(String name, EndpointConfigurationArgs args)
    public EndpointConfiguration(String name, EndpointConfigurationArgs args, CustomResourceOptions options)
    
    type: aws:sagemaker:EndpointConfiguration
    properties: # The arguments to resource properties.
    options: # Bag of options to control resource's behavior.
    
    

    Parameters

    name string
    The unique name of the resource.
    args EndpointConfigurationArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    resource_name str
    The unique name of the resource.
    args EndpointConfigurationArgs
    The arguments to resource properties.
    opts ResourceOptions
    Bag of options to control resource's behavior.
    ctx Context
    Context object for the current deployment.
    name string
    The unique name of the resource.
    args EndpointConfigurationArgs
    The arguments to resource properties.
    opts ResourceOption
    Bag of options to control resource's behavior.
    name string
    The unique name of the resource.
    args EndpointConfigurationArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    name String
    The unique name of the resource.
    args EndpointConfigurationArgs
    The arguments to resource properties.
    options CustomResourceOptions
    Bag of options to control resource's behavior.

    Constructor example

    The following reference example uses placeholder values for all input properties.

    var endpointConfigurationResource = new Aws.Sagemaker.EndpointConfiguration("endpointConfigurationResource", new()
    {
        ProductionVariants = new[]
        {
            new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantArgs
            {
                AcceleratorType = "string",
                ContainerStartupHealthCheckTimeoutInSeconds = 0,
                CoreDumpConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantCoreDumpConfigArgs
                {
                    DestinationS3Uri = "string",
                    KmsKeyId = "string",
                },
                EnableSsmAccess = false,
                InferenceAmiVersion = "string",
                InitialInstanceCount = 0,
                InitialVariantWeight = 0,
                InstanceType = "string",
                ManagedInstanceScaling = new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantManagedInstanceScalingArgs
                {
                    MaxInstanceCount = 0,
                    MinInstanceCount = 0,
                    Status = "string",
                },
                ModelDataDownloadTimeoutInSeconds = 0,
                ModelName = "string",
                RoutingConfigs = new[]
                {
                    new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantRoutingConfigArgs
                    {
                        RoutingStrategy = "string",
                    },
                },
                ServerlessConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantServerlessConfigArgs
                {
                    MaxConcurrency = 0,
                    MemorySizeInMb = 0,
                    ProvisionedConcurrency = 0,
                },
                VariantName = "string",
                VolumeSizeInGb = 0,
            },
        },
        AsyncInferenceConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigArgs
        {
            OutputConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigOutputConfigArgs
            {
                S3OutputPath = "string",
                KmsKeyId = "string",
                NotificationConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs
                {
                    ErrorTopic = "string",
                    IncludeInferenceResponseIns = new[]
                    {
                        "string",
                    },
                    SuccessTopic = "string",
                },
                S3FailurePath = "string",
            },
            ClientConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigClientConfigArgs
            {
                MaxConcurrentInvocationsPerInstance = 0,
            },
        },
        DataCaptureConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationDataCaptureConfigArgs
        {
            CaptureOptions = new[]
            {
                new Aws.Sagemaker.Inputs.EndpointConfigurationDataCaptureConfigCaptureOptionArgs
                {
                    CaptureMode = "string",
                },
            },
            DestinationS3Uri = "string",
            InitialSamplingPercentage = 0,
            CaptureContentTypeHeader = new Aws.Sagemaker.Inputs.EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs
            {
                CsvContentTypes = new[]
                {
                    "string",
                },
                JsonContentTypes = new[]
                {
                    "string",
                },
            },
            EnableCapture = false,
            KmsKeyId = "string",
        },
        ExecutionRoleArn = "string",
        KmsKeyArn = "string",
        Name = "string",
        NamePrefix = "string",
        Region = "string",
        ShadowProductionVariants = new[]
        {
            new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantArgs
            {
                AcceleratorType = "string",
                ContainerStartupHealthCheckTimeoutInSeconds = 0,
                CoreDumpConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs
                {
                    DestinationS3Uri = "string",
                    KmsKeyId = "string",
                },
                EnableSsmAccess = false,
                InferenceAmiVersion = "string",
                InitialInstanceCount = 0,
                InitialVariantWeight = 0,
                InstanceType = "string",
                ManagedInstanceScaling = new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs
                {
                    MaxInstanceCount = 0,
                    MinInstanceCount = 0,
                    Status = "string",
                },
                ModelDataDownloadTimeoutInSeconds = 0,
                ModelName = "string",
                RoutingConfigs = new[]
                {
                    new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantRoutingConfigArgs
                    {
                        RoutingStrategy = "string",
                    },
                },
                ServerlessConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantServerlessConfigArgs
                {
                    MaxConcurrency = 0,
                    MemorySizeInMb = 0,
                    ProvisionedConcurrency = 0,
                },
                VariantName = "string",
                VolumeSizeInGb = 0,
            },
        },
        Tags = 
        {
            { "string", "string" },
        },
    });
    
    example, err := sagemaker.NewEndpointConfiguration(ctx, "endpointConfigurationResource", &sagemaker.EndpointConfigurationArgs{
    	ProductionVariants: sagemaker.EndpointConfigurationProductionVariantArray{
    		&sagemaker.EndpointConfigurationProductionVariantArgs{
    			AcceleratorType: pulumi.String("string"),
    			ContainerStartupHealthCheckTimeoutInSeconds: pulumi.Int(0),
    			CoreDumpConfig: &sagemaker.EndpointConfigurationProductionVariantCoreDumpConfigArgs{
    				DestinationS3Uri: pulumi.String("string"),
    				KmsKeyId:         pulumi.String("string"),
    			},
    			EnableSsmAccess:      pulumi.Bool(false),
    			InferenceAmiVersion:  pulumi.String("string"),
    			InitialInstanceCount: pulumi.Int(0),
    			InitialVariantWeight: pulumi.Float64(0),
    			InstanceType:         pulumi.String("string"),
    			ManagedInstanceScaling: &sagemaker.EndpointConfigurationProductionVariantManagedInstanceScalingArgs{
    				MaxInstanceCount: pulumi.Int(0),
    				MinInstanceCount: pulumi.Int(0),
    				Status:           pulumi.String("string"),
    			},
    			ModelDataDownloadTimeoutInSeconds: pulumi.Int(0),
    			ModelName:                         pulumi.String("string"),
    			RoutingConfigs: sagemaker.EndpointConfigurationProductionVariantRoutingConfigArray{
    				&sagemaker.EndpointConfigurationProductionVariantRoutingConfigArgs{
    					RoutingStrategy: pulumi.String("string"),
    				},
    			},
    			ServerlessConfig: &sagemaker.EndpointConfigurationProductionVariantServerlessConfigArgs{
    				MaxConcurrency:         pulumi.Int(0),
    				MemorySizeInMb:         pulumi.Int(0),
    				ProvisionedConcurrency: pulumi.Int(0),
    			},
    			VariantName:    pulumi.String("string"),
    			VolumeSizeInGb: pulumi.Int(0),
    		},
    	},
    	AsyncInferenceConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigArgs{
    		OutputConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigOutputConfigArgs{
    			S3OutputPath: pulumi.String("string"),
    			KmsKeyId:     pulumi.String("string"),
    			NotificationConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs{
    				ErrorTopic: pulumi.String("string"),
    				IncludeInferenceResponseIns: pulumi.StringArray{
    					pulumi.String("string"),
    				},
    				SuccessTopic: pulumi.String("string"),
    			},
    			S3FailurePath: pulumi.String("string"),
    		},
    		ClientConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigClientConfigArgs{
    			MaxConcurrentInvocationsPerInstance: pulumi.Int(0),
    		},
    	},
    	DataCaptureConfig: &sagemaker.EndpointConfigurationDataCaptureConfigArgs{
    		CaptureOptions: sagemaker.EndpointConfigurationDataCaptureConfigCaptureOptionArray{
    			&sagemaker.EndpointConfigurationDataCaptureConfigCaptureOptionArgs{
    				CaptureMode: pulumi.String("string"),
    			},
    		},
    		DestinationS3Uri:          pulumi.String("string"),
    		InitialSamplingPercentage: pulumi.Int(0),
    		CaptureContentTypeHeader: &sagemaker.EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs{
    			CsvContentTypes: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    			JsonContentTypes: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    		},
    		EnableCapture: pulumi.Bool(false),
    		KmsKeyId:      pulumi.String("string"),
    	},
    	ExecutionRoleArn: pulumi.String("string"),
    	KmsKeyArn:        pulumi.String("string"),
    	Name:             pulumi.String("string"),
    	NamePrefix:       pulumi.String("string"),
    	Region:           pulumi.String("string"),
    	ShadowProductionVariants: sagemaker.EndpointConfigurationShadowProductionVariantArray{
    		&sagemaker.EndpointConfigurationShadowProductionVariantArgs{
    			AcceleratorType: pulumi.String("string"),
    			ContainerStartupHealthCheckTimeoutInSeconds: pulumi.Int(0),
    			CoreDumpConfig: &sagemaker.EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs{
    				DestinationS3Uri: pulumi.String("string"),
    				KmsKeyId:         pulumi.String("string"),
    			},
    			EnableSsmAccess:      pulumi.Bool(false),
    			InferenceAmiVersion:  pulumi.String("string"),
    			InitialInstanceCount: pulumi.Int(0),
    			InitialVariantWeight: pulumi.Float64(0),
    			InstanceType:         pulumi.String("string"),
    			ManagedInstanceScaling: &sagemaker.EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs{
    				MaxInstanceCount: pulumi.Int(0),
    				MinInstanceCount: pulumi.Int(0),
    				Status:           pulumi.String("string"),
    			},
    			ModelDataDownloadTimeoutInSeconds: pulumi.Int(0),
    			ModelName:                         pulumi.String("string"),
    			RoutingConfigs: sagemaker.EndpointConfigurationShadowProductionVariantRoutingConfigArray{
    				&sagemaker.EndpointConfigurationShadowProductionVariantRoutingConfigArgs{
    					RoutingStrategy: pulumi.String("string"),
    				},
    			},
    			ServerlessConfig: &sagemaker.EndpointConfigurationShadowProductionVariantServerlessConfigArgs{
    				MaxConcurrency:         pulumi.Int(0),
    				MemorySizeInMb:         pulumi.Int(0),
    				ProvisionedConcurrency: pulumi.Int(0),
    			},
    			VariantName:    pulumi.String("string"),
    			VolumeSizeInGb: pulumi.Int(0),
    		},
    	},
    	Tags: pulumi.StringMap{
    		"string": pulumi.String("string"),
    	},
    })
    
    var endpointConfigurationResource = new EndpointConfiguration("endpointConfigurationResource", EndpointConfigurationArgs.builder()
        .productionVariants(EndpointConfigurationProductionVariantArgs.builder()
            .acceleratorType("string")
            .containerStartupHealthCheckTimeoutInSeconds(0)
            .coreDumpConfig(EndpointConfigurationProductionVariantCoreDumpConfigArgs.builder()
                .destinationS3Uri("string")
                .kmsKeyId("string")
                .build())
            .enableSsmAccess(false)
            .inferenceAmiVersion("string")
            .initialInstanceCount(0)
            .initialVariantWeight(0.0)
            .instanceType("string")
            .managedInstanceScaling(EndpointConfigurationProductionVariantManagedInstanceScalingArgs.builder()
                .maxInstanceCount(0)
                .minInstanceCount(0)
                .status("string")
                .build())
            .modelDataDownloadTimeoutInSeconds(0)
            .modelName("string")
            .routingConfigs(EndpointConfigurationProductionVariantRoutingConfigArgs.builder()
                .routingStrategy("string")
                .build())
            .serverlessConfig(EndpointConfigurationProductionVariantServerlessConfigArgs.builder()
                .maxConcurrency(0)
                .memorySizeInMb(0)
                .provisionedConcurrency(0)
                .build())
            .variantName("string")
            .volumeSizeInGb(0)
            .build())
        .asyncInferenceConfig(EndpointConfigurationAsyncInferenceConfigArgs.builder()
            .outputConfig(EndpointConfigurationAsyncInferenceConfigOutputConfigArgs.builder()
                .s3OutputPath("string")
                .kmsKeyId("string")
                .notificationConfig(EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs.builder()
                    .errorTopic("string")
                    .includeInferenceResponseIns("string")
                    .successTopic("string")
                    .build())
                .s3FailurePath("string")
                .build())
            .clientConfig(EndpointConfigurationAsyncInferenceConfigClientConfigArgs.builder()
                .maxConcurrentInvocationsPerInstance(0)
                .build())
            .build())
        .dataCaptureConfig(EndpointConfigurationDataCaptureConfigArgs.builder()
            .captureOptions(EndpointConfigurationDataCaptureConfigCaptureOptionArgs.builder()
                .captureMode("string")
                .build())
            .destinationS3Uri("string")
            .initialSamplingPercentage(0)
            .captureContentTypeHeader(EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs.builder()
                .csvContentTypes("string")
                .jsonContentTypes("string")
                .build())
            .enableCapture(false)
            .kmsKeyId("string")
            .build())
        .executionRoleArn("string")
        .kmsKeyArn("string")
        .name("string")
        .namePrefix("string")
        .region("string")
        .shadowProductionVariants(EndpointConfigurationShadowProductionVariantArgs.builder()
            .acceleratorType("string")
            .containerStartupHealthCheckTimeoutInSeconds(0)
            .coreDumpConfig(EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs.builder()
                .destinationS3Uri("string")
                .kmsKeyId("string")
                .build())
            .enableSsmAccess(false)
            .inferenceAmiVersion("string")
            .initialInstanceCount(0)
            .initialVariantWeight(0.0)
            .instanceType("string")
            .managedInstanceScaling(EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs.builder()
                .maxInstanceCount(0)
                .minInstanceCount(0)
                .status("string")
                .build())
            .modelDataDownloadTimeoutInSeconds(0)
            .modelName("string")
            .routingConfigs(EndpointConfigurationShadowProductionVariantRoutingConfigArgs.builder()
                .routingStrategy("string")
                .build())
            .serverlessConfig(EndpointConfigurationShadowProductionVariantServerlessConfigArgs.builder()
                .maxConcurrency(0)
                .memorySizeInMb(0)
                .provisionedConcurrency(0)
                .build())
            .variantName("string")
            .volumeSizeInGb(0)
            .build())
        .tags(Map.of("string", "string"))
        .build());
    
    endpoint_configuration_resource = aws.sagemaker.EndpointConfiguration("endpointConfigurationResource",
        production_variants=[{
            "accelerator_type": "string",
            "container_startup_health_check_timeout_in_seconds": 0,
            "core_dump_config": {
                "destination_s3_uri": "string",
                "kms_key_id": "string",
            },
            "enable_ssm_access": False,
            "inference_ami_version": "string",
            "initial_instance_count": 0,
            "initial_variant_weight": 0,
            "instance_type": "string",
            "managed_instance_scaling": {
                "max_instance_count": 0,
                "min_instance_count": 0,
                "status": "string",
            },
            "model_data_download_timeout_in_seconds": 0,
            "model_name": "string",
            "routing_configs": [{
                "routing_strategy": "string",
            }],
            "serverless_config": {
                "max_concurrency": 0,
                "memory_size_in_mb": 0,
                "provisioned_concurrency": 0,
            },
            "variant_name": "string",
            "volume_size_in_gb": 0,
        }],
        async_inference_config={
            "output_config": {
                "s3_output_path": "string",
                "kms_key_id": "string",
                "notification_config": {
                    "error_topic": "string",
                    "include_inference_response_ins": ["string"],
                    "success_topic": "string",
                },
                "s3_failure_path": "string",
            },
            "client_config": {
                "max_concurrent_invocations_per_instance": 0,
            },
        },
        data_capture_config={
            "capture_options": [{
                "capture_mode": "string",
            }],
            "destination_s3_uri": "string",
            "initial_sampling_percentage": 0,
            "capture_content_type_header": {
                "csv_content_types": ["string"],
                "json_content_types": ["string"],
            },
            "enable_capture": False,
            "kms_key_id": "string",
        },
        execution_role_arn="string",
        kms_key_arn="string",
        name="string",
        name_prefix="string",
        region="string",
        shadow_production_variants=[{
            "accelerator_type": "string",
            "container_startup_health_check_timeout_in_seconds": 0,
            "core_dump_config": {
                "destination_s3_uri": "string",
                "kms_key_id": "string",
            },
            "enable_ssm_access": False,
            "inference_ami_version": "string",
            "initial_instance_count": 0,
            "initial_variant_weight": 0,
            "instance_type": "string",
            "managed_instance_scaling": {
                "max_instance_count": 0,
                "min_instance_count": 0,
                "status": "string",
            },
            "model_data_download_timeout_in_seconds": 0,
            "model_name": "string",
            "routing_configs": [{
                "routing_strategy": "string",
            }],
            "serverless_config": {
                "max_concurrency": 0,
                "memory_size_in_mb": 0,
                "provisioned_concurrency": 0,
            },
            "variant_name": "string",
            "volume_size_in_gb": 0,
        }],
        tags={
            "string": "string",
        })
    
    const endpointConfigurationResource = new aws.sagemaker.EndpointConfiguration("endpointConfigurationResource", {
        productionVariants: [{
            acceleratorType: "string",
            containerStartupHealthCheckTimeoutInSeconds: 0,
            coreDumpConfig: {
                destinationS3Uri: "string",
                kmsKeyId: "string",
            },
            enableSsmAccess: false,
            inferenceAmiVersion: "string",
            initialInstanceCount: 0,
            initialVariantWeight: 0,
            instanceType: "string",
            managedInstanceScaling: {
                maxInstanceCount: 0,
                minInstanceCount: 0,
                status: "string",
            },
            modelDataDownloadTimeoutInSeconds: 0,
            modelName: "string",
            routingConfigs: [{
                routingStrategy: "string",
            }],
            serverlessConfig: {
                maxConcurrency: 0,
                memorySizeInMb: 0,
                provisionedConcurrency: 0,
            },
            variantName: "string",
            volumeSizeInGb: 0,
        }],
        asyncInferenceConfig: {
            outputConfig: {
                s3OutputPath: "string",
                kmsKeyId: "string",
                notificationConfig: {
                    errorTopic: "string",
                    includeInferenceResponseIns: ["string"],
                    successTopic: "string",
                },
                s3FailurePath: "string",
            },
            clientConfig: {
                maxConcurrentInvocationsPerInstance: 0,
            },
        },
        dataCaptureConfig: {
            captureOptions: [{
                captureMode: "string",
            }],
            destinationS3Uri: "string",
            initialSamplingPercentage: 0,
            captureContentTypeHeader: {
                csvContentTypes: ["string"],
                jsonContentTypes: ["string"],
            },
            enableCapture: false,
            kmsKeyId: "string",
        },
        executionRoleArn: "string",
        kmsKeyArn: "string",
        name: "string",
        namePrefix: "string",
        region: "string",
        shadowProductionVariants: [{
            acceleratorType: "string",
            containerStartupHealthCheckTimeoutInSeconds: 0,
            coreDumpConfig: {
                destinationS3Uri: "string",
                kmsKeyId: "string",
            },
            enableSsmAccess: false,
            inferenceAmiVersion: "string",
            initialInstanceCount: 0,
            initialVariantWeight: 0,
            instanceType: "string",
            managedInstanceScaling: {
                maxInstanceCount: 0,
                minInstanceCount: 0,
                status: "string",
            },
            modelDataDownloadTimeoutInSeconds: 0,
            modelName: "string",
            routingConfigs: [{
                routingStrategy: "string",
            }],
            serverlessConfig: {
                maxConcurrency: 0,
                memorySizeInMb: 0,
                provisionedConcurrency: 0,
            },
            variantName: "string",
            volumeSizeInGb: 0,
        }],
        tags: {
            string: "string",
        },
    });
    
    type: aws:sagemaker:EndpointConfiguration
    properties:
        asyncInferenceConfig:
            clientConfig:
                maxConcurrentInvocationsPerInstance: 0
            outputConfig:
                kmsKeyId: string
                notificationConfig:
                    errorTopic: string
                    includeInferenceResponseIns:
                        - string
                    successTopic: string
                s3FailurePath: string
                s3OutputPath: string
        dataCaptureConfig:
            captureContentTypeHeader:
                csvContentTypes:
                    - string
                jsonContentTypes:
                    - string
            captureOptions:
                - captureMode: string
            destinationS3Uri: string
            enableCapture: false
            initialSamplingPercentage: 0
            kmsKeyId: string
        executionRoleArn: string
        kmsKeyArn: string
        name: string
        namePrefix: string
        productionVariants:
            - acceleratorType: string
              containerStartupHealthCheckTimeoutInSeconds: 0
              coreDumpConfig:
                destinationS3Uri: string
                kmsKeyId: string
              enableSsmAccess: false
              inferenceAmiVersion: string
              initialInstanceCount: 0
              initialVariantWeight: 0
              instanceType: string
              managedInstanceScaling:
                maxInstanceCount: 0
                minInstanceCount: 0
                status: string
              modelDataDownloadTimeoutInSeconds: 0
              modelName: string
              routingConfigs:
                - routingStrategy: string
              serverlessConfig:
                maxConcurrency: 0
                memorySizeInMb: 0
                provisionedConcurrency: 0
              variantName: string
              volumeSizeInGb: 0
        region: string
        shadowProductionVariants:
            - acceleratorType: string
              containerStartupHealthCheckTimeoutInSeconds: 0
              coreDumpConfig:
                destinationS3Uri: string
                kmsKeyId: string
              enableSsmAccess: false
              inferenceAmiVersion: string
              initialInstanceCount: 0
              initialVariantWeight: 0
              instanceType: string
              managedInstanceScaling:
                maxInstanceCount: 0
                minInstanceCount: 0
                status: string
              modelDataDownloadTimeoutInSeconds: 0
              modelName: string
              routingConfigs:
                - routingStrategy: string
              serverlessConfig:
                maxConcurrency: 0
                memorySizeInMb: 0
                provisionedConcurrency: 0
              variantName: string
              volumeSizeInGb: 0
        tags:
            string: string
    

    EndpointConfiguration Resource Properties

    To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

    Inputs

    In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
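
    For instance, the following two Python declarations are equivalent (a minimal sketch; "my-model" is an assumed, pre-existing model name):

    import pulumi_aws as aws

    # Dictionary literal form.
    ec1 = aws.sagemaker.EndpointConfiguration("ec1",
        production_variants=[{
            "variant_name": "variant-1",
            "model_name": "my-model",
            "initial_instance_count": 1,
            "instance_type": "ml.t2.medium",
        }])

    # Argument class form.
    ec2 = aws.sagemaker.EndpointConfiguration("ec2",
        production_variants=[aws.sagemaker.EndpointConfigurationProductionVariantArgs(
            variant_name="variant-1",
            model_name="my-model",
            initial_instance_count=1,
            instance_type="ml.t2.medium",
        )])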

    The EndpointConfiguration resource accepts the following input properties:

    ProductionVariants List<EndpointConfigurationProductionVariant>
    List each model that you want to host at this endpoint. See below.
    AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
    How an endpoint performs asynchronous inference.
    DataCaptureConfig EndpointConfigurationDataCaptureConfig
    Parameters to capture the input/output of SageMaker AI model endpoints. Fields are documented below.
    ExecutionRoleArn string
    ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
    KmsKeyArn string
    ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    Name string
    Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
    NamePrefix string
    Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    Region string
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    ShadowProductionVariants List<EndpointConfigurationShadowProductionVariant>
    Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
    Tags Dictionary<string, string>
    Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
    ProductionVariants []EndpointConfigurationProductionVariantArgs
    List each model that you want to host at this endpoint. See below.
    AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfigArgs
    How an endpoint performs asynchronous inference.
    DataCaptureConfig EndpointConfigurationDataCaptureConfigArgs
    Parameters to capture the input/output of SageMaker AI model endpoints. Fields are documented below.
    ExecutionRoleArn string
    ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
    KmsKeyArn string
    ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    Name string
    Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
    NamePrefix string
    Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    Region string
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    ShadowProductionVariants []EndpointConfigurationShadowProductionVariantArgs
    Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
    Tags map[string]string
    Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
    productionVariants List<EndpointConfigurationProductionVariant>
    List each model that you want to host at this endpoint. See below.
    asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
    How an endpoint performs asynchronous inference.
    dataCaptureConfig EndpointConfigurationDataCaptureConfig
    Parameters to capture the input/output of SageMaker AI model endpoints. Fields are documented below.
    executionRoleArn String
    ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
    kmsKeyArn String
    ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    name String
    Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
    namePrefix String
    Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    region String
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    shadowProductionVariants List<EndpointConfigurationShadowProductionVariant>
    Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
    tags Map<String,String>
    Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
    productionVariants EndpointConfigurationProductionVariant[]
    List each model that you want to host at this endpoint. See below.
    asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
    How an endpoint performs asynchronous inference.
    dataCaptureConfig EndpointConfigurationDataCaptureConfig
    Parameters to capture the input/output of SageMaker AI model endpoints. Fields are documented below.
    executionRoleArn string
    ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
    kmsKeyArn string
    ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    name string
    Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
    namePrefix string
    Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    region string
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    shadowProductionVariants EndpointConfigurationShadowProductionVariant[]
    Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
    tags {[key: string]: string}
    Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
    production_variants Sequence[EndpointConfigurationProductionVariantArgs]
    List each model that you want to host at this endpoint. See below.
    async_inference_config EndpointConfigurationAsyncInferenceConfigArgs
    How an endpoint performs asynchronous inference.
    data_capture_config EndpointConfigurationDataCaptureConfigArgs
    Parameters to capture the input/output of SageMaker AI model endpoints. Fields are documented below.
    execution_role_arn str
    ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
    kms_key_arn str
    ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    name str
    Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
    name_prefix str
    Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    region str
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    shadow_production_variants Sequence[EndpointConfigurationShadowProductionVariantArgs]
    Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
    tags Mapping[str, str]
    Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
    productionVariants List<Property Map>
    List each model that you want to host at this endpoint. See below.
    asyncInferenceConfig Property Map
    How an endpoint performs asynchronous inference.
    dataCaptureConfig Property Map
    Parameters to capture the input/output of SageMaker AI model endpoints. Fields are documented below.
    executionRoleArn String
    ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
    kmsKeyArn String
    ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    name String
    Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
    namePrefix String
    Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    region String
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    shadowProductionVariants List<Property Map>
    Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
    tags Map<String>
    Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
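
    To make the nested inputs concrete, here is a hedged Python sketch that enables data capture for a single variant; the S3 destination and model name are assumptions:

    import pulumi_aws as aws

    ec = aws.sagemaker.EndpointConfiguration("ec-with-capture",
        production_variants=[{
            "variant_name": "variant-1",
            "model_name": "my-model",  # assumed pre-existing model
            "initial_instance_count": 1,
            "instance_type": "ml.t2.medium",
        }],
        data_capture_config={
            "enable_capture": True,
            "initial_sampling_percentage": 100,
            "destination_s3_uri": "s3://my-capture-bucket/prefix",  # placeholder bucket
            "capture_options": [
                {"capture_mode": "Input"},
                {"capture_mode": "Output"},
            ],
        })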

    Outputs

    All input properties are implicitly available as output properties. Additionally, the EndpointConfiguration resource produces the following output properties:

    Arn string
    ARN assigned by AWS to this endpoint configuration.
    Id string
    The provider-assigned unique ID for this managed resource.
    TagsAll Dictionary<string, string>
    Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
    Arn string
    ARN assigned by AWS to this endpoint configuration.
    Id string
    The provider-assigned unique ID for this managed resource.
    TagsAll map[string]string
    Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
    arn String
    ARN assigned by AWS to this endpoint configuration.
    id String
    The provider-assigned unique ID for this managed resource.
    tagsAll Map<String,String>
    Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
    arn string
    ARN assigned by AWS to this endpoint configuration.
    id string
    The provider-assigned unique ID for this managed resource.
    tagsAll {[key: string]: string}
    Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
    arn str
    ARN assigned by AWS to this endpoint configuration.
    id str
    The provider-assigned unique ID for this managed resource.
    tags_all Mapping[str, str]
    Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
    arn String
    ARN assigned by AWS to this endpoint configuration.
    id String
    The provider-assigned unique ID for this managed resource.
    tagsAll Map<String>
    Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
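
    Like all Pulumi outputs, these can be exported or passed to other resources. For example, in Python (assuming `ec` is the EndpointConfiguration from the examples above):

    import pulumi

    pulumi.export("endpoint_config_arn", ec.arn)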

    Look up Existing EndpointConfiguration Resource

    Get an existing EndpointConfiguration resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

    public static get(name: string, id: Input<ID>, state?: EndpointConfigurationState, opts?: CustomResourceOptions): EndpointConfiguration
    @staticmethod
    def get(resource_name: str,
            id: str,
            opts: Optional[ResourceOptions] = None,
            arn: Optional[str] = None,
            async_inference_config: Optional[EndpointConfigurationAsyncInferenceConfigArgs] = None,
            data_capture_config: Optional[EndpointConfigurationDataCaptureConfigArgs] = None,
            execution_role_arn: Optional[str] = None,
            kms_key_arn: Optional[str] = None,
            name: Optional[str] = None,
            name_prefix: Optional[str] = None,
            production_variants: Optional[Sequence[EndpointConfigurationProductionVariantArgs]] = None,
            region: Optional[str] = None,
            shadow_production_variants: Optional[Sequence[EndpointConfigurationShadowProductionVariantArgs]] = None,
            tags: Optional[Mapping[str, str]] = None,
            tags_all: Optional[Mapping[str, str]] = None) -> EndpointConfiguration
    func GetEndpointConfiguration(ctx *Context, name string, id IDInput, state *EndpointConfigurationState, opts ...ResourceOption) (*EndpointConfiguration, error)
    public static EndpointConfiguration Get(string name, Input<string> id, EndpointConfigurationState? state, CustomResourceOptions? opts = null)
    public static EndpointConfiguration get(String name, Output<String> id, EndpointConfigurationState state, CustomResourceOptions options)
    resources:
      _:
        type: aws:sagemaker:EndpointConfiguration
        get:
          id: ${id}
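
    For example, in Python (assuming "my-endpoint-config" is the provider ID of an existing endpoint configuration):

    import pulumi_aws as aws

    existing = aws.sagemaker.EndpointConfiguration.get("existing",
        id="my-endpoint-config")
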
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    resource_name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    The following state arguments are supported:
    Arn string
    ARN assigned by AWS to this endpoint configuration.
    AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
    How an endpoint performs asynchronous inference.
    DataCaptureConfig EndpointConfigurationDataCaptureConfig
    Parameters to capture the input/output of SageMaker AI model endpoints. Fields are documented below.
    ExecutionRoleArn string
    ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
    KmsKeyArn string
    ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    Name string
    Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
    NamePrefix string
    Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    ProductionVariants List<EndpointConfigurationProductionVariant>
    List each model that you want to host at this endpoint. See below.
    Region string
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    ShadowProductionVariants List<EndpointConfigurationShadowProductionVariant>
    Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
    Tags Dictionary<string, string>
    Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
    TagsAll Dictionary<string, string>
    Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
    Arn string
    ARN assigned by AWS to this endpoint configuration.
    AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfigArgs
    How an endpoint performs asynchronous inference.
    DataCaptureConfig EndpointConfigurationDataCaptureConfigArgs
    Parameters to capture the input/output of SageMaker AI model endpoints. Fields are documented below.
    ExecutionRoleArn string
    ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
    KmsKeyArn string
    ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    Name string
    Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
    NamePrefix string
    Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    ProductionVariants []EndpointConfigurationProductionVariantArgs
    List each model that you want to host at this endpoint. See below.
    Region string
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    ShadowProductionVariants []EndpointConfigurationShadowProductionVariantArgs
    Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
    Tags map[string]string
    Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
    TagsAll map[string]string
    Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
    arn String
    ARN assigned by AWS to this endpoint configuration.
    asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
    How an endpoint performs asynchronous inference.
    dataCaptureConfig EndpointConfigurationDataCaptureConfig
    Parameters to capture the input/output of SageMaker AI model endpoints. Fields are documented below.
    executionRoleArn String
    ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
    kmsKeyArn String
    ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    name String
    Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
    namePrefix String
    Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    productionVariants List<EndpointConfigurationProductionVariant>
    List each model that you want to host at this endpoint. See below.
    region String
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    shadowProductionVariants List<EndpointConfigurationShadowProductionVariant>
    Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
    tags Map<String,String>
    Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
    tagsAll Map<String,String>
    Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
    arn string
    ARN assigned by AWS to this endpoint configuration.
    asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
    How an endpoint performs asynchronous inference.
    dataCaptureConfig EndpointConfigurationDataCaptureConfig
    Parameters to capture input/output of SageMaker AI models endpoints. Fields are documented below.
    executionRoleArn string
    ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
    kmsKeyArn string
    ARN of a AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    name string
    Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
    namePrefix string
    Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    productionVariants EndpointConfigurationProductionVariant[]
    List each model that you want to host at this endpoint. See below.
    region string
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    shadowProductionVariants EndpointConfigurationShadowProductionVariant[]
    Models that you want to host at this endpoint in shadow mode, with production traffic replicated from the model specified in production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
    tags {[key: string]: string}
    Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
    tagsAll {[key: string]: string}
    Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
    arn str
    ARN assigned by AWS to this endpoint configuration.
    async_inference_config EndpointConfigurationAsyncInferenceConfigArgs
    How an endpoint performs asynchronous inference.
    data_capture_config EndpointConfigurationDataCaptureConfigArgs
    Parameters to capture input/output of SageMaker AI model endpoints. Fields are documented below.
    execution_role_arn str
    ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
    kms_key_arn str
    ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    name str
    Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
    name_prefix str
    Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    production_variants Sequence[EndpointConfigurationProductionVariantArgs]
    List each model that you want to host at this endpoint. See below.
    region str
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    shadow_production_variants Sequence[EndpointConfigurationShadowProductionVariantArgs]
    Models that you want to host at this endpoint in shadow mode, with production traffic replicated from the model specified in production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
    tags Mapping[str, str]
    Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
    tags_all Mapping[str, str]
    Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
    arn String
    ARN assigned by AWS to this endpoint configuration.
    asyncInferenceConfig Property Map
    How an endpoint performs asynchronous inference.
    dataCaptureConfig Property Map
    Parameters to capture input/output of SageMaker AI model endpoints. Fields are documented below.
    executionRoleArn String
    ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
    kmsKeyArn String
    ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    name String
    Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
    namePrefix String
    Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    productionVariants List<Property Map>
    List each model that you want to host at this endpoint. See below.
    region String
    Region where this resource will be managed. Defaults to the Region set in the provider configuration.
    shadowProductionVariants List<Property Map>
    Models that you want to host at this endpoint in shadow mode, with production traffic replicated from the model specified in production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
    tags Map<String>
    Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
    tagsAll Map<String>
    Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
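
    As a sketch of the shadow-mode rule noted above (one variant in each list), the following TypeScript program pairs a single production variant with a single shadow variant; the model names are placeholders:

    import * as aws from "@pulumi/aws";

    // Shadow mode replicates production traffic to the shadow variant.
    // When shadowProductionVariants is set, each list may hold only one variant.
    const shadowEc = new aws.sagemaker.EndpointConfiguration("shadow-ec", {
        productionVariants: [{
            variantName: "production",
            modelName: "my-production-model", // placeholder model name
            initialInstanceCount: 1,
            instanceType: "ml.t2.medium",
        }],
        shadowProductionVariants: [{
            variantName: "shadow",
            modelName: "my-candidate-model", // placeholder model name
            initialInstanceCount: 1,
            instanceType: "ml.t2.medium",
        }],
    });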

    Supporting Types

    EndpointConfigurationAsyncInferenceConfig, EndpointConfigurationAsyncInferenceConfigArgs

    OutputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig
    Configuration for asynchronous inference invocation outputs.
    ClientConfig EndpointConfigurationAsyncInferenceConfigClientConfig
    Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.
    OutputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig
    Configuration for asynchronous inference invocation outputs.
    ClientConfig EndpointConfigurationAsyncInferenceConfigClientConfig
    Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.
    outputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig
    Configuration for asynchronous inference invocation outputs.
    clientConfig EndpointConfigurationAsyncInferenceConfigClientConfig
    Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.
    outputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig
    Configuration for asynchronous inference invocation outputs.
    clientConfig EndpointConfigurationAsyncInferenceConfigClientConfig
    Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.
    output_config EndpointConfigurationAsyncInferenceConfigOutputConfig
    Configuration for asynchronous inference invocation outputs.
    client_config EndpointConfigurationAsyncInferenceConfigClientConfig
    Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.
    outputConfig Property Map
    Configuration for asynchronous inference invocation outputs.
    clientConfig Property Map
    Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.

    EndpointConfigurationAsyncInferenceConfigClientConfig, EndpointConfigurationAsyncInferenceConfigClientConfigArgs

    MaxConcurrentInvocationsPerInstance int
    Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.
    MaxConcurrentInvocationsPerInstance int
    Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.
    maxConcurrentInvocationsPerInstance Integer
    Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.
    maxConcurrentInvocationsPerInstance number
    Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.
    max_concurrent_invocations_per_instance int
    Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.
    maxConcurrentInvocationsPerInstance Number
    Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.

    EndpointConfigurationAsyncInferenceConfigOutputConfig, EndpointConfigurationAsyncInferenceConfigOutputConfigArgs

    S3OutputPath string
    S3 location to upload inference responses to.
    KmsKeyId string
    KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
    NotificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig
    Configuration for notifications of inference results for asynchronous inference.
    S3FailurePath string
    S3 location to upload failed inference responses to.
    S3OutputPath string
    S3 location to upload inference responses to.
    KmsKeyId string
    KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
    NotificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig
    Configuration for notifications of inference results for asynchronous inference.
    S3FailurePath string
    S3 location to upload failed inference responses to.
    s3OutputPath String
    S3 location to upload inference responses to.
    kmsKeyId String
    KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
    notificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig
    Configuration for notifications of inference results for asynchronous inference.
    s3FailurePath String
    S3 location to upload failed inference responses to.
    s3OutputPath string
    S3 location to upload inference responses to.
    kmsKeyId string
    KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
    notificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig
    Configuration for notifications of inference results for asynchronous inference.
    s3FailurePath string
    S3 location to upload failed inference responses to.
    s3_output_path str
    S3 location to upload inference responses to.
    kms_key_id str
    KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
    notification_config EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig
    Configuration for notifications of inference results for asynchronous inference.
    s3_failure_path str
    S3 location to upload failed inference responses to.
    s3OutputPath String
    S3 location to upload inference responses to.
    kmsKeyId String
    KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
    notificationConfig Property Map
    Configuration for notifications of inference results for asynchronous inference.
    s3FailurePath String
    S3 location to upload failed inference responses to.

    EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig, EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs

    ErrorTopic string
    SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
    IncludeInferenceResponseIns List<string>
    SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
    SuccessTopic string
    SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
    ErrorTopic string
    SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
    IncludeInferenceResponseIns []string
    SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
    SuccessTopic string
    SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
    errorTopic String
    SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
    includeInferenceResponseIns List<String>
    SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
    successTopic String
    SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
    errorTopic string
    SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
    includeInferenceResponseIns string[]
    SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
    successTopic string
    SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
    error_topic str
    SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
    include_inference_response_ins Sequence[str]
    SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
    success_topic str
    SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
    errorTopic String
    SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
    includeInferenceResponseIns List<String>
    SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
    successTopic String
    SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
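
    Taken together, the async inference types above compose as in this minimal TypeScript sketch; the bucket, SNS topic ARNs, and model name are placeholders:

    import * as aws from "@pulumi/aws";

    const asyncEc = new aws.sagemaker.EndpointConfiguration("async-ec", {
        productionVariants: [{
            variantName: "variant-1",
            modelName: "my-model", // placeholder model name
            initialInstanceCount: 1,
            instanceType: "ml.m5.large",
        }],
        asyncInferenceConfig: {
            outputConfig: {
                s3OutputPath: "s3://my-bucket/async-output/",
                s3FailurePath: "s3://my-bucket/async-failures/",
                notificationConfig: {
                    successTopic: "arn:aws:sns:us-east-1:123456789012:success", // placeholder ARN
                    errorTopic: "arn:aws:sns:us-east-1:123456789012:errors",    // placeholder ARN
                    includeInferenceResponseIns: ["SUCCESS_NOTIFICATION_TOPIC"],
                },
            },
            clientConfig: {
                // Omit this to let SageMaker AI choose an optimal value.
                maxConcurrentInvocationsPerInstance: 4,
            },
        },
    });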

    EndpointConfigurationDataCaptureConfig, EndpointConfigurationDataCaptureConfigArgs

    CaptureOptions List<EndpointConfigurationDataCaptureConfigCaptureOption>
    What data to capture. Fields are documented below.
    DestinationS3Uri string
    URL for S3 location where the captured data is stored.
    InitialSamplingPercentage int
    Portion of data to capture. Should be between 0 and 100.
    CaptureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader
    Content type headers to capture. See capture_content_type_header below.
    EnableCapture bool
    Flag to enable data capture. Defaults to false.
    KmsKeyId string
    ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.
    CaptureOptions []EndpointConfigurationDataCaptureConfigCaptureOption
    What data to capture. Fields are documented below.
    DestinationS3Uri string
    URL for S3 location where the captured data is stored.
    InitialSamplingPercentage int
    Portion of data to capture. Should be between 0 and 100.
    CaptureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader
    Content type headers to capture. See capture_content_type_header below.
    EnableCapture bool
    Flag to enable data capture. Defaults to false.
    KmsKeyId string
    ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.
    captureOptions List<EndpointConfigurationDataCaptureConfigCaptureOption>
    What data to capture. Fields are documented below.
    destinationS3Uri String
    URL for S3 location where the captured data is stored.
    initialSamplingPercentage Integer
    Portion of data to capture. Should be between 0 and 100.
    captureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader
    Content type headers to capture. See capture_content_type_header below.
    enableCapture Boolean
    Flag to enable data capture. Defaults to false.
    kmsKeyId String
    ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.
    captureOptions EndpointConfigurationDataCaptureConfigCaptureOption[]
    What data to capture. Fields are documented below.
    destinationS3Uri string
    URL for S3 location where the captured data is stored.
    initialSamplingPercentage number
    Portion of data to capture. Should be between 0 and 100.
    captureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader
    Content type headers to capture. See capture_content_type_header below.
    enableCapture boolean
    Flag to enable data capture. Defaults to false.
    kmsKeyId string
    ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.
    capture_options Sequence[EndpointConfigurationDataCaptureConfigCaptureOption]
    What data to capture. Fields are documented below.
    destination_s3_uri str
    URL for S3 location where the captured data is stored.
    initial_sampling_percentage int
    Portion of data to capture. Should be between 0 and 100.
    capture_content_type_header EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader
    Content type headers to capture. See capture_content_type_header below.
    enable_capture bool
    Flag to enable data capture. Defaults to false.
    kms_key_id str
    ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.
    captureOptions List<Property Map>
    What data to capture. Fields are documented below.
    destinationS3Uri String
    URL for S3 location where the captured data is stored.
    initialSamplingPercentage Number
    Portion of data to capture. Should be between 0 and 100.
    captureContentTypeHeader Property Map
    Content type headers to capture. See capture_content_type_header below.
    enableCapture Boolean
    Flag to enable data capture. Defaults to false.
    kmsKeyId String
    ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.

    EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader, EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs

    CsvContentTypes List<string>
    CSV content type headers to capture. One of csv_content_types or json_content_types is required.
    JsonContentTypes List<string>
    The JSON content type headers to capture. One of json_content_types or csv_content_types is required.
    CsvContentTypes []string
    CSV content type headers to capture. One of csv_content_types or json_content_types is required.
    JsonContentTypes []string
    The JSON content type headers to capture. One of json_content_types or csv_content_types is required.
    csvContentTypes List<String>
    CSV content type headers to capture. One of csv_content_types or json_content_types is required.
    jsonContentTypes List<String>
    The JSON content type headers to capture. One of json_content_types or csv_content_types is required.
    csvContentTypes string[]
    CSV content type headers to capture. One of csv_content_types or json_content_types is required.
    jsonContentTypes string[]
    The JSON content type headers to capture. One of json_content_types or csv_content_types is required.
    csv_content_types Sequence[str]
    CSV content type headers to capture. One of csv_content_types or json_content_types is required.
    json_content_types Sequence[str]
    The JSON content type headers to capture. One of json_content_types or csv_content_types is required.
    csvContentTypes List<String>
    CSV content type headers to capture. One of csv_content_types or json_content_types is required.
    jsonContentTypes List<String>
    The JSON content type headers to capture. One of json_content_types or csv_content_types is required.

    EndpointConfigurationDataCaptureConfigCaptureOption, EndpointConfigurationDataCaptureConfigCaptureOptionArgs

    CaptureMode string
    Data to be captured. Should be one of Input, Output or InputAndOutput.
    CaptureMode string
    Data to be captured. Should be one of Input, Output or InputAndOutput.
    captureMode String
    Data to be captured. Should be one of Input, Output or InputAndOutput.
    captureMode string
    Data to be captured. Should be one of Input, Output or InputAndOutput.
    capture_mode str
    Data to be captured. Should be one of Input, Output or InputAndOutput.
    captureMode String
    Data to be captured. Should be one of Input, Output or InputAndOutput.
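
    The data capture types above combine as in the following sketch, which captures both inputs and outputs for half of all requests; the bucket and model name are placeholders:

    import * as aws from "@pulumi/aws";

    const capturedEc = new aws.sagemaker.EndpointConfiguration("captured-ec", {
        productionVariants: [{
            variantName: "variant-1",
            modelName: "my-model", // placeholder model name
            initialInstanceCount: 1,
            instanceType: "ml.m5.large",
        }],
        dataCaptureConfig: {
            enableCapture: true,
            initialSamplingPercentage: 50, // capture half of all requests
            destinationS3Uri: "s3://my-bucket/capture",
            captureOptions: [
                { captureMode: "Input" },
                { captureMode: "Output" },
            ],
            captureContentTypeHeader: {
                jsonContentTypes: ["application/json"],
            },
        },
    });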

    EndpointConfigurationProductionVariant, EndpointConfigurationProductionVariantArgs

    AcceleratorType string
    Size of the Elastic Inference (EI) instance to use for the production variant.
    ContainerStartupHealthCheckTimeoutInSeconds int
    Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
    CoreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig
    Core dump configuration from the model container when the process crashes. Fields are documented below.
    EnableSsmAccess bool
    Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
    InferenceAmiVersion string
    Option from a collection of preconfigured AMIs. Each image is configured by AWS with a set of software and driver versions that AWS optimizes for different machine learning workloads.
    InitialInstanceCount int
    Initial number of instances used for auto-scaling.
    InitialVariantWeight double
    Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
    InstanceType string
    Type of instance to start.
    ManagedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling
    Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    ModelDataDownloadTimeoutInSeconds int
    Timeout value, in seconds, to download and extract the model that you want to host from S3 to each inference instance associated with this production variant. Valid values are between 60 and 3600.
    ModelName string
    Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
    RoutingConfigs List<EndpointConfigurationProductionVariantRoutingConfig>
    How the endpoint routes incoming traffic. See routing_config below.
    ServerlessConfig EndpointConfigurationProductionVariantServerlessConfig
    How an endpoint performs serverless inference. See serverless_config below.
    VariantName string
    Name of the variant. If omitted, the provider will assign a random, unique name.
    VolumeSizeInGb int
    Size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
    AcceleratorType string
    Size of the Elastic Inference (EI) instance to use for the production variant.
    ContainerStartupHealthCheckTimeoutInSeconds int
    Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
    CoreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig
    Core dump configuration from the model container when the process crashes. Fields are documented below.
    EnableSsmAccess bool
    Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
    InferenceAmiVersion string
    Option from a collection of preconfigured AMIs. Each image is configured by AWS with a set of software and driver versions that AWS optimizes for different machine learning workloads.
    InitialInstanceCount int
    Initial number of instances used for auto-scaling.
    InitialVariantWeight float64
    Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
    InstanceType string
    Type of instance to start.
    ManagedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling
    Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    ModelDataDownloadTimeoutInSeconds int
    Timeout value, in seconds, to download and extract the model that you want to host from S3 to each inference instance associated with this production variant. Valid values are between 60 and 3600.
    ModelName string
    Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
    RoutingConfigs []EndpointConfigurationProductionVariantRoutingConfig
    How the endpoint routes incoming traffic. See routing_config below.
    ServerlessConfig EndpointConfigurationProductionVariantServerlessConfig
    How an endpoint performs serverless inference. See serverless_config below.
    VariantName string
    Name of the variant. If omitted, the provider will assign a random, unique name.
    VolumeSizeInGb int
    Size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
    acceleratorType String
    Size of the Elastic Inference (EI) instance to use for the production variant.
    containerStartupHealthCheckTimeoutInSeconds Integer
    Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
    coreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig
    Core dump configuration from the model container when the process crashes. Fields are documented below.
    enableSsmAccess Boolean
    Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
    inferenceAmiVersion String
    Option from a collection of preconfigured AMIs. Each image is configured by AWS with a set of software and driver versions that AWS optimizes for different machine learning workloads.
    initialInstanceCount Integer
    Initial number of instances used for auto-scaling.
    initialVariantWeight Double
    Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
    instanceType String
    Type of instance to start.
    managedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling
    Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    modelDataDownloadTimeoutInSeconds Integer
    Timeout value, in seconds, to download and extract the model that you want to host from S3 to each inference instance associated with this production variant. Valid values are between 60 and 3600.
    modelName String
    Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
    routingConfigs List<EndpointConfigurationProductionVariantRoutingConfig>
    How the endpoint routes incoming traffic. See routing_config below.
    serverlessConfig EndpointConfigurationProductionVariantServerlessConfig
    How an endpoint performs serverless inference. See serverless_config below.
    variantName String
    Name of the variant. If omitted, the provider will assign a random, unique name.
    volumeSizeInGb Integer
    Size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
    acceleratorType string
    Size of the Elastic Inference (EI) instance to use for the production variant.
    containerStartupHealthCheckTimeoutInSeconds number
    Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
    coreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig
    Core dump configuration from the model container when the process crashes. Fields are documented below.
    enableSsmAccess boolean
    Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
    inferenceAmiVersion string
    Option from a collection of preconfigured AMIs. Each image is configured by AWS with a set of software and driver versions that AWS optimizes for different machine learning workloads.
    initialInstanceCount number
    Initial number of instances used for auto-scaling.
    initialVariantWeight number
    Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
    instanceType string
    Type of instance to start.
    managedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling
    Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    modelDataDownloadTimeoutInSeconds number
    Timeout value, in seconds, to download and extract the model that you want to host from S3 to each inference instance associated with this production variant. Valid values are between 60 and 3600.
    modelName string
    Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
    routingConfigs EndpointConfigurationProductionVariantRoutingConfig[]
    How the endpoint routes incoming traffic. See routing_config below.
    serverlessConfig EndpointConfigurationProductionVariantServerlessConfig
    How an endpoint performs serverless inference. See serverless_config below.
    variantName string
    Name of the variant. If omitted, the provider will assign a random, unique name.
    volumeSizeInGb number
    Size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
    accelerator_type str
    Size of the Elastic Inference (EI) instance to use for the production variant.
    container_startup_health_check_timeout_in_seconds int
    Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
    core_dump_config EndpointConfigurationProductionVariantCoreDumpConfig
    Core dump configuration from the model container when the process crashes. Fields are documented below.
    enable_ssm_access bool
    Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
    inference_ami_version str
    Option from a collection of preconfigured AMIs. Each image is configured by AWS with a set of software and driver versions that AWS optimizes for different machine learning workloads.
    initial_instance_count int
    Initial number of instances used for auto-scaling.
    initial_variant_weight float
    Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
    instance_type str
    Type of instance to start.
    managed_instance_scaling EndpointConfigurationProductionVariantManagedInstanceScaling
    Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    model_data_download_timeout_in_seconds int
    Timeout value, in seconds, to download and extract the model that you want to host from S3 to each inference instance associated with this production variant. Valid values are between 60 and 3600.
    model_name str
    Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
    routing_configs Sequence[EndpointConfigurationProductionVariantRoutingConfig]
    How the endpoint routes incoming traffic. See routing_config below.
    serverless_config EndpointConfigurationProductionVariantServerlessConfig
    How an endpoint performs serverless inference. See serverless_config below.
    variant_name str
    Name of the variant. If omitted, the provider will assign a random, unique name.
    volume_size_in_gb int
    Size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
    acceleratorType String
    Size of the Elastic Inference (EI) instance to use for the production variant.
    containerStartupHealthCheckTimeoutInSeconds Number
    Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
    coreDumpConfig Property Map
    Core dump configuration from the model container when the process crashes. Fields are documented below.
    enableSsmAccess Boolean
    Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
    inferenceAmiVersion String
    Option from a collection of preconfigured AMIs. Each image is configured by AWS with a set of software and driver versions that AWS optimizes for different machine learning workloads.
    initialInstanceCount Number
    Initial number of instances used for auto-scaling.
    initialVariantWeight Number
    Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
    instanceType String
    Type of instance to start.
    managedInstanceScaling Property Map
    Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    modelDataDownloadTimeoutInSeconds Number
    Timeout value, in seconds, to download and extract the model that you want to host from S3 to each inference instance associated with this production variant. Valid values are between 60 and 3600.
    modelName String
    Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
    routingConfigs List<Property Map>
    How the endpoint routes incoming traffic. See routing_config below.
    serverlessConfig Property Map
    How an endpoint performs serverless inference. See serverless_config below.
    variantName String
    Name of the variant. If omitted, the provider will assign a random, unique name.
    volumeSizeInGb Number
    Size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.

    EndpointConfigurationProductionVariantCoreDumpConfig, EndpointConfigurationProductionVariantCoreDumpConfigArgs

    DestinationS3Uri string
    S3 bucket to send the core dump to.
    KmsKeyId string
    KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
    DestinationS3Uri string
    S3 bucket to send the core dump to.
    KmsKeyId string
    KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
    destinationS3Uri String
    S3 bucket to send the core dump to.
    kmsKeyId String
    KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
    destinationS3Uri string
    S3 bucket to send the core dump to.
    kmsKeyId string
    KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
    destination_s3_uri str
    S3 bucket to send the core dump to.
    kms_key_id str
    KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
    destinationS3Uri String
    S3 bucket to send the core dump to.
    kmsKeyId String
    KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
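
    For illustration, a variant-level core dump destination might look like the following TypeScript fragment; the bucket, KMS key ARN, and model name are placeholders, and the input type name assumes the SDK's usual aws.types.input naming:

    import * as aws from "@pulumi/aws";

    // A production variant that writes container core dumps to S3.
    const variantWithCoreDump: aws.types.input.sagemaker.EndpointConfigurationProductionVariant = {
        variantName: "variant-1",
        modelName: "my-model", // placeholder model name
        initialInstanceCount: 1,
        instanceType: "ml.m5.large",
        coreDumpConfig: {
            destinationS3Uri: "s3://my-bucket/core-dumps",
            kmsKeyId: "arn:aws:kms:us-east-1:123456789012:key/example", // placeholder ARN
        },
    };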

    EndpointConfigurationProductionVariantManagedInstanceScaling, EndpointConfigurationProductionVariantManagedInstanceScalingArgs

    MaxInstanceCount int
    Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    MinInstanceCount int
    Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    Status string
    Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    MaxInstanceCount int
    Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    MinInstanceCount int
    Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    Status string
    Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    maxInstanceCount Integer
    Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    minInstanceCount Integer
    Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    status String
    Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    maxInstanceCount number
    Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    minInstanceCount number
    Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    status string
    Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    max_instance_count int
    Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    min_instance_count int
    Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    status str
    Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    maxInstanceCount Number
    Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    minInstanceCount Number
    Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    status String
    Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
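
    A minimal sketch of managed instance scaling on a production variant, letting the endpoint scale between one and four instances; the model name is a placeholder:

    import * as aws from "@pulumi/aws";

    const scalingEc = new aws.sagemaker.EndpointConfiguration("scaling-ec", {
        productionVariants: [{
            variantName: "variant-1",
            modelName: "my-model", // placeholder model name
            initialInstanceCount: 1,
            instanceType: "ml.m5.large",
            managedInstanceScaling: {
                status: "ENABLED",
                minInstanceCount: 1, // floor when traffic drops
                maxInstanceCount: 4, // ceiling when traffic spikes
            },
        }],
    });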

    EndpointConfigurationProductionVariantRoutingConfig, EndpointConfigurationProductionVariantRoutingConfigArgs

    RoutingStrategy string
    How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    RoutingStrategy string
    How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    routingStrategy String
    How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    routingStrategy string
    How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    routing_strategy str
    How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    routingStrategy String
    How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
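
    For example, to prefer instances with the most spare capacity over random routing (a sketch; the model name is a placeholder):

    import * as aws from "@pulumi/aws";

    const routedEc = new aws.sagemaker.EndpointConfiguration("routed-ec", {
        productionVariants: [{
            variantName: "variant-1",
            modelName: "my-model", // placeholder model name
            initialInstanceCount: 2,
            instanceType: "ml.m5.large",
            routingConfigs: [{
                routingStrategy: "LEAST_OUTSTANDING_REQUESTS",
            }],
        }],
    });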

    EndpointConfigurationProductionVariantServerlessConfig, EndpointConfigurationProductionVariantServerlessConfigArgs

    MaxConcurrency int
    Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    MemorySizeInMb int
    Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    ProvisionedConcurrency int
    Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    MaxConcurrency int
    Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    MemorySizeInMb int
    Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    ProvisionedConcurrency int
    Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    maxConcurrency Integer
    Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    memorySizeInMb Integer
    Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    provisionedConcurrency Integer
    Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    maxConcurrency number
    Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    memorySizeInMb number
    Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    provisionedConcurrency number
    Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    max_concurrency int
    Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    memory_size_in_mb int
    Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    provisioned_concurrency int
    Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    maxConcurrency Number
    Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    memorySizeInMb Number
    Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    provisionedConcurrency Number
    Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
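
    A serverless variant omits instanceType and initialInstanceCount and is sized by memory and concurrency instead; a minimal TypeScript sketch, assuming a placeholder model:

    import * as aws from "@pulumi/aws";

    const serverlessEc = new aws.sagemaker.EndpointConfiguration("serverless-ec", {
        productionVariants: [{
            variantName: "variant-1",
            modelName: "my-model", // placeholder model name
            serverlessConfig: {
                maxConcurrency: 5,
                memorySizeInMb: 2048,      // 1 GB increments, up to 6144 MB
                provisionedConcurrency: 1, // must not exceed maxConcurrency
            },
        }],
    });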

    EndpointConfigurationShadowProductionVariant, EndpointConfigurationShadowProductionVariantArgs

    AcceleratorType string
    Size of the Elastic Inference (EI) instance to use for the production variant.
    ContainerStartupHealthCheckTimeoutInSeconds int
    Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
    CoreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig
    Core dump configuration from the model container when the process crashes. Fields are documented below.
    EnableSsmAccess bool
    Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
    InferenceAmiVersion string
    Option from a collection of preconfigured AMIs. Each image is configured by AWS with a set of software and driver versions that AWS optimizes for different machine learning workloads.
    InitialInstanceCount int
    Initial number of instances used for auto-scaling.
    InitialVariantWeight double
    Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
    InstanceType string
    Type of instance to start.
    ManagedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling
    Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    ModelDataDownloadTimeoutInSeconds int
    Timeout value, in seconds, to download and extract the model that you want to host from S3 to each inference instance associated with this production variant. Valid values are between 60 and 3600.
    ModelName string
    Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
    RoutingConfigs List<EndpointConfigurationShadowProductionVariantRoutingConfig>
    How the endpoint routes incoming traffic. See routing_config below.
    ServerlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig
    How an endpoint performs serverless inference. See serverless_config below.
    VariantName string
    Name of the variant. If omitted, the provider will assign a random, unique name.
    VolumeSizeInGb int
    Size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
    AcceleratorType string
    Size of the Elastic Inference (EI) instance to use for the production variant.
    ContainerStartupHealthCheckTimeoutInSeconds int
    Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
    CoreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig
    Core dump configuration from the model container when the process crashes. Fields are documented below.
    EnableSsmAccess bool
    Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
    InferenceAmiVersion string
    Option from a collection of preconfigured AMIs. Each image is configured by AWS with a set of software and driver versions that AWS optimizes for different machine learning workloads.
    InitialInstanceCount int
    Initial number of instances used for auto-scaling.
    InitialVariantWeight float64
    Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
    InstanceType string
    Type of instance to start.
    ManagedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling
    Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    ModelDataDownloadTimeoutInSeconds int
    Timeout value, in seconds, to download and extract the model that you want to host from S3 to each inference instance associated with this production variant. Valid values are between 60 and 3600.
    ModelName string
    Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
    RoutingConfigs []EndpointConfigurationShadowProductionVariantRoutingConfig
    How the endpoint routes incoming traffic. See routing_config below.
    ServerlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig
    How an endpoint performs serverless inference. See serverless_config below.
    VariantName string
    Name of the variant. If omitted, the provider will assign a random, unique name.
    VolumeSizeInGb int
    Size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
    acceleratorType String
    Size of the Elastic Inference (EI) instance to use for the production variant.
    containerStartupHealthCheckTimeoutInSeconds Integer
    Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
    coreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig
    Core dump configuration from the model container when the process crashes. Fields are documented below.
    enableSsmAccess Boolean
    Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
    inferenceAmiVersion String
    Option from a collection of preconfigured AMIs. Each image is configured by AWS with a set of software and driver versions that AWS optimizes for different machine learning workloads.
    initialInstanceCount Integer
    Initial number of instances used for auto-scaling.
    initialVariantWeight Double
    Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
    instanceType String
    Type of instance to start.
    managedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling
    Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    modelDataDownloadTimeoutInSeconds Integer
    Timeout value, in seconds, to download and extract the model that you want to host from S3 to each inference instance associated with this production variant. Valid values are between 60 and 3600.
    modelName String
    Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
    routingConfigs List<EndpointConfigurationShadowProductionVariantRoutingConfig>
    How the endpoint routes incoming traffic. See routing_config below.
    serverlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig
    How an endpoint performs serverless inference. See serverless_config below.
    variantName String
    Name of the variant. If omitted, the provider will assign a random, unique name.
    volumeSizeInGb Integer
    Size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
    acceleratorType string
    Size of the Elastic Inference (EI) instance to use for the production variant.
    containerStartupHealthCheckTimeoutInSeconds number
    Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
    coreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig
    Core dump configuration from the model container when the process crashes. Fields are documented below.
    enableSsmAccess boolean
    Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
    inferenceAmiVersion string
    Option from a collection of preconfigured AMIs. Each image is configured by AWS with a set of software and driver versions that AWS optimizes for different machine learning workloads.
    initialInstanceCount number
    Initial number of instances used for auto-scaling.
    initialVariantWeight number
    Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
    instanceType string
    Type of instance to start.
    managedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling
    Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    modelDataDownloadTimeoutInSeconds number
    Timeout value, in seconds, to download and extract the model that you want to host from S3 to each inference instance associated with this production variant. Valid values are between 60 and 3600.
    modelName string
    Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
    routingConfigs EndpointConfigurationShadowProductionVariantRoutingConfig[]
    How the endpoint routes incoming traffic. See routing_config below.
    serverlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig
    How an endpoint performs serverless inference. See serverless_config below.
    variantName string
    Name of the variant. If omitted, the provider will assign a random, unique name.
    volumeSizeInGb number
    Size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values are between 1 and 512.
    accelerator_type str
    Size of the Elastic Inference (EI) instance to use for the production variant.
    container_startup_health_check_timeout_in_seconds int
    Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
    core_dump_config EndpointConfigurationShadowProductionVariantCoreDumpConfig
    Configuration for a core dump from the model container when the process crashes. Fields are documented below.
    enable_ssm_access bool
    Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
    inference_ami_version str
    Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
    initial_instance_count int
    Initial number of instances used for auto-scaling.
    initial_variant_weight float
    Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
    instance_type str
    Type of instance to start.
    managed_instance_scaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling
    Controls the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    model_data_download_timeout_in_seconds int
    Timeout value, in seconds, to download and extract the model that you want to host from S3 to each inference instance associated with this production variant. Valid values between 60 and 3600.
    model_name str
    Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
    routing_configs Sequence[EndpointConfigurationShadowProductionVariantRoutingConfig]
    How the endpoint routes incoming traffic. See routing_config below.
    serverless_config EndpointConfigurationShadowProductionVariantServerlessConfig
    How an endpoint performs serverless inference.
    variant_name str
    Name of the variant. If omitted, the provider will assign a random, unique name.
    volume_size_in_gb int
    Size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values between 1 and 512.
    acceleratorType String
    Size of the Elastic Inference (EI) instance to use for the production variant.
    containerStartupHealthCheckTimeoutInSeconds Number
    Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
    coreDumpConfig Property Map
    Configuration for a core dump from the model container when the process crashes. Fields are documented below.
    enableSsmAccess Boolean
    Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
    inferenceAmiVersion String
    Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
    initialInstanceCount Number
    Initial number of instances used for auto-scaling.
    initialVariantWeight Number
    Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
    instanceType String
    Type of instance to start.
    managedInstanceScaling Property Map
    Controls the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    modelDataDownloadTimeoutInSeconds Number
    Timeout value, in seconds, to download and extract the model that you want to host from S3 to each inference instance associated with this production variant. Valid values between 60 and 3600.
    modelName String
    Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
    routingConfigs List<Property Map>
    How the endpoint routes incoming traffic. See routing_config below.
    serverlessConfig Property Map
    How an endpoint performs serverless inference.
    variantName String
    Name of the variant. If omitted, the provider will assign a random, unique name.
    volumeSizeInGb Number
    Size, in GB, of the ML storage volume attached to each inference instance associated with the production variant. Valid values between 1 and 512.
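
    The following TypeScript sketch shows these fields in context: one production variant whose traffic is mirrored to a shadow variant. The model names are placeholders for existing aws.sagemaker.Model resources in your stack.

    import * as aws from "@pulumi/aws";
    
    const ec = new aws.sagemaker.EndpointConfiguration("ec-shadow", {
        productionVariants: [{
            variantName: "production",
            modelName: "prod-model", // placeholder: an existing aws.sagemaker.Model name
            initialInstanceCount: 1,
            instanceType: "ml.t2.medium",
        }],
        // The shadow variant receives a copy of the production traffic so the
        // candidate model can be evaluated without serving live responses.
        shadowProductionVariants: [{
            variantName: "shadow",
            modelName: "candidate-model", // placeholder: the model under evaluation
            initialInstanceCount: 1,
            instanceType: "ml.t2.medium",
        }],
    });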

    EndpointConfigurationShadowProductionVariantCoreDumpConfig, EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs

    DestinationS3Uri string
    S3 bucket to send the core dump to.
    KmsKeyId string
    KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
    DestinationS3Uri string
    S3 bucket to send the core dump to.
    KmsKeyId string
    KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
    destinationS3Uri String
    S3 bucket to send the core dump to.
    kmsKeyId String
    KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
    destinationS3Uri string
    S3 bucket to send the core dump to.
    kmsKeyId string
    KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
    destination_s3_uri str
    S3 bucket to send the core dump to.
    kms_key_id str
    KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
    destinationS3Uri String
    S3 bucket to send the core dump to.
    kmsKeyId String
    KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
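
    As a hedged illustration of the fields above, this TypeScript sketch enables core dumps on a shadow variant. The bucket URI, KMS key alias, and model names are all placeholders.

    import * as aws from "@pulumi/aws";
    
    const ec = new aws.sagemaker.EndpointConfiguration("ec-core-dump", {
        productionVariants: [{
            variantName: "production",
            modelName: "prod-model", // placeholder model name
            initialInstanceCount: 1,
            instanceType: "ml.t2.medium",
        }],
        shadowProductionVariants: [{
            variantName: "shadow",
            modelName: "candidate-model", // placeholder model name
            initialInstanceCount: 1,
            instanceType: "ml.t2.medium",
            coreDumpConfig: {
                destinationS3Uri: "s3://my-core-dump-bucket/dumps", // placeholder bucket
                kmsKeyId: "alias/my-core-dump-key",                 // placeholder KMS key
            },
        }],
    });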

    EndpointConfigurationShadowProductionVariantManagedInstanceScaling, EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs

    MaxInstanceCount int
    Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    MinInstanceCount int
    Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    Status string
    Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    MaxInstanceCount int
    Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    MinInstanceCount int
    Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    Status string
    Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    maxInstanceCount Integer
    Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    minInstanceCount Integer
    Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    status String
    Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    maxInstanceCount number
    Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    minInstanceCount number
    Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    status string
    Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    max_instance_count int
    Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    min_instance_count int
    Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    status str
    Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    maxInstanceCount Number
    Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    minInstanceCount Number
    Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    status String
    Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
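
    Per the notes above, managed instance scaling is used with Inference Components endpoints, where model_name is omitted from the variant and execution_role_arn is set on the endpoint configuration itself; the block has the same shape on production and shadow variants. A minimal TypeScript sketch, with a placeholder IAM role ARN:

    import * as aws from "@pulumi/aws";
    
    const ec = new aws.sagemaker.EndpointConfiguration("ec-scaling", {
        executionRoleArn: "arn:aws:iam::123456789012:role/sagemaker-execution", // placeholder role
        productionVariants: [{
            variantName: "production",
            // modelName omitted: models are attached later via inference components
            initialInstanceCount: 1,
            instanceType: "ml.m5.large",
            managedInstanceScaling: {
                status: "ENABLED",
                minInstanceCount: 1, // never scale below one instance
                maxInstanceCount: 4, // cap provisioning at four instances
            },
        }],
    });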

    EndpointConfigurationShadowProductionVariantRoutingConfig, EndpointConfigurationShadowProductionVariantRoutingConfigArgs

    RoutingStrategy string
    How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    RoutingStrategy string
    How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    routingStrategy String
    How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    routingStrategy string
    How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    routing_strategy str
    How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    routingStrategy String
    How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
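
    For illustration, a variant that prefers least-loaded routing might look like the following TypeScript sketch (the model name is a placeholder; the block has the same shape on production and shadow variants):

    import * as aws from "@pulumi/aws";
    
    const ec = new aws.sagemaker.EndpointConfiguration("ec-routing", {
        productionVariants: [{
            variantName: "production",
            modelName: "prod-model", // placeholder model name
            initialInstanceCount: 2,
            instanceType: "ml.m5.large",
            routingConfigs: [{
                // Send each request to the instance with the most spare capacity.
                routingStrategy: "LEAST_OUTSTANDING_REQUESTS",
            }],
        }],
    });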

    EndpointConfigurationShadowProductionVariantServerlessConfig, EndpointConfigurationShadowProductionVariantServerlessConfigArgs

    MaxConcurrency int
    Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    MemorySizeInMb int
    Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    ProvisionedConcurrency int
    Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    MaxConcurrency int
    Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    MemorySizeInMb int
    Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    ProvisionedConcurrency int
    Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    maxConcurrency Integer
    Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    memorySizeInMb Integer
    Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    provisionedConcurrency Integer
    Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    maxConcurrency number
    Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    memorySizeInMb number
    Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    provisionedConcurrency number
    Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    max_concurrency int
    Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    memory_size_in_mb int
    Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    provisioned_concurrency int
    Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    maxConcurrency Number
    Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    memorySizeInMb Number
    Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    provisionedConcurrency Number
    Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
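
    A serverless variant supplies this block in place of instance_type and initial_instance_count. A minimal TypeScript sketch, with a placeholder model name:

    import * as aws from "@pulumi/aws";
    
    const ec = new aws.sagemaker.EndpointConfiguration("ec-serverless", {
        productionVariants: [{
            variantName: "serverless",
            modelName: "prod-model", // placeholder model name
            // No instanceType or initialInstanceCount: serverlessConfig takes their place.
            serverlessConfig: {
                maxConcurrency: 20,        // at most 20 concurrent invocations
                memorySizeInMb: 2048,      // 2 GB, in the allowed 1 GB increments
                provisionedConcurrency: 5, // keep 5 invocations warm (<= maxConcurrency)
            },
        }],
    });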

    Import

    Using pulumi import, import endpoint configurations using the name. For example:

    $ pulumi import aws:sagemaker/endpointConfiguration:EndpointConfiguration test_endpoint_config endpoint-config-foo
    

    To learn more about importing existing cloud resources, see Importing resources.

    Package Details

    Repository
    AWS Classic pulumi/pulumi-aws
    License
    Apache-2.0
    Notes
    This Pulumi package is based on the aws Terraform Provider.