aws.sagemaker.EndpointConfiguration

AWS v7.14.0, Dec 11 25

AWS v7.14.0 published on Thursday, Dec 11, 2025 by Pulumi

Schema (JSON)

pulumi/pulumi-aws

AWS v7.14.0 published on Thursday, Dec 11, 2025 by Pulumi

Schema (JSON)

pulumi/pulumi-aws

Example Usage

Basic usage:

import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

const ec = new aws.sagemaker.EndpointConfiguration("ec", {
    name: "my-endpoint-config",
    productionVariants: [{
        variantName: "variant-1",
        modelName: m.name,
        initialInstanceCount: 1,
        instanceType: "ml.t2.medium",
    }],
    tags: {
        Name: "foo",
    },
});

import pulumi
import pulumi_aws as aws

ec = aws.sagemaker.EndpointConfiguration("ec",
    name="my-endpoint-config",
    production_variants=[{
        "variant_name": "variant-1",
        "model_name": m["name"],
        "initial_instance_count": 1,
        "instance_type": "ml.t2.medium",
    }],
    tags={
        "Name": "foo",
    })

package main

import (
	"github.com/pulumi/pulumi-aws/sdk/v7/go/aws/sagemaker"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := sagemaker.NewEndpointConfiguration(ctx, "ec", &sagemaker.EndpointConfigurationArgs{
			Name: pulumi.String("my-endpoint-config"),
			ProductionVariants: sagemaker.EndpointConfigurationProductionVariantArray{
				&sagemaker.EndpointConfigurationProductionVariantArgs{
					VariantName:          pulumi.String("variant-1"),
					ModelName:            pulumi.Any(m.Name),
					InitialInstanceCount: pulumi.Int(1),
					InstanceType:         pulumi.String("ml.t2.medium"),
				},
			},
			Tags: pulumi.StringMap{
				"Name": pulumi.String("foo"),
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;

return await Deployment.RunAsync(() => 
{
    var ec = new Aws.Sagemaker.EndpointConfiguration("ec", new()
    {
        Name = "my-endpoint-config",
        ProductionVariants = new[]
        {
            new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantArgs
            {
                VariantName = "variant-1",
                ModelName = m.Name,
                InitialInstanceCount = 1,
                InstanceType = "ml.t2.medium",
            },
        },
        Tags = 
        {
            { "Name", "foo" },
        },
    });

});

package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.sagemaker.EndpointConfiguration;
import com.pulumi.aws.sagemaker.EndpointConfigurationArgs;
import com.pulumi.aws.sagemaker.inputs.EndpointConfigurationProductionVariantArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var ec = new EndpointConfiguration("ec", EndpointConfigurationArgs.builder()
            .name("my-endpoint-config")
            .productionVariants(EndpointConfigurationProductionVariantArgs.builder()
                .variantName("variant-1")
                .modelName(m.name())
                .initialInstanceCount(1)
                .instanceType("ml.t2.medium")
                .build())
            .tags(Map.of("Name", "foo"))
            .build());

    }
}

resources:
  ec:
    type: aws:sagemaker:EndpointConfiguration
    properties:
      name: my-endpoint-config
      productionVariants:
        - variantName: variant-1
          modelName: ${m.name}
          initialInstanceCount: 1
          instanceType: ml.t2.medium
      tags:
        Name: foo

Create EndpointConfiguration Resource

Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.

Constructor syntax

new EndpointConfiguration(name: string, args: EndpointConfigurationArgs, opts?: CustomResourceOptions);

@overload
def EndpointConfiguration(resource_name: str,
                          args: EndpointConfigurationArgs,
                          opts: Optional[ResourceOptions] = None)

@overload
def EndpointConfiguration(resource_name: str,
                          opts: Optional[ResourceOptions] = None,
                          production_variants: Optional[Sequence[EndpointConfigurationProductionVariantArgs]] = None,
                          async_inference_config: Optional[EndpointConfigurationAsyncInferenceConfigArgs] = None,
                          data_capture_config: Optional[EndpointConfigurationDataCaptureConfigArgs] = None,
                          execution_role_arn: Optional[str] = None,
                          kms_key_arn: Optional[str] = None,
                          name: Optional[str] = None,
                          name_prefix: Optional[str] = None,
                          region: Optional[str] = None,
                          shadow_production_variants: Optional[Sequence[EndpointConfigurationShadowProductionVariantArgs]] = None,
                          tags: Optional[Mapping[str, str]] = None)

func NewEndpointConfiguration(ctx *Context, name string, args EndpointConfigurationArgs, opts ...ResourceOption) (*EndpointConfiguration, error)

public EndpointConfiguration(string name, EndpointConfigurationArgs args, CustomResourceOptions? opts = null)

public EndpointConfiguration(String name, EndpointConfigurationArgs args)
public EndpointConfiguration(String name, EndpointConfigurationArgs args, CustomResourceOptions options)

type: aws:sagemaker:EndpointConfiguration
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.

Parameters

name string: The unique name of the resource.
args EndpointConfigurationArgs: The arguments to resource properties.
opts CustomResourceOptions: Bag of options to control resource's behavior.

resource_name str: The unique name of the resource.
args EndpointConfigurationArgs: The arguments to resource properties.
opts ResourceOptions: Bag of options to control resource's behavior.

ctx Context: Context object for the current deployment.
name string: The unique name of the resource.
args EndpointConfigurationArgs: The arguments to resource properties.
opts ResourceOption: Bag of options to control resource's behavior.

name string: The unique name of the resource.
args EndpointConfigurationArgs: The arguments to resource properties.
opts CustomResourceOptions: Bag of options to control resource's behavior.

name String: The unique name of the resource.
args EndpointConfigurationArgs: The arguments to resource properties.
options CustomResourceOptions: Bag of options to control resource's behavior.

Constructor example

The following reference example uses placeholder values for all input properties.

var endpointConfigurationResource = new Aws.Sagemaker.EndpointConfiguration("endpointConfigurationResource", new()
{
    ProductionVariants = new[]
    {
        new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantArgs
        {
            AcceleratorType = "string",
            ContainerStartupHealthCheckTimeoutInSeconds = 0,
            CoreDumpConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantCoreDumpConfigArgs
            {
                DestinationS3Uri = "string",
                KmsKeyId = "string",
            },
            EnableSsmAccess = false,
            InferenceAmiVersion = "string",
            InitialInstanceCount = 0,
            InitialVariantWeight = 0,
            InstanceType = "string",
            ManagedInstanceScaling = new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantManagedInstanceScalingArgs
            {
                MaxInstanceCount = 0,
                MinInstanceCount = 0,
                Status = "string",
            },
            ModelDataDownloadTimeoutInSeconds = 0,
            ModelName = "string",
            RoutingConfigs = new[]
            {
                new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantRoutingConfigArgs
                {
                    RoutingStrategy = "string",
                },
            },
            ServerlessConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantServerlessConfigArgs
            {
                MaxConcurrency = 0,
                MemorySizeInMb = 0,
                ProvisionedConcurrency = 0,
            },
            VariantName = "string",
            VolumeSizeInGb = 0,
        },
    },
    AsyncInferenceConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigArgs
    {
        OutputConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigOutputConfigArgs
        {
            S3OutputPath = "string",
            KmsKeyId = "string",
            NotificationConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs
            {
                ErrorTopic = "string",
                IncludeInferenceResponseIns = new[]
                {
                    "string",
                },
                SuccessTopic = "string",
            },
            S3FailurePath = "string",
        },
        ClientConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigClientConfigArgs
        {
            MaxConcurrentInvocationsPerInstance = 0,
        },
    },
    DataCaptureConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationDataCaptureConfigArgs
    {
        CaptureOptions = new[]
        {
            new Aws.Sagemaker.Inputs.EndpointConfigurationDataCaptureConfigCaptureOptionArgs
            {
                CaptureMode = "string",
            },
        },
        DestinationS3Uri = "string",
        InitialSamplingPercentage = 0,
        CaptureContentTypeHeader = new Aws.Sagemaker.Inputs.EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs
        {
            CsvContentTypes = new[]
            {
                "string",
            },
            JsonContentTypes = new[]
            {
                "string",
            },
        },
        EnableCapture = false,
        KmsKeyId = "string",
    },
    ExecutionRoleArn = "string",
    KmsKeyArn = "string",
    Name = "string",
    NamePrefix = "string",
    Region = "string",
    ShadowProductionVariants = new[]
    {
        new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantArgs
        {
            AcceleratorType = "string",
            ContainerStartupHealthCheckTimeoutInSeconds = 0,
            CoreDumpConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs
            {
                DestinationS3Uri = "string",
                KmsKeyId = "string",
            },
            EnableSsmAccess = false,
            InferenceAmiVersion = "string",
            InitialInstanceCount = 0,
            InitialVariantWeight = 0,
            InstanceType = "string",
            ManagedInstanceScaling = new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs
            {
                MaxInstanceCount = 0,
                MinInstanceCount = 0,
                Status = "string",
            },
            ModelDataDownloadTimeoutInSeconds = 0,
            ModelName = "string",
            RoutingConfigs = new[]
            {
                new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantRoutingConfigArgs
                {
                    RoutingStrategy = "string",
                },
            },
            ServerlessConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantServerlessConfigArgs
            {
                MaxConcurrency = 0,
                MemorySizeInMb = 0,
                ProvisionedConcurrency = 0,
            },
            VariantName = "string",
            VolumeSizeInGb = 0,
        },
    },
    Tags = 
    {
        { "string", "string" },
    },
});

example, err := sagemaker.NewEndpointConfiguration(ctx, "endpointConfigurationResource", &sagemaker.EndpointConfigurationArgs{
	ProductionVariants: sagemaker.EndpointConfigurationProductionVariantArray{
		&sagemaker.EndpointConfigurationProductionVariantArgs{
			AcceleratorType: pulumi.String("string"),
			ContainerStartupHealthCheckTimeoutInSeconds: pulumi.Int(0),
			CoreDumpConfig: &sagemaker.EndpointConfigurationProductionVariantCoreDumpConfigArgs{
				DestinationS3Uri: pulumi.String("string"),
				KmsKeyId:         pulumi.String("string"),
			},
			EnableSsmAccess:      pulumi.Bool(false),
			InferenceAmiVersion:  pulumi.String("string"),
			InitialInstanceCount: pulumi.Int(0),
			InitialVariantWeight: pulumi.Float64(0),
			InstanceType:         pulumi.String("string"),
			ManagedInstanceScaling: &sagemaker.EndpointConfigurationProductionVariantManagedInstanceScalingArgs{
				MaxInstanceCount: pulumi.Int(0),
				MinInstanceCount: pulumi.Int(0),
				Status:           pulumi.String("string"),
			},
			ModelDataDownloadTimeoutInSeconds: pulumi.Int(0),
			ModelName:                         pulumi.String("string"),
			RoutingConfigs: sagemaker.EndpointConfigurationProductionVariantRoutingConfigArray{
				&sagemaker.EndpointConfigurationProductionVariantRoutingConfigArgs{
					RoutingStrategy: pulumi.String("string"),
				},
			},
			ServerlessConfig: &sagemaker.EndpointConfigurationProductionVariantServerlessConfigArgs{
				MaxConcurrency:         pulumi.Int(0),
				MemorySizeInMb:         pulumi.Int(0),
				ProvisionedConcurrency: pulumi.Int(0),
			},
			VariantName:    pulumi.String("string"),
			VolumeSizeInGb: pulumi.Int(0),
		},
	},
	AsyncInferenceConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigArgs{
		OutputConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigOutputConfigArgs{
			S3OutputPath: pulumi.String("string"),
			KmsKeyId:     pulumi.String("string"),
			NotificationConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs{
				ErrorTopic: pulumi.String("string"),
				IncludeInferenceResponseIns: pulumi.StringArray{
					pulumi.String("string"),
				},
				SuccessTopic: pulumi.String("string"),
			},
			S3FailurePath: pulumi.String("string"),
		},
		ClientConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigClientConfigArgs{
			MaxConcurrentInvocationsPerInstance: pulumi.Int(0),
		},
	},
	DataCaptureConfig: &sagemaker.EndpointConfigurationDataCaptureConfigArgs{
		CaptureOptions: sagemaker.EndpointConfigurationDataCaptureConfigCaptureOptionArray{
			&sagemaker.EndpointConfigurationDataCaptureConfigCaptureOptionArgs{
				CaptureMode: pulumi.String("string"),
			},
		},
		DestinationS3Uri:          pulumi.String("string"),
		InitialSamplingPercentage: pulumi.Int(0),
		CaptureContentTypeHeader: &sagemaker.EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs{
			CsvContentTypes: pulumi.StringArray{
				pulumi.String("string"),
			},
			JsonContentTypes: pulumi.StringArray{
				pulumi.String("string"),
			},
		},
		EnableCapture: pulumi.Bool(false),
		KmsKeyId:      pulumi.String("string"),
	},
	ExecutionRoleArn: pulumi.String("string"),
	KmsKeyArn:        pulumi.String("string"),
	Name:             pulumi.String("string"),
	NamePrefix:       pulumi.String("string"),
	Region:           pulumi.String("string"),
	ShadowProductionVariants: sagemaker.EndpointConfigurationShadowProductionVariantArray{
		&sagemaker.EndpointConfigurationShadowProductionVariantArgs{
			AcceleratorType: pulumi.String("string"),
			ContainerStartupHealthCheckTimeoutInSeconds: pulumi.Int(0),
			CoreDumpConfig: &sagemaker.EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs{
				DestinationS3Uri: pulumi.String("string"),
				KmsKeyId:         pulumi.String("string"),
			},
			EnableSsmAccess:      pulumi.Bool(false),
			InferenceAmiVersion:  pulumi.String("string"),
			InitialInstanceCount: pulumi.Int(0),
			InitialVariantWeight: pulumi.Float64(0),
			InstanceType:         pulumi.String("string"),
			ManagedInstanceScaling: &sagemaker.EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs{
				MaxInstanceCount: pulumi.Int(0),
				MinInstanceCount: pulumi.Int(0),
				Status:           pulumi.String("string"),
			},
			ModelDataDownloadTimeoutInSeconds: pulumi.Int(0),
			ModelName:                         pulumi.String("string"),
			RoutingConfigs: sagemaker.EndpointConfigurationShadowProductionVariantRoutingConfigArray{
				&sagemaker.EndpointConfigurationShadowProductionVariantRoutingConfigArgs{
					RoutingStrategy: pulumi.String("string"),
				},
			},
			ServerlessConfig: &sagemaker.EndpointConfigurationShadowProductionVariantServerlessConfigArgs{
				MaxConcurrency:         pulumi.Int(0),
				MemorySizeInMb:         pulumi.Int(0),
				ProvisionedConcurrency: pulumi.Int(0),
			},
			VariantName:    pulumi.String("string"),
			VolumeSizeInGb: pulumi.Int(0),
		},
	},
	Tags: pulumi.StringMap{
		"string": pulumi.String("string"),
	},
})

var endpointConfigurationResource = new EndpointConfiguration("endpointConfigurationResource", EndpointConfigurationArgs.builder()
    .productionVariants(EndpointConfigurationProductionVariantArgs.builder()
        .acceleratorType("string")
        .containerStartupHealthCheckTimeoutInSeconds(0)
        .coreDumpConfig(EndpointConfigurationProductionVariantCoreDumpConfigArgs.builder()
            .destinationS3Uri("string")
            .kmsKeyId("string")
            .build())
        .enableSsmAccess(false)
        .inferenceAmiVersion("string")
        .initialInstanceCount(0)
        .initialVariantWeight(0.0)
        .instanceType("string")
        .managedInstanceScaling(EndpointConfigurationProductionVariantManagedInstanceScalingArgs.builder()
            .maxInstanceCount(0)
            .minInstanceCount(0)
            .status("string")
            .build())
        .modelDataDownloadTimeoutInSeconds(0)
        .modelName("string")
        .routingConfigs(EndpointConfigurationProductionVariantRoutingConfigArgs.builder()
            .routingStrategy("string")
            .build())
        .serverlessConfig(EndpointConfigurationProductionVariantServerlessConfigArgs.builder()
            .maxConcurrency(0)
            .memorySizeInMb(0)
            .provisionedConcurrency(0)
            .build())
        .variantName("string")
        .volumeSizeInGb(0)
        .build())
    .asyncInferenceConfig(EndpointConfigurationAsyncInferenceConfigArgs.builder()
        .outputConfig(EndpointConfigurationAsyncInferenceConfigOutputConfigArgs.builder()
            .s3OutputPath("string")
            .kmsKeyId("string")
            .notificationConfig(EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs.builder()
                .errorTopic("string")
                .includeInferenceResponseIns("string")
                .successTopic("string")
                .build())
            .s3FailurePath("string")
            .build())
        .clientConfig(EndpointConfigurationAsyncInferenceConfigClientConfigArgs.builder()
            .maxConcurrentInvocationsPerInstance(0)
            .build())
        .build())
    .dataCaptureConfig(EndpointConfigurationDataCaptureConfigArgs.builder()
        .captureOptions(EndpointConfigurationDataCaptureConfigCaptureOptionArgs.builder()
            .captureMode("string")
            .build())
        .destinationS3Uri("string")
        .initialSamplingPercentage(0)
        .captureContentTypeHeader(EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs.builder()
            .csvContentTypes("string")
            .jsonContentTypes("string")
            .build())
        .enableCapture(false)
        .kmsKeyId("string")
        .build())
    .executionRoleArn("string")
    .kmsKeyArn("string")
    .name("string")
    .namePrefix("string")
    .region("string")
    .shadowProductionVariants(EndpointConfigurationShadowProductionVariantArgs.builder()
        .acceleratorType("string")
        .containerStartupHealthCheckTimeoutInSeconds(0)
        .coreDumpConfig(EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs.builder()
            .destinationS3Uri("string")
            .kmsKeyId("string")
            .build())
        .enableSsmAccess(false)
        .inferenceAmiVersion("string")
        .initialInstanceCount(0)
        .initialVariantWeight(0.0)
        .instanceType("string")
        .managedInstanceScaling(EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs.builder()
            .maxInstanceCount(0)
            .minInstanceCount(0)
            .status("string")
            .build())
        .modelDataDownloadTimeoutInSeconds(0)
        .modelName("string")
        .routingConfigs(EndpointConfigurationShadowProductionVariantRoutingConfigArgs.builder()
            .routingStrategy("string")
            .build())
        .serverlessConfig(EndpointConfigurationShadowProductionVariantServerlessConfigArgs.builder()
            .maxConcurrency(0)
            .memorySizeInMb(0)
            .provisionedConcurrency(0)
            .build())
        .variantName("string")
        .volumeSizeInGb(0)
        .build())
    .tags(Map.of("string", "string"))
    .build());

endpoint_configuration_resource = aws.sagemaker.EndpointConfiguration("endpointConfigurationResource",
    production_variants=[{
        "accelerator_type": "string",
        "container_startup_health_check_timeout_in_seconds": 0,
        "core_dump_config": {
            "destination_s3_uri": "string",
            "kms_key_id": "string",
        },
        "enable_ssm_access": False,
        "inference_ami_version": "string",
        "initial_instance_count": 0,
        "initial_variant_weight": 0,
        "instance_type": "string",
        "managed_instance_scaling": {
            "max_instance_count": 0,
            "min_instance_count": 0,
            "status": "string",
        },
        "model_data_download_timeout_in_seconds": 0,
        "model_name": "string",
        "routing_configs": [{
            "routing_strategy": "string",
        }],
        "serverless_config": {
            "max_concurrency": 0,
            "memory_size_in_mb": 0,
            "provisioned_concurrency": 0,
        },
        "variant_name": "string",
        "volume_size_in_gb": 0,
    }],
    async_inference_config={
        "output_config": {
            "s3_output_path": "string",
            "kms_key_id": "string",
            "notification_config": {
                "error_topic": "string",
                "include_inference_response_ins": ["string"],
                "success_topic": "string",
            },
            "s3_failure_path": "string",
        },
        "client_config": {
            "max_concurrent_invocations_per_instance": 0,
        },
    },
    data_capture_config={
        "capture_options": [{
            "capture_mode": "string",
        }],
        "destination_s3_uri": "string",
        "initial_sampling_percentage": 0,
        "capture_content_type_header": {
            "csv_content_types": ["string"],
            "json_content_types": ["string"],
        },
        "enable_capture": False,
        "kms_key_id": "string",
    },
    execution_role_arn="string",
    kms_key_arn="string",
    name="string",
    name_prefix="string",
    region="string",
    shadow_production_variants=[{
        "accelerator_type": "string",
        "container_startup_health_check_timeout_in_seconds": 0,
        "core_dump_config": {
            "destination_s3_uri": "string",
            "kms_key_id": "string",
        },
        "enable_ssm_access": False,
        "inference_ami_version": "string",
        "initial_instance_count": 0,
        "initial_variant_weight": 0,
        "instance_type": "string",
        "managed_instance_scaling": {
            "max_instance_count": 0,
            "min_instance_count": 0,
            "status": "string",
        },
        "model_data_download_timeout_in_seconds": 0,
        "model_name": "string",
        "routing_configs": [{
            "routing_strategy": "string",
        }],
        "serverless_config": {
            "max_concurrency": 0,
            "memory_size_in_mb": 0,
            "provisioned_concurrency": 0,
        },
        "variant_name": "string",
        "volume_size_in_gb": 0,
    }],
    tags={
        "string": "string",
    })

const endpointConfigurationResource = new aws.sagemaker.EndpointConfiguration("endpointConfigurationResource", {
    productionVariants: [{
        acceleratorType: "string",
        containerStartupHealthCheckTimeoutInSeconds: 0,
        coreDumpConfig: {
            destinationS3Uri: "string",
            kmsKeyId: "string",
        },
        enableSsmAccess: false,
        inferenceAmiVersion: "string",
        initialInstanceCount: 0,
        initialVariantWeight: 0,
        instanceType: "string",
        managedInstanceScaling: {
            maxInstanceCount: 0,
            minInstanceCount: 0,
            status: "string",
        },
        modelDataDownloadTimeoutInSeconds: 0,
        modelName: "string",
        routingConfigs: [{
            routingStrategy: "string",
        }],
        serverlessConfig: {
            maxConcurrency: 0,
            memorySizeInMb: 0,
            provisionedConcurrency: 0,
        },
        variantName: "string",
        volumeSizeInGb: 0,
    }],
    asyncInferenceConfig: {
        outputConfig: {
            s3OutputPath: "string",
            kmsKeyId: "string",
            notificationConfig: {
                errorTopic: "string",
                includeInferenceResponseIns: ["string"],
                successTopic: "string",
            },
            s3FailurePath: "string",
        },
        clientConfig: {
            maxConcurrentInvocationsPerInstance: 0,
        },
    },
    dataCaptureConfig: {
        captureOptions: [{
            captureMode: "string",
        }],
        destinationS3Uri: "string",
        initialSamplingPercentage: 0,
        captureContentTypeHeader: {
            csvContentTypes: ["string"],
            jsonContentTypes: ["string"],
        },
        enableCapture: false,
        kmsKeyId: "string",
    },
    executionRoleArn: "string",
    kmsKeyArn: "string",
    name: "string",
    namePrefix: "string",
    region: "string",
    shadowProductionVariants: [{
        acceleratorType: "string",
        containerStartupHealthCheckTimeoutInSeconds: 0,
        coreDumpConfig: {
            destinationS3Uri: "string",
            kmsKeyId: "string",
        },
        enableSsmAccess: false,
        inferenceAmiVersion: "string",
        initialInstanceCount: 0,
        initialVariantWeight: 0,
        instanceType: "string",
        managedInstanceScaling: {
            maxInstanceCount: 0,
            minInstanceCount: 0,
            status: "string",
        },
        modelDataDownloadTimeoutInSeconds: 0,
        modelName: "string",
        routingConfigs: [{
            routingStrategy: "string",
        }],
        serverlessConfig: {
            maxConcurrency: 0,
            memorySizeInMb: 0,
            provisionedConcurrency: 0,
        },
        variantName: "string",
        volumeSizeInGb: 0,
    }],
    tags: {
        string: "string",
    },
});

type: aws:sagemaker:EndpointConfiguration
properties:
    asyncInferenceConfig:
        clientConfig:
            maxConcurrentInvocationsPerInstance: 0
        outputConfig:
            kmsKeyId: string
            notificationConfig:
                errorTopic: string
                includeInferenceResponseIns:
                    - string
                successTopic: string
            s3FailurePath: string
            s3OutputPath: string
    dataCaptureConfig:
        captureContentTypeHeader:
            csvContentTypes:
                - string
            jsonContentTypes:
                - string
        captureOptions:
            - captureMode: string
        destinationS3Uri: string
        enableCapture: false
        initialSamplingPercentage: 0
        kmsKeyId: string
    executionRoleArn: string
    kmsKeyArn: string
    name: string
    namePrefix: string
    productionVariants:
        - acceleratorType: string
          containerStartupHealthCheckTimeoutInSeconds: 0
          coreDumpConfig:
            destinationS3Uri: string
            kmsKeyId: string
          enableSsmAccess: false
          inferenceAmiVersion: string
          initialInstanceCount: 0
          initialVariantWeight: 0
          instanceType: string
          managedInstanceScaling:
            maxInstanceCount: 0
            minInstanceCount: 0
            status: string
          modelDataDownloadTimeoutInSeconds: 0
          modelName: string
          routingConfigs:
            - routingStrategy: string
          serverlessConfig:
            maxConcurrency: 0
            memorySizeInMb: 0
            provisionedConcurrency: 0
          variantName: string
          volumeSizeInGb: 0
    region: string
    shadowProductionVariants:
        - acceleratorType: string
          containerStartupHealthCheckTimeoutInSeconds: 0
          coreDumpConfig:
            destinationS3Uri: string
            kmsKeyId: string
          enableSsmAccess: false
          inferenceAmiVersion: string
          initialInstanceCount: 0
          initialVariantWeight: 0
          instanceType: string
          managedInstanceScaling:
            maxInstanceCount: 0
            minInstanceCount: 0
            status: string
          modelDataDownloadTimeoutInSeconds: 0
          modelName: string
          routingConfigs:
            - routingStrategy: string
          serverlessConfig:
            maxConcurrency: 0
            memorySizeInMb: 0
            provisionedConcurrency: 0
          variantName: string
          volumeSizeInGb: 0
    tags:
        string: string

EndpointConfiguration Resource Properties

To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

Inputs

In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.

The EndpointConfiguration resource accepts the following input properties:

ProductionVariants List<EndpointConfigurationProductionVariant>: List each model that you want to host at this endpoint. See below.
AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfig: How an endpoint performs asynchronous inference.
DataCaptureConfig EndpointConfigurationDataCaptureConfig: Parameters to capture input/output of SageMaker AI model endpoints. Fields are documented below.
ExecutionRoleArn string: ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
KmsKeyArn string: ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
Name string: Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
NamePrefix string: Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
Region string: Region where this resource will be managed. Defaults to the Region set in the provider configuration.
ShadowProductionVariants List<EndpointConfigurationShadowProductionVariant>: Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified in production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
Tags Dictionary<string, string>: Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

ProductionVariants []EndpointConfigurationProductionVariantArgs: List each model that you want to host at this endpoint. See below.
AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfigArgs: How an endpoint performs asynchronous inference.
DataCaptureConfig EndpointConfigurationDataCaptureConfigArgs: Parameters to capture input/output of SageMaker AI model endpoints. Fields are documented below.
ExecutionRoleArn string: ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
KmsKeyArn string: ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
Name string: Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
NamePrefix string: Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
Region string: Region where this resource will be managed. Defaults to the Region set in the provider configuration.
ShadowProductionVariants []EndpointConfigurationShadowProductionVariantArgs: Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified in production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
Tags map[string]string: Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

productionVariants List<EndpointConfigurationProductionVariant>: List each model that you want to host at this endpoint. See below.
asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig: How an endpoint performs asynchronous inference.
dataCaptureConfig EndpointConfigurationDataCaptureConfig: Parameters to capture input/output of SageMaker AI model endpoints. Fields are documented below.
executionRoleArn String: ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
kmsKeyArn String: ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
name String: Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
namePrefix String: Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
region String: Region where this resource will be managed. Defaults to the Region set in the provider configuration.
shadowProductionVariants List<EndpointConfigurationShadowProductionVariant>: Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified in production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
tags Map<String,String>: Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

productionVariants EndpointConfigurationProductionVariant[]: List each model that you want to host at this endpoint. See below.
asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig: How an endpoint performs asynchronous inference.
dataCaptureConfig EndpointConfigurationDataCaptureConfig: Parameters to capture input/output of SageMaker AI model endpoints. Fields are documented below.
executionRoleArn string: ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
kmsKeyArn string: ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
name string: Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
namePrefix string: Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
region string: Region where this resource will be managed. Defaults to the Region set in the provider configuration.
shadowProductionVariants EndpointConfigurationShadowProductionVariant[]: Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified in production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
tags {[key: string]: string}: Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

production_variants Sequence[EndpointConfigurationProductionVariantArgs]: List each model that you want to host at this endpoint. See below.
async_inference_config EndpointConfigurationAsyncInferenceConfigArgs: How an endpoint performs asynchronous inference.
data_capture_config EndpointConfigurationDataCaptureConfigArgs: Parameters to capture input/output of SageMaker AI model endpoints. Fields are documented below.
execution_role_arn str: ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
kms_key_arn str: ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
name str: Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
name_prefix str: Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
region str: Region where this resource will be managed. Defaults to the Region set in the provider configuration.
shadow_production_variants Sequence[EndpointConfigurationShadowProductionVariantArgs]: Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified in production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
tags Mapping[str, str]: Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

productionVariants List<Property Map>: List each model that you want to host at this endpoint. See below.
asyncInferenceConfig Property Map: How an endpoint performs asynchronous inference.
dataCaptureConfig Property Map: Parameters to capture input/output of SageMaker AI model endpoints. Fields are documented below.
executionRoleArn String: ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
kmsKeyArn String: ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
name String: Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
namePrefix String: Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
region String: Region where this resource will be managed. Defaults to the Region set in the provider configuration.
shadowProductionVariants List<Property Map>: Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified in production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
tags Map<String>: Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

Outputs

All input properties are implicitly available as output properties. Additionally, the EndpointConfiguration resource produces the following output properties:

Arn string: ARN assigned by AWS to this endpoint configuration.
Id string: The provider-assigned unique ID for this managed resource.
TagsAll Dictionary<string, string>: Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

Arn string: ARN assigned by AWS to this endpoint configuration.
Id string: The provider-assigned unique ID for this managed resource.
TagsAll map[string]string: Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

arn String: ARN assigned by AWS to this endpoint configuration.
id String: The provider-assigned unique ID for this managed resource.
tagsAll Map<String,String>: Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

arn string: ARN assigned by AWS to this endpoint configuration.
id string: The provider-assigned unique ID for this managed resource.
tagsAll {[key: string]: string}: Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

arn str: ARN assigned by AWS to this endpoint configuration.
id str: The provider-assigned unique ID for this managed resource.
tags_all Mapping[str, str]: Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

arn String: ARN assigned by AWS to this endpoint configuration.
id String: The provider-assigned unique ID for this managed resource.
tagsAll Map<String>: Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

Look up Existing EndpointConfiguration Resource

Get an existing EndpointConfiguration resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

public static get(name: string, id: Input<ID>, state?: EndpointConfigurationState, opts?: CustomResourceOptions): EndpointConfiguration

@staticmethod
def get(resource_name: str,
        id: str,
        opts: Optional[ResourceOptions] = None,
        arn: Optional[str] = None,
        async_inference_config: Optional[EndpointConfigurationAsyncInferenceConfigArgs] = None,
        data_capture_config: Optional[EndpointConfigurationDataCaptureConfigArgs] = None,
        execution_role_arn: Optional[str] = None,
        kms_key_arn: Optional[str] = None,
        name: Optional[str] = None,
        name_prefix: Optional[str] = None,
        production_variants: Optional[Sequence[EndpointConfigurationProductionVariantArgs]] = None,
        region: Optional[str] = None,
        shadow_production_variants: Optional[Sequence[EndpointConfigurationShadowProductionVariantArgs]] = None,
        tags: Optional[Mapping[str, str]] = None,
        tags_all: Optional[Mapping[str, str]] = None) -> EndpointConfiguration

func GetEndpointConfiguration(ctx *Context, name string, id IDInput, state *EndpointConfigurationState, opts ...ResourceOption) (*EndpointConfiguration, error)

public static EndpointConfiguration Get(string name, Input<string> id, EndpointConfigurationState? state, CustomResourceOptions? opts = null)

public static EndpointConfiguration get(String name, Output<String> id, EndpointConfigurationState state, CustomResourceOptions options)

resources:
  _:
    type: aws:sagemaker:EndpointConfiguration
    get:
      id: ${id}

name: The unique name of the resulting resource.
id: The unique provider ID of the resource to look up.
state: Any extra arguments used during the lookup.
opts: A bag of options that control this resource's behavior.

resource_name: The unique name of the resulting resource.
id: The unique provider ID of the resource to look up.

name: The unique name of the resulting resource.
id: The unique provider ID of the resource to look up.
state: Any extra arguments used during the lookup.
opts: A bag of options that control this resource's behavior.

name: The unique name of the resulting resource.
id: The unique provider ID of the resource to look up.
state: Any extra arguments used during the lookup.
opts: A bag of options that control this resource's behavior.

name: The unique name of the resulting resource.
id: The unique provider ID of the resource to look up.
state: Any extra arguments used during the lookup.
opts: A bag of options that control this resource's behavior.

The following state arguments are supported:

Arn string: ARN assigned by AWS to this endpoint configuration.
AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfig: How an endpoint performs asynchronous inference.
DataCaptureConfig EndpointConfigurationDataCaptureConfig: Parameters to capture input/output of SageMaker AI model endpoints. Fields are documented below.
ExecutionRoleArn string: ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
KmsKeyArn string: ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
Name string: Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
NamePrefix string: Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
ProductionVariants List<EndpointConfigurationProductionVariant>: List each model that you want to host at this endpoint. See below.
Region string: Region where this resource will be managed. Defaults to the Region set in the provider configuration.
ShadowProductionVariants List<EndpointConfigurationShadowProductionVariant>: Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified in production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
Tags Dictionary<string, string>: Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
TagsAll Dictionary<string, string>: Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

Arn string: ARN assigned by AWS to this endpoint configuration.
AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfigArgs: How an endpoint performs asynchronous inference.
DataCaptureConfig EndpointConfigurationDataCaptureConfigArgs: Parameters to capture input/output of SageMaker AI model endpoints. Fields are documented below.
ExecutionRoleArn string: ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
KmsKeyArn string: ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
Name string: Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
NamePrefix string: Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
ProductionVariants []EndpointConfigurationProductionVariantArgs: List each model that you want to host at this endpoint. See below.
Region string: Region where this resource will be managed. Defaults to the Region set in the provider configuration.
ShadowProductionVariants []EndpointConfigurationShadowProductionVariantArgs: Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified in production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
Tags map[string]string: Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
TagsAll map[string]string: Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

arn String: ARN assigned by AWS to this endpoint configuration.
asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig: How an endpoint performs asynchronous inference.
dataCaptureConfig EndpointConfigurationDataCaptureConfig: Parameters to capture input/output of SageMaker AI model endpoints. Fields are documented below.
executionRoleArn String: ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
kmsKeyArn String: ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
name String: Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
namePrefix String: Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
productionVariants List<EndpointConfigurationProductionVariant>: List each model that you want to host at this endpoint. See below.
region String: Region where this resource will be managed. Defaults to the Region set in the provider configuration.
shadowProductionVariants List<EndpointConfigurationShadowProductionVariant>: Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified in production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
tags Map<String,String>: Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
tagsAll Map<String,String>: Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

arn string: ARN assigned by AWS to this endpoint configuration.
asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig: How an endpoint performs asynchronous inference.
dataCaptureConfig EndpointConfigurationDataCaptureConfig: Parameters to capture input/output of SageMaker AI model endpoints. Fields are documented below.
executionRoleArn string: ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
kmsKeyArn string: ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
name string: Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
namePrefix string: Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
productionVariants EndpointConfigurationProductionVariant[]: List each model that you want to host at this endpoint. See below.
region string: Region where this resource will be managed. Defaults to the Region set in the provider configuration.
shadowProductionVariants EndpointConfigurationShadowProductionVariant[]: Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified in production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
tags {[key: string]: string}: Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
tagsAll {[key: string]: string}: Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

arn str: ARN assigned by AWS to this endpoint configuration.
async_inference_config EndpointConfigurationAsyncInferenceConfigArgs: How an endpoint performs asynchronous inference.
data_capture_config EndpointConfigurationDataCaptureConfigArgs: Parameters to capture input/output of SageMaker AI model endpoints. Fields are documented below.
execution_role_arn str: ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
kms_key_arn str: ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
name str: Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
name_prefix str: Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
production_variants Sequence[EndpointConfigurationProductionVariantArgs]: List each model that you want to host at this endpoint. See below.
region str: Region where this resource will be managed. Defaults to the Region set in the provider configuration.
shadow_production_variants Sequence[EndpointConfigurationShadowProductionVariantArgs]: Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified in production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
tags Mapping[str, str]: Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
tags_all Mapping[str, str]: Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

arn String: ARN assigned by AWS to this endpoint configuration.
asyncInferenceConfig Property Map: How an endpoint performs asynchronous inference.
dataCaptureConfig Property Map: Parameters to capture input/output of SageMaker AI model endpoints. Fields are documented below.
executionRoleArn String: ARN of an IAM role that SageMaker AI can assume to perform actions on your behalf. Required when model_name is not specified in production_variants to support Inference Components.
kmsKeyArn String: ARN of an AWS KMS key that SageMaker AI uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
name String: Name of the endpoint configuration. If omitted, the provider will assign a random, unique name. Conflicts with name_prefix.
namePrefix String: Unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
productionVariants List<Property Map>: List each model that you want to host at this endpoint. See below.
region String: Region where this resource will be managed. Defaults to the Region set in the provider configuration.
shadowProductionVariants List<Property Map>: Models that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified in production_variants. If you use this field, you can only specify one variant for production_variants and one variant for shadow_production_variants. See below (same arguments as production_variants).
tags Map<String>: Mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
tagsAll Map<String>: Map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

Supporting Types

EndpointConfigurationAsyncInferenceConfig, EndpointConfigurationAsyncInferenceConfigArgs

OutputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig: Configuration for asynchronous inference invocation outputs.
ClientConfig EndpointConfigurationAsyncInferenceConfigClientConfig: Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.

OutputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig: Configuration for asynchronous inference invocation outputs.
ClientConfig EndpointConfigurationAsyncInferenceConfigClientConfig: Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.

outputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig: Configuration for asynchronous inference invocation outputs.
clientConfig EndpointConfigurationAsyncInferenceConfigClientConfig: Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.

outputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig: Configuration for asynchronous inference invocation outputs.
clientConfig EndpointConfigurationAsyncInferenceConfigClientConfig: Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.

output_config EndpointConfigurationAsyncInferenceConfigOutputConfig: Configuration for asynchronous inference invocation outputs.
client_config EndpointConfigurationAsyncInferenceConfigClientConfig: Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.

outputConfig Property Map: Configuration for asynchronous inference invocation outputs.
clientConfig Property Map: Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.

EndpointConfigurationAsyncInferenceConfigClientConfig, EndpointConfigurationAsyncInferenceConfigClientConfigArgs

MaxConcurrentInvocationsPerInstance int: Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.

MaxConcurrentInvocationsPerInstance int: Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.

maxConcurrentInvocationsPerInstance Integer: Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.

maxConcurrentInvocationsPerInstance number: Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.

max_concurrent_invocations_per_instance int: Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.

maxConcurrentInvocationsPerInstance Number: Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.

EndpointConfigurationAsyncInferenceConfigOutputConfig, EndpointConfigurationAsyncInferenceConfigOutputConfigArgs

S3OutputPath string: S3 location to upload inference responses to.
KmsKeyId string: KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
NotificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig: Configuration for notifications of inference results for asynchronous inference.
S3FailurePath string: S3 location to upload failure inference responses to.

S3OutputPath string: S3 location to upload inference responses to.
KmsKeyId string: KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
NotificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig: Configuration for notifications of inference results for asynchronous inference.
S3FailurePath string: S3 location to upload failure inference responses to.

s3OutputPath String: S3 location to upload inference responses to.
kmsKeyId String: KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
notificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig: Configuration for notifications of inference results for asynchronous inference.
s3FailurePath String: S3 location to upload failure inference responses to.

s3OutputPath string: S3 location to upload inference responses to.
kmsKeyId string: KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
notificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig: Configuration for notifications of inference results for asynchronous inference.
s3FailurePath string: S3 location to upload failure inference responses to.

s3_output_path str: S3 location to upload inference responses to.
kms_key_id str: KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
notification_config EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig: Configuration for notifications of inference results for asynchronous inference.
s3_failure_path str: S3 location to upload failure inference responses to.

s3OutputPath String: S3 location to upload inference responses to.
kmsKeyId String: KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
notificationConfig Property Map: Configuration for notifications of inference results for asynchronous inference.
s3FailurePath String: S3 location to upload failure inference responses to.

EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig, EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs

ErrorTopic string: SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
IncludeInferenceResponseIns List<string>: SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
SuccessTopic string: SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.

ErrorTopic string: SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
IncludeInferenceResponseIns []string: SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
SuccessTopic string: SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.

errorTopic String: SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
includeInferenceResponseIns List<String>: SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
successTopic String: SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.

errorTopic string: SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
includeInferenceResponseIns string[]: SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
successTopic string: SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.

error_topic str: SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
include_inference_response_ins Sequence[str]: SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
success_topic str: SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.

errorTopic String: SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
includeInferenceResponseIns List<String>: SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
successTopic String: SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.

EndpointConfigurationDataCaptureConfig, EndpointConfigurationDataCaptureConfigArgs

CaptureOptions List<EndpointConfigurationDataCaptureConfigCaptureOption>: What data to capture. Fields are documented below.
DestinationS3Uri string: URL for S3 location where the captured data is stored.
InitialSamplingPercentage int: Portion of data to capture. Should be between 0 and 100.
CaptureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader: Content type headers to capture. See capture_content_type_header below.
EnableCapture bool: Flag to enable data capture. Defaults to false.
KmsKeyId string: ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.

CaptureOptions []EndpointConfigurationDataCaptureConfigCaptureOption: What data to capture. Fields are documented below.
DestinationS3Uri string: URL for S3 location where the captured data is stored.
InitialSamplingPercentage int: Portion of data to capture. Should be between 0 and 100.
CaptureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader: Content type headers to capture. See capture_content_type_header below.
EnableCapture bool: Flag to enable data capture. Defaults to false.
KmsKeyId string: ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.

captureOptions List<EndpointConfigurationDataCaptureConfigCaptureOption>: What data to capture. Fields are documented below.
destinationS3Uri String: URL for S3 location where the captured data is stored.
initialSamplingPercentage Integer: Portion of data to capture. Should be between 0 and 100.
captureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader: Content type headers to capture. See capture_content_type_header below.
enableCapture Boolean: Flag to enable data capture. Defaults to false.
kmsKeyId String: ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.

captureOptions EndpointConfigurationDataCaptureConfigCaptureOption[]: What data to capture. Fields are documented below.
destinationS3Uri string: URL for S3 location where the captured data is stored.
initialSamplingPercentage number: Portion of data to capture. Should be between 0 and 100.
captureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader: Content type headers to capture. See capture_content_type_header below.
enableCapture boolean: Flag to enable data capture. Defaults to false.
kmsKeyId string: ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.

capture_options Sequence[EndpointConfigurationDataCaptureConfigCaptureOption]: What data to capture. Fields are documented below.
destination_s3_uri str: URL for S3 location where the captured data is stored.
initial_sampling_percentage int: Portion of data to capture. Should be between 0 and 100.
capture_content_type_header EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader: Content type headers to capture. See capture_content_type_header below.
enable_capture bool: Flag to enable data capture. Defaults to false.
kms_key_id str: ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.

captureOptions List<Property Map>: What data to capture. Fields are documented below.
destinationS3Uri String: URL for S3 location where the captured data is stored.
initialSamplingPercentage Number: Portion of data to capture. Should be between 0 and 100.
captureContentTypeHeader Property Map: Content type headers to capture. See capture_content_type_header below.
enableCapture Boolean: Flag to enable data capture. Defaults to false.
kmsKeyId String: ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.

EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader, EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs

CsvContentTypes List<string>: CSV content type headers to capture. One of csv_content_types or json_content_types is required.
JsonContentTypes List<string>: The JSON content type headers to capture. One of json_content_types or csv_content_types is required.

CsvContentTypes []string: CSV content type headers to capture. One of csv_content_types or json_content_types is required.
JsonContentTypes []string: The JSON content type headers to capture. One of json_content_types or csv_content_types is required.

csvContentTypes List<String>: CSV content type headers to capture. One of csv_content_types or json_content_types is required.
jsonContentTypes List<String>: The JSON content type headers to capture. One of json_content_types or csv_content_types is required.

csvContentTypes string[]: CSV content type headers to capture. One of csv_content_types or json_content_types is required.
jsonContentTypes string[]: The JSON content type headers to capture. One of json_content_types or csv_content_types is required.

csv_content_types Sequence[str]: CSV content type headers to capture. One of csv_content_types or json_content_types is required.
json_content_types Sequence[str]: The JSON content type headers to capture. One of json_content_types or csv_content_types is required.

csvContentTypes List<String>: CSV content type headers to capture. One of csv_content_types or json_content_types is required.
jsonContentTypes List<String>: The JSON content type headers to capture. One of json_content_types or csv_content_types is required.

EndpointConfigurationDataCaptureConfigCaptureOption, EndpointConfigurationDataCaptureConfigCaptureOptionArgs

CaptureMode string: Data to be captured. Should be one of Input, Output or InputAndOutput.

CaptureMode string: Data to be captured. Should be one of Input, Output or InputAndOutput.

captureMode String: Data to be captured. Should be one of Input, Output or InputAndOutput.

captureMode string: Data to be captured. Should be one of Input, Output or InputAndOutput.

capture_mode str: Data to be captured. Should be one of Input, Output or InputAndOutput.

captureMode String: Data to be captured. Should be one of Input, Output or InputAndOutput.

EndpointConfigurationProductionVariant, EndpointConfigurationProductionVariantArgs

AcceleratorType string: Size of the Elastic Inference (EI) instance to use for the production variant.
ContainerStartupHealthCheckTimeoutInSeconds int: Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
CoreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig: Core dump configuration from the model container when the process crashes. Fields are documented below.
EnableSsmAccess bool: Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
InferenceAmiVersion string: Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
InitialInstanceCount int: Initial number of instances used for auto-scaling.
InitialVariantWeight double: Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
InstanceType string: Type of instance to start.
ManagedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling: Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
ModelDataDownloadTimeoutInSeconds int: Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
ModelName string: Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
RoutingConfigs List<EndpointConfigurationProductionVariantRoutingConfig>: How the endpoint routes incoming traffic. See routing_config below.
ServerlessConfig EndpointConfigurationProductionVariantServerlessConfig: Serverless endpoint configuration for the variant (maximum concurrency, memory size, and provisioned concurrency). Fields are documented below.
VariantName string: Name of the variant. If omitted, the provider will assign a random, unique name.
VolumeSizeInGb int: Size, in GB, of the ML storage volume attached to each individual inference instance associated with the production variant. Valid values between 1 and 512.

AcceleratorType string: Size of the Elastic Inference (EI) instance to use for the production variant.
ContainerStartupHealthCheckTimeoutInSeconds int: Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
CoreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig: Core dump configuration from the model container when the process crashes. Fields are documented below.
EnableSsmAccess bool: Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
InferenceAmiVersion string: Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
InitialInstanceCount int: Initial number of instances used for auto-scaling.
InitialVariantWeight float64: Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
InstanceType string: Type of instance to start.
ManagedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling: Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
ModelDataDownloadTimeoutInSeconds int: Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
ModelName string: Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
RoutingConfigs []EndpointConfigurationProductionVariantRoutingConfig: How the endpoint routes incoming traffic. See routing_config below.
ServerlessConfig EndpointConfigurationProductionVariantServerlessConfig: Serverless endpoint configuration for the variant (maximum concurrency, memory size, and provisioned concurrency). Fields are documented below.
VariantName string: Name of the variant. If omitted, the provider will assign a random, unique name.
VolumeSizeInGb int: Size, in GB, of the ML storage volume attached to each individual inference instance associated with the production variant. Valid values between 1 and 512.

acceleratorType String: Size of the Elastic Inference (EI) instance to use for the production variant.
containerStartupHealthCheckTimeoutInSeconds Integer: Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
coreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig: Core dump configuration from the model container when the process crashes. Fields are documented below.
enableSsmAccess Boolean: Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
inferenceAmiVersion String: Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
initialInstanceCount Integer: Initial number of instances used for auto-scaling.
initialVariantWeight Double: Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
instanceType String: Type of instance to start.
managedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling: Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
modelDataDownloadTimeoutInSeconds Integer: Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
modelName String: Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
routingConfigs List<EndpointConfigurationProductionVariantRoutingConfig>: How the endpoint routes incoming traffic. See routing_config below.
serverlessConfig EndpointConfigurationProductionVariantServerlessConfig: Serverless endpoint configuration for the variant (maximum concurrency, memory size, and provisioned concurrency). Fields are documented below.
variantName String: Name of the variant. If omitted, the provider will assign a random, unique name.
volumeSizeInGb Integer: Size, in GB, of the ML storage volume attached to each individual inference instance associated with the production variant. Valid values between 1 and 512.

acceleratorType string: Size of the Elastic Inference (EI) instance to use for the production variant.
containerStartupHealthCheckTimeoutInSeconds number: Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
coreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig: Core dump configuration from the model container when the process crashes. Fields are documented below.
enableSsmAccess boolean: Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
inferenceAmiVersion string: Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
initialInstanceCount number: Initial number of instances used for auto-scaling.
initialVariantWeight number: Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
instanceType string: Type of instance to start.
managedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling: Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
modelDataDownloadTimeoutInSeconds number: Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
modelName string: Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
routingConfigs EndpointConfigurationProductionVariantRoutingConfig[]: How the endpoint routes incoming traffic. See routing_config below.
serverlessConfig EndpointConfigurationProductionVariantServerlessConfig: Serverless endpoint configuration for the variant (maximum concurrency, memory size, and provisioned concurrency). Fields are documented below.
variantName string: Name of the variant. If omitted, the provider will assign a random, unique name.
volumeSizeInGb number: Size, in GB, of the ML storage volume attached to each individual inference instance associated with the production variant. Valid values between 1 and 512.

accelerator_type str: Size of the Elastic Inference (EI) instance to use for the production variant.
container_startup_health_check_timeout_in_seconds int: Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
core_dump_config EndpointConfigurationProductionVariantCoreDumpConfig: Core dump configuration from the model container when the process crashes. Fields are documented below.
enable_ssm_access bool: Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
inference_ami_version str: Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
initial_instance_count int: Initial number of instances used for auto-scaling.
initial_variant_weight float: Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
instance_type str: Type of instance to start.
managed_instance_scaling EndpointConfigurationProductionVariantManagedInstanceScaling: Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
model_data_download_timeout_in_seconds int: Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
model_name str: Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
routing_configs Sequence[EndpointConfigurationProductionVariantRoutingConfig]: How the endpoint routes incoming traffic. See routing_config below.
serverless_config EndpointConfigurationProductionVariantServerlessConfig: Serverless endpoint configuration for the variant (maximum concurrency, memory size, and provisioned concurrency). Fields are documented below.
variant_name str: Name of the variant. If omitted, the provider will assign a random, unique name.
volume_size_in_gb int: Size, in GB, of the ML storage volume attached to each individual inference instance associated with the production variant. Valid values between 1 and 512.

acceleratorType String: Size of the Elastic Inference (EI) instance to use for the production variant.
containerStartupHealthCheckTimeoutInSeconds Number: Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
coreDumpConfig Property Map: Core dump configuration from the model container when the process crashes. Fields are documented below.
enableSsmAccess Boolean: Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
inferenceAmiVersion String: Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
initialInstanceCount Number: Initial number of instances used for auto-scaling.
initialVariantWeight Number: Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
instanceType String: Type of instance to start.
managedInstanceScaling Property Map: Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
modelDataDownloadTimeoutInSeconds Number: Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
modelName String: Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
routingConfigs List<Property Map>: How the endpoint routes incoming traffic. See routing_config below.
serverlessConfig Property Map: Serverless endpoint configuration for the variant (maximum concurrency, memory size, and provisioned concurrency). Fields are documented below.
variantName String: Name of the variant. If omitted, the provider will assign a random, unique name.
volumeSizeInGb Number: Size, in GB, of the ML storage volume attached to each individual inference instance associated with the production variant. Valid values between 1 and 512.

EndpointConfigurationProductionVariantCoreDumpConfig, EndpointConfigurationProductionVariantCoreDumpConfigArgs

DestinationS3Uri string: S3 bucket to send the core dump to.
KmsKeyId string: KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.

DestinationS3Uri string: S3 bucket to send the core dump to.
KmsKeyId string: KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.

destinationS3Uri String: S3 bucket to send the core dump to.
kmsKeyId String: KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.

destinationS3Uri string: S3 bucket to send the core dump to.
kmsKeyId string: KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.

destination_s3_uri str: S3 bucket to send the core dump to.
kms_key_id str: KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.

destinationS3Uri String: S3 bucket to send the core dump to.
kmsKeyId String: KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.

EndpointConfigurationProductionVariantManagedInstanceScaling, EndpointConfigurationProductionVariantManagedInstanceScalingArgs

MaxInstanceCount int: Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
MinInstanceCount int: Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
Status string: Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.

MaxInstanceCount int: Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
MinInstanceCount int: Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
Status string: Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.

maxInstanceCount Integer: Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
minInstanceCount Integer: Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
status String: Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.

maxInstanceCount number: Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
minInstanceCount number: Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
status string: Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.

max_instance_count int: Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
min_instance_count int: Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
status str: Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.

maxInstanceCount Number: Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
minInstanceCount Number: Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
status String: Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.

EndpointConfigurationProductionVariantRoutingConfig, EndpointConfigurationProductionVariantRoutingConfigArgs

RoutingStrategy string: How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.

RoutingStrategy string: How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.

routingStrategy String: How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.

routingStrategy string: How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.

routing_strategy str: How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.

routingStrategy String: How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.

EndpointConfigurationProductionVariantServerlessConfig, EndpointConfigurationProductionVariantServerlessConfigArgs

MaxConcurrency int: Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
MemorySizeInMb int: Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
ProvisionedConcurrency int: Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.

MaxConcurrency int: Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
MemorySizeInMb int: Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
ProvisionedConcurrency int: Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.

maxConcurrency Integer: Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
memorySizeInMb Integer: Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
provisionedConcurrency Integer: Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.

maxConcurrency number: Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
memorySizeInMb number: Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
provisionedConcurrency number: Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.

max_concurrency int: Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
memory_size_in_mb int: Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
provisioned_concurrency int: Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.

maxConcurrency Number: Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
memorySizeInMb Number: Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
provisionedConcurrency Number: Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.

EndpointConfigurationShadowProductionVariant, EndpointConfigurationShadowProductionVariantArgs

AcceleratorType string: Size of the Elastic Inference (EI) instance to use for the production variant.
ContainerStartupHealthCheckTimeoutInSeconds int: Timeout value, in seconds, for your inference container to pass the health check performed by SageMaker AI Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
CoreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig: Core dump configuration from the model container when the process crashes. Fields are documented below.
EnableSsmAccess bool: Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
InferenceAmiVersion string: Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
InitialInstanceCount int: Initial number of instances used for auto-scaling.
InitialVariantWeight double: Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
InstanceType string: Type of instance to start.
ManagedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling: Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
ModelDataDownloadTimeoutInSeconds int: Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
ModelName string: Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
RoutingConfigs List<EndpointConfigurationShadowProductionVariantRoutingConfig>: How the endpoint routes incoming traffic. See routing_config below.
ServerlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig: Serverless inference configuration for the variant (concurrency and memory size of the serverless endpoint). Fields are documented below.
VariantName string: Name of the variant. If omitted, the provider will assign a random, unique name.
VolumeSizeInGb int: Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between 1 and 512.

AcceleratorType string: Size of the Elastic Inference (EI) instance to use for the production variant.
ContainerStartupHealthCheckTimeoutInSeconds int: Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
CoreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig: Core dump configuration from the model container when the process crashes. Fields are documented below.
EnableSsmAccess bool: Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
InferenceAmiVersion string: Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
InitialInstanceCount int: Initial number of instances used for auto-scaling.
InitialVariantWeight float64: Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
InstanceType string: Type of instance to start.
ManagedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling: Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
ModelDataDownloadTimeoutInSeconds int: Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
ModelName string: Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
RoutingConfigs []EndpointConfigurationShadowProductionVariantRoutingConfig: How the endpoint routes incoming traffic. See routing_config below.
ServerlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig: Serverless inference configuration for the variant (concurrency and memory size of the serverless endpoint). Fields are documented below.
VariantName string: Name of the variant. If omitted, the provider will assign a random, unique name.
VolumeSizeInGb int: Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between 1 and 512.

acceleratorType String: Size of the Elastic Inference (EI) instance to use for the production variant.
containerStartupHealthCheckTimeoutInSeconds Integer: Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
coreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig: Core dump configuration from the model container when the process crashes. Fields are documented below.
enableSsmAccess Boolean: Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
inferenceAmiVersion String: Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
initialInstanceCount Integer: Initial number of instances used for auto-scaling.
initialVariantWeight Double: Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
instanceType String: Type of instance to start.
managedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling: Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
modelDataDownloadTimeoutInSeconds Integer: Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
modelName String: Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
routingConfigs List<EndpointConfigurationShadowProductionVariantRoutingConfig>: How the endpoint routes incoming traffic. See routing_config below.
serverlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig: Serverless inference configuration for the variant (concurrency and memory size of the serverless endpoint). Fields are documented below.
variantName String: Name of the variant. If omitted, the provider will assign a random, unique name.
volumeSizeInGb Integer: Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between 1 and 512.

acceleratorType string: Size of the Elastic Inference (EI) instance to use for the production variant.
containerStartupHealthCheckTimeoutInSeconds number: Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
coreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig: Core dump configuration from the model container when the process crashes. Fields are documented below.
enableSsmAccess boolean: Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
inferenceAmiVersion string: Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
initialInstanceCount number: Initial number of instances used for auto-scaling.
initialVariantWeight number: Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
instanceType string: Type of instance to start.
managedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling: Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
modelDataDownloadTimeoutInSeconds number: Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
modelName string: Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
routingConfigs EndpointConfigurationShadowProductionVariantRoutingConfig[]: How the endpoint routes incoming traffic. See routing_config below.
serverlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig: Serverless inference configuration for the variant (concurrency and memory size of the serverless endpoint). Fields are documented below.
variantName string: Name of the variant. If omitted, the provider will assign a random, unique name.
volumeSizeInGb number: Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between 1 and 512.

accelerator_type str: Size of the Elastic Inference (EI) instance to use for the production variant.
container_startup_health_check_timeout_in_seconds int: Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
core_dump_config EndpointConfigurationShadowProductionVariantCoreDumpConfig: Core dump configuration from the model container when the process crashes. Fields are documented below.
enable_ssm_access bool: Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
inference_ami_version str: Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
initial_instance_count int: Initial number of instances used for auto-scaling.
initial_variant_weight float: Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
instance_type str: Type of instance to start.
managed_instance_scaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling: Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
model_data_download_timeout_in_seconds int: Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
model_name str: Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
routing_configs Sequence[EndpointConfigurationShadowProductionVariantRoutingConfig]: How the endpoint routes incoming traffic. See routing_config below.
serverless_config EndpointConfigurationShadowProductionVariantServerlessConfig: Serverless inference configuration for the variant (concurrency and memory size of the serverless endpoint). Fields are documented below.
variant_name str: Name of the variant. If omitted, the provider will assign a random, unique name.
volume_size_in_gb int: Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between 1 and 512.

acceleratorType String: Size of the Elastic Inference (EI) instance to use for the production variant.
containerStartupHealthCheckTimeoutInSeconds Number: Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
coreDumpConfig Property Map: Core dump configuration from the model container when the process crashes. Fields are documented below.
enableSsmAccess Boolean: Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if model_name is not set (Inference Components endpoint).
inferenceAmiVersion String: Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
initialInstanceCount Number: Initial number of instances used for auto-scaling.
initialVariantWeight Number: Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to 1.0. Ignored if model_name is not set (Inference Components endpoint).
instanceType String: Type of instance to start.
managedInstanceScaling Property Map: Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
modelDataDownloadTimeoutInSeconds Number: Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
modelName String: Name of the model to use. Required unless using Inference Components (in which case execution_role_arn must be specified at the endpoint configuration level).
routingConfigs List<Property Map>: How the endpoint routes incoming traffic. See routing_config below.
serverlessConfig Property Map: Serverless inference configuration for the variant (concurrency and memory size of the serverless endpoint). Fields are documented below.
variantName String: Name of the variant. If omitted, the provider will assign a random, unique name.
volumeSizeInGb Number: Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between 1 and 512.

EndpointConfigurationShadowProductionVariantCoreDumpConfig, EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs

DestinationS3Uri string: S3 bucket to send the core dump to.
KmsKeyId string: KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.

DestinationS3Uri string: S3 bucket to send the core dump to.
KmsKeyId string: KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.

destinationS3Uri String: S3 bucket to send the core dump to.
kmsKeyId String: KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.

destinationS3Uri string: S3 bucket to send the core dump to.
kmsKeyId string: KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.

destination_s3_uri str: S3 bucket to send the core dump to.
kms_key_id str: KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.

destinationS3Uri String: S3 bucket to send the core dump to.
kmsKeyId String: KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.

EndpointConfigurationShadowProductionVariantManagedInstanceScaling, EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs

MaxInstanceCount int: Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
MinInstanceCount int: Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
Status string: Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.

MaxInstanceCount int: Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
MinInstanceCount int: Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
Status string: Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.

maxInstanceCount Integer: Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
minInstanceCount Integer: Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
status String: Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.

maxInstanceCount number: Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
minInstanceCount number: Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
status string: Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.

max_instance_count int: Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
min_instance_count int: Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
status str: Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.

maxInstanceCount Number: Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
minInstanceCount Number: Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
status String: Whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.

EndpointConfigurationShadowProductionVariantRoutingConfig, EndpointConfigurationShadowProductionVariantRoutingConfigArgs

RoutingStrategy string: How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.

RoutingStrategy string: How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.

routingStrategy String: How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.

routingStrategy string: How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.

routing_strategy str: How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.

routingStrategy String: How the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.

EndpointConfigurationShadowProductionVariantServerlessConfig, EndpointConfigurationShadowProductionVariantServerlessConfigArgs

MaxConcurrency int: Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
MemorySizeInMb int: Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
ProvisionedConcurrency int: Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.

MaxConcurrency int: Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
MemorySizeInMb int: Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
ProvisionedConcurrency int: Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.

maxConcurrency Integer: Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
memorySizeInMb Integer: Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
provisionedConcurrency Integer: Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.

maxConcurrency number: Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
memorySizeInMb number: Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
provisionedConcurrency number: Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.

max_concurrency int: Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
memory_size_in_mb int: Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
provisioned_concurrency int: Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.

maxConcurrency Number: Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
memorySizeInMb Number: Memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
provisionedConcurrency Number: Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.

Import

Using pulumi import, import endpoint configurations using the name. For example:

$ pulumi import aws:sagemaker/endpointConfiguration:EndpointConfiguration test_endpoint_config endpoint-config-foo

To learn more about importing existing cloud resources, see Importing resources.

Package Details

Repository: AWS Classic pulumi/pulumi-aws
License: Apache-2.0
Notes: This Pulumi package is based on the aws Terraform Provider.

AWS v7.14.0 published on Thursday, Dec 11, 2025 by Pulumi

Schema (JSON)

pulumi/pulumi-aws

aws.sagemaker.EndpointConfiguration

On this page

On this page

Example Usage

Create EndpointConfiguration Resource

Constructor syntax

Parameters

Constructor example

EndpointConfiguration Resource Properties

Inputs

Outputs

Look up Existing EndpointConfiguration Resource

Supporting Types

EndpointConfigurationAsyncInferenceConfig, EndpointConfigurationAsyncInferenceConfigArgs

EndpointConfigurationAsyncInferenceConfigClientConfig, EndpointConfigurationAsyncInferenceConfigClientConfigArgs

EndpointConfigurationAsyncInferenceConfigOutputConfig, EndpointConfigurationAsyncInferenceConfigOutputConfigArgs

EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig, EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs

EndpointConfigurationDataCaptureConfig, EndpointConfigurationDataCaptureConfigArgs

EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader, EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs

EndpointConfigurationDataCaptureConfigCaptureOption, EndpointConfigurationDataCaptureConfigCaptureOptionArgs

EndpointConfigurationProductionVariant, EndpointConfigurationProductionVariantArgs

EndpointConfigurationProductionVariantCoreDumpConfig, EndpointConfigurationProductionVariantCoreDumpConfigArgs

EndpointConfigurationProductionVariantManagedInstanceScaling, EndpointConfigurationProductionVariantManagedInstanceScalingArgs

EndpointConfigurationProductionVariantRoutingConfig, EndpointConfigurationProductionVariantRoutingConfigArgs

EndpointConfigurationProductionVariantServerlessConfig, EndpointConfigurationProductionVariantServerlessConfigArgs

EndpointConfigurationShadowProductionVariant, EndpointConfigurationShadowProductionVariantArgs

EndpointConfigurationShadowProductionVariantCoreDumpConfig, EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs

EndpointConfigurationShadowProductionVariantManagedInstanceScaling, EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs

EndpointConfigurationShadowProductionVariantRoutingConfig, EndpointConfigurationShadowProductionVariantRoutingConfigArgs

EndpointConfigurationShadowProductionVariantServerlessConfig, EndpointConfigurationShadowProductionVariantServerlessConfigArgs

Import

Package Details

On this page

On this page