1. Packages
  2. Gcore Provider
  3. API Docs
  4. InferenceDeployment
gcore 0.22.0 published on Wednesday, Apr 30, 2025 by g-core

gcore.InferenceDeployment

Explore with Pulumi AI

gcore logo
gcore 0.22.0 published on Wednesday, Apr 30, 2025 by g-core

    Represents an inference deployment.

    Example Usage

    Prerequisites
    import * as pulumi from "@pulumi/pulumi";
    import * as gcore from "@pulumi/gcore";
    
    const project = gcore.getProject({
        name: "Default",
    });
    const region = gcore.getRegion({
        name: "Luxembourg-2",
    });
    
    import pulumi
    import pulumi_gcore as gcore
    
    project = gcore.get_project(name="Default")
    region = gcore.get_region(name="Luxembourg-2")
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-terraform-provider/sdks/go/gcore/gcore"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := gcore.GetProject(ctx, &gcore.GetProjectArgs{
    			Name: "Default",
    		}, nil)
    		if err != nil {
    			return err
    		}
    		_, err = gcore.GetRegion(ctx, &gcore.GetRegionArgs{
    			Name: "Luxembourg-2",
    		}, nil)
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Gcore = Pulumi.Gcore;
    
    return await Deployment.RunAsync(() => 
    {
        var project = Gcore.GetProject.Invoke(new()
        {
            Name = "Default",
        });
    
        var region = Gcore.GetRegion.Invoke(new()
        {
            Name = "Luxembourg-2",
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.gcore.GcoreFunctions;
    import com.pulumi.gcore.inputs.GetProjectArgs;
    import com.pulumi.gcore.inputs.GetRegionArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            final var project = GcoreFunctions.getProject(GetProjectArgs.builder()
                .name("Default")
                .build());
    
            final var region = GcoreFunctions.getRegion(GetRegionArgs.builder()
                .name("Luxembourg-2")
                .build());
    
        }
    }
    
    variables:
      project:
        fn::invoke:
          function: gcore:getProject
          arguments:
            name: Default
      region:
        fn::invoke:
          function: gcore:getRegion
          arguments:
            name: Luxembourg-2
    

    Basic example

    Creating an inference deployment

    import * as pulumi from "@pulumi/pulumi";
    import * as gcore from "@pulumi/gcore";
    
    const inf = new gcore.InferenceDeployment("inf", {
        projectId: data.gcore_project.project.id,
        image: "nginx:latest",
        listeningPort: 80,
        flavorName: "inference-4vcpu-16gib",
        containers: [{
            regionId: data.gcore_region.region.id,
            scaleMin: 2,
            scaleMax: 2,
            triggersCpuThreshold: 80,
        }],
        livenessProbe: {
            enabled: true,
            failureThreshold: 3,
            initialDelaySeconds: 10,
            periodSeconds: 10,
            timeoutSeconds: 1,
            successThreshold: 1,
            httpGetPort: 80,
            httpGetHeaders: {
                "User-Agent": "my user agent",
            },
            httpGetHost: "localhost",
            httpGetPath: "/",
            httpGetSchema: "HTTPS",
        },
        readinessProbe: {
            enabled: false,
        },
        startupProbe: {
            enabled: false,
        },
    });
    
    import pulumi
    import pulumi_gcore as gcore
    
    inf = gcore.InferenceDeployment("inf",
        project_id=data["gcore_project"]["project"]["id"],
        image="nginx:latest",
        listening_port=80,
        flavor_name="inference-4vcpu-16gib",
        containers=[{
            "region_id": data["gcore_region"]["region"]["id"],
            "scale_min": 2,
            "scale_max": 2,
            "triggers_cpu_threshold": 80,
        }],
        liveness_probe={
            "enabled": True,
            "failure_threshold": 3,
            "initial_delay_seconds": 10,
            "period_seconds": 10,
            "timeout_seconds": 1,
            "success_threshold": 1,
            "http_get_port": 80,
            "http_get_headers": {
                "User-Agent": "my user agent",
            },
            "http_get_host": "localhost",
            "http_get_path": "/",
            "http_get_schema": "HTTPS",
        },
        readiness_probe={
            "enabled": False,
        },
        startup_probe={
            "enabled": False,
        })
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-terraform-provider/sdks/go/gcore/gcore"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := gcore.NewInferenceDeployment(ctx, "inf", &gcore.InferenceDeploymentArgs{
    			ProjectId:     pulumi.Any(data.Gcore_project.Project.Id),
    			Image:         pulumi.String("nginx:latest"),
    			ListeningPort: pulumi.Float64(80),
    			FlavorName:    pulumi.String("inference-4vcpu-16gib"),
    			Containers: gcore.InferenceDeploymentContainerArray{
    				&gcore.InferenceDeploymentContainerArgs{
    					RegionId:             pulumi.Any(data.Gcore_region.Region.Id),
    					ScaleMin:             pulumi.Float64(2),
    					ScaleMax:             pulumi.Float64(2),
    					TriggersCpuThreshold: pulumi.Float64(80),
    				},
    			},
    			LivenessProbe: &gcore.InferenceDeploymentLivenessProbeArgs{
    				Enabled:             pulumi.Bool(true),
    				FailureThreshold:    pulumi.Float64(3),
    				InitialDelaySeconds: pulumi.Float64(10),
    				PeriodSeconds:       pulumi.Float64(10),
    				TimeoutSeconds:      pulumi.Float64(1),
    				SuccessThreshold:    pulumi.Float64(1),
    				HttpGetPort:         pulumi.Float64(80),
    				HttpGetHeaders: pulumi.StringMap{
    					"User-Agent": pulumi.String("my user agent"),
    				},
    				HttpGetHost:   pulumi.String("localhost"),
    				HttpGetPath:   pulumi.String("/"),
    				HttpGetSchema: pulumi.String("HTTPS"),
    			},
    			ReadinessProbe: &gcore.InferenceDeploymentReadinessProbeArgs{
    				Enabled: pulumi.Bool(false),
    			},
    			StartupProbe: &gcore.InferenceDeploymentStartupProbeArgs{
    				Enabled: pulumi.Bool(false),
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Gcore = Pulumi.Gcore;
    
    return await Deployment.RunAsync(() => 
    {
        var inf = new Gcore.InferenceDeployment("inf", new()
        {
            ProjectId = data.Gcore_project.Project.Id,
            Image = "nginx:latest",
            ListeningPort = 80,
            FlavorName = "inference-4vcpu-16gib",
            Containers = new[]
            {
                new Gcore.Inputs.InferenceDeploymentContainerArgs
                {
                    RegionId = data.Gcore_region.Region.Id,
                    ScaleMin = 2,
                    ScaleMax = 2,
                    TriggersCpuThreshold = 80,
                },
            },
            LivenessProbe = new Gcore.Inputs.InferenceDeploymentLivenessProbeArgs
            {
                Enabled = true,
                FailureThreshold = 3,
                InitialDelaySeconds = 10,
                PeriodSeconds = 10,
                TimeoutSeconds = 1,
                SuccessThreshold = 1,
                HttpGetPort = 80,
                HttpGetHeaders = 
                {
                    { "User-Agent", "my user agent" },
                },
                HttpGetHost = "localhost",
                HttpGetPath = "/",
                HttpGetSchema = "HTTPS",
            },
            ReadinessProbe = new Gcore.Inputs.InferenceDeploymentReadinessProbeArgs
            {
                Enabled = false,
            },
            StartupProbe = new Gcore.Inputs.InferenceDeploymentStartupProbeArgs
            {
                Enabled = false,
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.gcore.InferenceDeployment;
    import com.pulumi.gcore.InferenceDeploymentArgs;
    import com.pulumi.gcore.inputs.InferenceDeploymentContainerArgs;
    import com.pulumi.gcore.inputs.InferenceDeploymentLivenessProbeArgs;
    import com.pulumi.gcore.inputs.InferenceDeploymentReadinessProbeArgs;
    import com.pulumi.gcore.inputs.InferenceDeploymentStartupProbeArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var inf = new InferenceDeployment("inf", InferenceDeploymentArgs.builder()
                .projectId(data.gcore_project().project().id())
                .image("nginx:latest")
                .listeningPort(80)
                .flavorName("inference-4vcpu-16gib")
                .containers(InferenceDeploymentContainerArgs.builder()
                    .regionId(data.gcore_region().region().id())
                    .scaleMin(2)
                    .scaleMax(2)
                    .triggersCpuThreshold(80)
                    .build())
                .livenessProbe(InferenceDeploymentLivenessProbeArgs.builder()
                    .enabled(true)
                    .failureThreshold(3)
                    .initialDelaySeconds(10)
                    .periodSeconds(10)
                    .timeoutSeconds(1)
                    .successThreshold(1)
                    .httpGetPort(80)
                    .httpGetHeaders(Map.of("User-Agent", "my user agent"))
                    .httpGetHost("localhost")
                    .httpGetPath("/")
                    .httpGetSchema("HTTPS")
                    .build())
                .readinessProbe(InferenceDeploymentReadinessProbeArgs.builder()
                    .enabled(false)
                    .build())
                .startupProbe(InferenceDeploymentStartupProbeArgs.builder()
                    .enabled(false)
                    .build())
                .build());
    
        }
    }
    
    resources:
      'inf':
        type: gcore:InferenceDeployment
        properties:
          projectId: ${data.gcore_project.project.id}
          image: nginx:latest
          listeningPort: 80
          flavorName: inference-4vcpu-16gib
          containers:
            - regionId: ${data.gcore_region.region.id}
              scaleMin: 2
              scaleMax: 2
              triggersCpuThreshold: 80
          livenessProbe:
            enabled: true
            failureThreshold: 3
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 1
            successThreshold: 1
            httpGetPort: 80
            httpGetHeaders:
              User-Agent: my user agent
            httpGetHost: localhost
            httpGetPath: /
            httpGetSchema: HTTPS
          readinessProbe:
            enabled: false
          startupProbe:
            enabled: false
    

    Creating an inference deployment with an SQS trigger

    import * as pulumi from "@pulumi/pulumi";
    import * as gcore from "@pulumi/gcore";
    
    const aws = new gcore.InferenceSecret("aws", {
        projectId: data.gcore_project.project.id,
        dataAwsAccessKeyId: "my-aws-access-key-id",
        dataAwsSecretAccessKey: "my-aws-access-key",
    });
    const inf = new gcore.InferenceDeployment("inf", {
        projectId: data.gcore_project.project.id,
        image: "nginx:latest",
        listeningPort: 80,
        flavorName: "inference-4vcpu-16gib",
        timeout: 60,
        containers: [{
            regionId: data.gcore_region.region.id,
            cooldownPeriod: 60,
            pollingInterval: 60,
            scaleMin: 0,
            scaleMax: 2,
            triggersCpuThreshold: 80,
            triggersSqsSecretName: aws.name,
            triggersSqsAwsRegion: "us-west-2",
            triggersSqsQueueUrl: "https://sqs.us-west-2.amazonaws.com/1234567890/my-queue",
            triggersSqsQueueLength: 5,
            triggersSqsActivationQueueLength: 2,
        }],
        livenessProbe: {
            enabled: false,
        },
        readinessProbe: {
            enabled: false,
        },
        startupProbe: {
            enabled: false,
        },
    });
    
    import pulumi
    import pulumi_gcore as gcore
    
    aws = gcore.InferenceSecret("aws",
        project_id=data["gcore_project"]["project"]["id"],
        data_aws_access_key_id="my-aws-access-key-id",
        data_aws_secret_access_key="my-aws-access-key")
    inf = gcore.InferenceDeployment("inf",
        project_id=data["gcore_project"]["project"]["id"],
        image="nginx:latest",
        listening_port=80,
        flavor_name="inference-4vcpu-16gib",
        timeout=60,
        containers=[{
            "region_id": data["gcore_region"]["region"]["id"],
            "cooldown_period": 60,
            "polling_interval": 60,
            "scale_min": 0,
            "scale_max": 2,
            "triggers_cpu_threshold": 80,
            "triggers_sqs_secret_name": aws.name,
            "triggers_sqs_aws_region": "us-west-2",
            "triggers_sqs_queue_url": "https://sqs.us-west-2.amazonaws.com/1234567890/my-queue",
            "triggers_sqs_queue_length": 5,
            "triggers_sqs_activation_queue_length": 2,
        }],
        liveness_probe={
            "enabled": False,
        },
        readiness_probe={
            "enabled": False,
        },
        startup_probe={
            "enabled": False,
        })
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-terraform-provider/sdks/go/gcore/gcore"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		aws, err := gcore.NewInferenceSecret(ctx, "aws", &gcore.InferenceSecretArgs{
    			ProjectId:              pulumi.Any(data.Gcore_project.Project.Id),
    			DataAwsAccessKeyId:     pulumi.String("my-aws-access-key-id"),
    			DataAwsSecretAccessKey: pulumi.String("my-aws-access-key"),
    		})
    		if err != nil {
    			return err
    		}
    		_, err = gcore.NewInferenceDeployment(ctx, "inf", &gcore.InferenceDeploymentArgs{
    			ProjectId:     pulumi.Any(data.Gcore_project.Project.Id),
    			Image:         pulumi.String("nginx:latest"),
    			ListeningPort: pulumi.Float64(80),
    			FlavorName:    pulumi.String("inference-4vcpu-16gib"),
    			Timeout:       pulumi.Float64(60),
    			Containers: gcore.InferenceDeploymentContainerArray{
    				&gcore.InferenceDeploymentContainerArgs{
    					RegionId:                         pulumi.Any(data.Gcore_region.Region.Id),
    					CooldownPeriod:                   pulumi.Float64(60),
    					PollingInterval:                  pulumi.Float64(60),
    					ScaleMin:                         pulumi.Float64(0),
    					ScaleMax:                         pulumi.Float64(2),
    					TriggersCpuThreshold:             pulumi.Float64(80),
    					TriggersSqsSecretName:            aws.Name,
    					TriggersSqsAwsRegion:             pulumi.String("us-west-2"),
    					TriggersSqsQueueUrl:              pulumi.String("https://sqs.us-west-2.amazonaws.com/1234567890/my-queue"),
    					TriggersSqsQueueLength:           pulumi.Float64(5),
    					TriggersSqsActivationQueueLength: pulumi.Float64(2),
    				},
    			},
    			LivenessProbe: &gcore.InferenceDeploymentLivenessProbeArgs{
    				Enabled: pulumi.Bool(false),
    			},
    			ReadinessProbe: &gcore.InferenceDeploymentReadinessProbeArgs{
    				Enabled: pulumi.Bool(false),
    			},
    			StartupProbe: &gcore.InferenceDeploymentStartupProbeArgs{
    				Enabled: pulumi.Bool(false),
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Gcore = Pulumi.Gcore;
    
    return await Deployment.RunAsync(() => 
    {
        var aws = new Gcore.InferenceSecret("aws", new()
        {
            ProjectId = data.Gcore_project.Project.Id,
            DataAwsAccessKeyId = "my-aws-access-key-id",
            DataAwsSecretAccessKey = "my-aws-access-key",
        });
    
        var inf = new Gcore.InferenceDeployment("inf", new()
        {
            ProjectId = data.Gcore_project.Project.Id,
            Image = "nginx:latest",
            ListeningPort = 80,
            FlavorName = "inference-4vcpu-16gib",
            Timeout = 60,
            Containers = new[]
            {
                new Gcore.Inputs.InferenceDeploymentContainerArgs
                {
                    RegionId = data.Gcore_region.Region.Id,
                    CooldownPeriod = 60,
                    PollingInterval = 60,
                    ScaleMin = 0,
                    ScaleMax = 2,
                    TriggersCpuThreshold = 80,
                    TriggersSqsSecretName = aws.Name,
                    TriggersSqsAwsRegion = "us-west-2",
                    TriggersSqsQueueUrl = "https://sqs.us-west-2.amazonaws.com/1234567890/my-queue",
                    TriggersSqsQueueLength = 5,
                    TriggersSqsActivationQueueLength = 2,
                },
            },
            LivenessProbe = new Gcore.Inputs.InferenceDeploymentLivenessProbeArgs
            {
                Enabled = false,
            },
            ReadinessProbe = new Gcore.Inputs.InferenceDeploymentReadinessProbeArgs
            {
                Enabled = false,
            },
            StartupProbe = new Gcore.Inputs.InferenceDeploymentStartupProbeArgs
            {
                Enabled = false,
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.gcore.InferenceSecret;
    import com.pulumi.gcore.InferenceSecretArgs;
    import com.pulumi.gcore.InferenceDeployment;
    import com.pulumi.gcore.InferenceDeploymentArgs;
    import com.pulumi.gcore.inputs.InferenceDeploymentContainerArgs;
    import com.pulumi.gcore.inputs.InferenceDeploymentLivenessProbeArgs;
    import com.pulumi.gcore.inputs.InferenceDeploymentReadinessProbeArgs;
    import com.pulumi.gcore.inputs.InferenceDeploymentStartupProbeArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var aws = new InferenceSecret("aws", InferenceSecretArgs.builder()
                .projectId(data.gcore_project().project().id())
                .dataAwsAccessKeyId("my-aws-access-key-id")
                .dataAwsSecretAccessKey("my-aws-access-key")
                .build());
    
            var inf = new InferenceDeployment("inf", InferenceDeploymentArgs.builder()
                .projectId(data.gcore_project().project().id())
                .image("nginx:latest")
                .listeningPort(80)
                .flavorName("inference-4vcpu-16gib")
                .timeout(60)
                .containers(InferenceDeploymentContainerArgs.builder()
                    .regionId(data.gcore_region().region().id())
                    .cooldownPeriod(60)
                    .pollingInterval(60)
                    .scaleMin(0)
                    .scaleMax(2)
                    .triggersCpuThreshold(80)
                    .triggersSqsSecretName(aws.name())
                    .triggersSqsAwsRegion("us-west-2")
                    .triggersSqsQueueUrl("https://sqs.us-west-2.amazonaws.com/1234567890/my-queue")
                    .triggersSqsQueueLength(5)
                    .triggersSqsActivationQueueLength(2)
                    .build())
                .livenessProbe(InferenceDeploymentLivenessProbeArgs.builder()
                    .enabled(false)
                    .build())
                .readinessProbe(InferenceDeploymentReadinessProbeArgs.builder()
                    .enabled(false)
                    .build())
                .startupProbe(InferenceDeploymentStartupProbeArgs.builder()
                    .enabled(false)
                    .build())
                .build());
    
        }
    }
    
    resources:
      aws:
        type: gcore:InferenceSecret
        properties:
          projectId: ${data.gcore_project.project.id}
          dataAwsAccessKeyId: my-aws-access-key-id
          dataAwsSecretAccessKey: my-aws-access-key
      'inf':
        type: gcore:InferenceDeployment
        properties:
          projectId: ${data.gcore_project.project.id}
          image: nginx:latest
          listeningPort: 80
          flavorName: inference-4vcpu-16gib
          timeout: 60
          containers:
            - regionId: ${data.gcore_region.region.id}
              cooldownPeriod: 60
              pollingInterval: 60
              scaleMin: 0
              scaleMax: 2
              triggersCpuThreshold: 80
              triggersSqsSecretName: ${aws.name}
              triggersSqsAwsRegion: us-west-2
              triggersSqsQueueUrl: https://sqs.us-west-2.amazonaws.com/1234567890/my-queue
              triggersSqsQueueLength: 5
              triggersSqsActivationQueueLength: 2
          livenessProbe:
            enabled: false
          readinessProbe:
            enabled: false
          startupProbe:
            enabled: false
    

    Create InferenceDeployment Resource

    Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.

    Constructor syntax

    new InferenceDeployment(name: string, args: InferenceDeploymentArgs, opts?: CustomResourceOptions);
    @overload
    def InferenceDeployment(resource_name: str,
                            args: InferenceDeploymentArgs,
                            opts: Optional[ResourceOptions] = None)
    
    @overload
    def InferenceDeployment(resource_name: str,
                            opts: Optional[ResourceOptions] = None,
                            flavor_name: Optional[str] = None,
                            listening_port: Optional[float] = None,
                            containers: Optional[Sequence[InferenceDeploymentContainerArgs]] = None,
                            image: Optional[str] = None,
                            inference_deployment_id: Optional[str] = None,
                            logging: Optional[InferenceDeploymentLoggingArgs] = None,
                            description: Optional[str] = None,
                            credentials_name: Optional[str] = None,
                            auth_enabled: Optional[bool] = None,
                            command: Optional[str] = None,
                            liveness_probe: Optional[InferenceDeploymentLivenessProbeArgs] = None,
                            envs: Optional[Mapping[str, str]] = None,
                            name: Optional[str] = None,
                            project_id: Optional[float] = None,
                            project_name: Optional[str] = None,
                            readiness_probe: Optional[InferenceDeploymentReadinessProbeArgs] = None,
                            startup_probe: Optional[InferenceDeploymentStartupProbeArgs] = None,
                            timeout: Optional[float] = None)
    func NewInferenceDeployment(ctx *Context, name string, args InferenceDeploymentArgs, opts ...ResourceOption) (*InferenceDeployment, error)
    public InferenceDeployment(string name, InferenceDeploymentArgs args, CustomResourceOptions? opts = null)
    public InferenceDeployment(String name, InferenceDeploymentArgs args)
    public InferenceDeployment(String name, InferenceDeploymentArgs args, CustomResourceOptions options)
    
    type: gcore:InferenceDeployment
    properties: # The arguments to resource properties.
    options: # Bag of options to control resource's behavior.
    
    

    Parameters

    name string
    The unique name of the resource.
    args InferenceDeploymentArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    resource_name str
    The unique name of the resource.
    args InferenceDeploymentArgs
    The arguments to resource properties.
    opts ResourceOptions
    Bag of options to control resource's behavior.
    ctx Context
    Context object for the current deployment.
    name string
    The unique name of the resource.
    args InferenceDeploymentArgs
    The arguments to resource properties.
    opts ResourceOption
    Bag of options to control resource's behavior.
    name string
    The unique name of the resource.
    args InferenceDeploymentArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    name String
    The unique name of the resource.
    args InferenceDeploymentArgs
    The arguments to resource properties.
    options CustomResourceOptions
    Bag of options to control resource's behavior.

    Constructor example

    The following reference example uses placeholder values for all input properties.

    var inferenceDeploymentResource = new Gcore.InferenceDeployment("inferenceDeploymentResource", new()
    {
        FlavorName = "string",
        ListeningPort = 0,
        Containers = new[]
        {
            new Gcore.Inputs.InferenceDeploymentContainerArgs
            {
                ScaleMax = 0,
                CooldownPeriod = 0,
                ScaleMin = 0,
                RegionId = 0,
                TriggersHttpRate = 0,
                TriggersSqsActivationQueueLength = 0,
                TotalContainers = 0,
                TriggersCpuThreshold = 0,
                TriggersGpuMemoryThreshold = 0,
                TriggersGpuUtilizationThreshold = 0,
                PollingInterval = 0,
                TriggersHttpWindow = 0,
                TriggersMemoryThreshold = 0,
                ReadyContainers = 0,
                TriggersSqsAwsEndpoint = "string",
                TriggersSqsAwsRegion = "string",
                TriggersSqsQueueLength = 0,
                TriggersSqsQueueUrl = "string",
                TriggersSqsScaleOnDelayed = false,
                TriggersSqsScaleOnFlight = false,
                TriggersSqsSecretName = "string",
            },
        },
        Image = "string",
        InferenceDeploymentId = "string",
        Logging = new Gcore.Inputs.InferenceDeploymentLoggingArgs
        {
            DestinationRegionId = 0,
            Enabled = false,
            RetentionPolicyPeriod = 0,
            TopicName = "string",
        },
        Description = "string",
        CredentialsName = "string",
        AuthEnabled = false,
        Command = "string",
        LivenessProbe = new Gcore.Inputs.InferenceDeploymentLivenessProbeArgs
        {
            Enabled = false,
            ExecCommand = "string",
            FailureThreshold = 0,
            HttpGetHeaders = 
            {
                { "string", "string" },
            },
            HttpGetHost = "string",
            HttpGetPath = "string",
            HttpGetPort = 0,
            HttpGetSchema = "string",
            InitialDelaySeconds = 0,
            PeriodSeconds = 0,
            SuccessThreshold = 0,
            TcpSocketPort = 0,
            TimeoutSeconds = 0,
        },
        Envs = 
        {
            { "string", "string" },
        },
        Name = "string",
        ProjectId = 0,
        ProjectName = "string",
        ReadinessProbe = new Gcore.Inputs.InferenceDeploymentReadinessProbeArgs
        {
            Enabled = false,
            ExecCommand = "string",
            FailureThreshold = 0,
            HttpGetHeaders = 
            {
                { "string", "string" },
            },
            HttpGetHost = "string",
            HttpGetPath = "string",
            HttpGetPort = 0,
            HttpGetSchema = "string",
            InitialDelaySeconds = 0,
            PeriodSeconds = 0,
            SuccessThreshold = 0,
            TcpSocketPort = 0,
            TimeoutSeconds = 0,
        },
        StartupProbe = new Gcore.Inputs.InferenceDeploymentStartupProbeArgs
        {
            Enabled = false,
            ExecCommand = "string",
            FailureThreshold = 0,
            HttpGetHeaders = 
            {
                { "string", "string" },
            },
            HttpGetHost = "string",
            HttpGetPath = "string",
            HttpGetPort = 0,
            HttpGetSchema = "string",
            InitialDelaySeconds = 0,
            PeriodSeconds = 0,
            SuccessThreshold = 0,
            TcpSocketPort = 0,
            TimeoutSeconds = 0,
        },
        Timeout = 0,
    });
    
    example, err := gcore.NewInferenceDeployment(ctx, "inferenceDeploymentResource", &gcore.InferenceDeploymentArgs{
    	FlavorName:    pulumi.String("string"),
    	ListeningPort: pulumi.Float64(0),
    	Containers: gcore.InferenceDeploymentContainerArray{
    		&gcore.InferenceDeploymentContainerArgs{
    			ScaleMax:                         pulumi.Float64(0),
    			CooldownPeriod:                   pulumi.Float64(0),
    			ScaleMin:                         pulumi.Float64(0),
    			RegionId:                         pulumi.Float64(0),
    			TriggersHttpRate:                 pulumi.Float64(0),
    			TriggersSqsActivationQueueLength: pulumi.Float64(0),
    			TotalContainers:                  pulumi.Float64(0),
    			TriggersCpuThreshold:             pulumi.Float64(0),
    			TriggersGpuMemoryThreshold:       pulumi.Float64(0),
    			TriggersGpuUtilizationThreshold:  pulumi.Float64(0),
    			PollingInterval:                  pulumi.Float64(0),
    			TriggersHttpWindow:               pulumi.Float64(0),
    			TriggersMemoryThreshold:          pulumi.Float64(0),
    			ReadyContainers:                  pulumi.Float64(0),
    			TriggersSqsAwsEndpoint:           pulumi.String("string"),
    			TriggersSqsAwsRegion:             pulumi.String("string"),
    			TriggersSqsQueueLength:           pulumi.Float64(0),
    			TriggersSqsQueueUrl:              pulumi.String("string"),
    			TriggersSqsScaleOnDelayed:        pulumi.Bool(false),
    			TriggersSqsScaleOnFlight:         pulumi.Bool(false),
    			TriggersSqsSecretName:            pulumi.String("string"),
    		},
    	},
    	Image:                 pulumi.String("string"),
    	InferenceDeploymentId: pulumi.String("string"),
    	Logging: &gcore.InferenceDeploymentLoggingArgs{
    		DestinationRegionId:   pulumi.Float64(0),
    		Enabled:               pulumi.Bool(false),
    		RetentionPolicyPeriod: pulumi.Float64(0),
    		TopicName:             pulumi.String("string"),
    	},
    	Description:     pulumi.String("string"),
    	CredentialsName: pulumi.String("string"),
    	AuthEnabled:     pulumi.Bool(false),
    	Command:         pulumi.String("string"),
    	LivenessProbe: &gcore.InferenceDeploymentLivenessProbeArgs{
    		Enabled:          pulumi.Bool(false),
    		ExecCommand:      pulumi.String("string"),
    		FailureThreshold: pulumi.Float64(0),
    		HttpGetHeaders: pulumi.StringMap{
    			"string": pulumi.String("string"),
    		},
    		HttpGetHost:         pulumi.String("string"),
    		HttpGetPath:         pulumi.String("string"),
    		HttpGetPort:         pulumi.Float64(0),
    		HttpGetSchema:       pulumi.String("string"),
    		InitialDelaySeconds: pulumi.Float64(0),
    		PeriodSeconds:       pulumi.Float64(0),
    		SuccessThreshold:    pulumi.Float64(0),
    		TcpSocketPort:       pulumi.Float64(0),
    		TimeoutSeconds:      pulumi.Float64(0),
    	},
    	Envs: pulumi.StringMap{
    		"string": pulumi.String("string"),
    	},
    	Name:        pulumi.String("string"),
    	ProjectId:   pulumi.Float64(0),
    	ProjectName: pulumi.String("string"),
    	ReadinessProbe: &gcore.InferenceDeploymentReadinessProbeArgs{
    		Enabled:          pulumi.Bool(false),
    		ExecCommand:      pulumi.String("string"),
    		FailureThreshold: pulumi.Float64(0),
    		HttpGetHeaders: pulumi.StringMap{
    			"string": pulumi.String("string"),
    		},
    		HttpGetHost:         pulumi.String("string"),
    		HttpGetPath:         pulumi.String("string"),
    		HttpGetPort:         pulumi.Float64(0),
    		HttpGetSchema:       pulumi.String("string"),
    		InitialDelaySeconds: pulumi.Float64(0),
    		PeriodSeconds:       pulumi.Float64(0),
    		SuccessThreshold:    pulumi.Float64(0),
    		TcpSocketPort:       pulumi.Float64(0),
    		TimeoutSeconds:      pulumi.Float64(0),
    	},
    	StartupProbe: &gcore.InferenceDeploymentStartupProbeArgs{
    		Enabled:          pulumi.Bool(false),
    		ExecCommand:      pulumi.String("string"),
    		FailureThreshold: pulumi.Float64(0),
    		HttpGetHeaders: pulumi.StringMap{
    			"string": pulumi.String("string"),
    		},
    		HttpGetHost:         pulumi.String("string"),
    		HttpGetPath:         pulumi.String("string"),
    		HttpGetPort:         pulumi.Float64(0),
    		HttpGetSchema:       pulumi.String("string"),
    		InitialDelaySeconds: pulumi.Float64(0),
    		PeriodSeconds:       pulumi.Float64(0),
    		SuccessThreshold:    pulumi.Float64(0),
    		TcpSocketPort:       pulumi.Float64(0),
    		TimeoutSeconds:      pulumi.Float64(0),
    	},
    	Timeout: pulumi.Float64(0),
    })
    
    var inferenceDeploymentResource = new InferenceDeployment("inferenceDeploymentResource", InferenceDeploymentArgs.builder()
        .flavorName("string")
        .listeningPort(0)
        .containers(InferenceDeploymentContainerArgs.builder()
            .scaleMax(0)
            .cooldownPeriod(0)
            .scaleMin(0)
            .regionId(0)
            .triggersHttpRate(0)
            .triggersSqsActivationQueueLength(0)
            .totalContainers(0)
            .triggersCpuThreshold(0)
            .triggersGpuMemoryThreshold(0)
            .triggersGpuUtilizationThreshold(0)
            .pollingInterval(0)
            .triggersHttpWindow(0)
            .triggersMemoryThreshold(0)
            .readyContainers(0)
            .triggersSqsAwsEndpoint("string")
            .triggersSqsAwsRegion("string")
            .triggersSqsQueueLength(0)
            .triggersSqsQueueUrl("string")
            .triggersSqsScaleOnDelayed(false)
            .triggersSqsScaleOnFlight(false)
            .triggersSqsSecretName("string")
            .build())
        .image("string")
        .inferenceDeploymentId("string")
        .logging(InferenceDeploymentLoggingArgs.builder()
            .destinationRegionId(0)
            .enabled(false)
            .retentionPolicyPeriod(0)
            .topicName("string")
            .build())
        .description("string")
        .credentialsName("string")
        .authEnabled(false)
        .command("string")
        .livenessProbe(InferenceDeploymentLivenessProbeArgs.builder()
            .enabled(false)
            .execCommand("string")
            .failureThreshold(0)
            .httpGetHeaders(Map.of("string", "string"))
            .httpGetHost("string")
            .httpGetPath("string")
            .httpGetPort(0)
            .httpGetSchema("string")
            .initialDelaySeconds(0)
            .periodSeconds(0)
            .successThreshold(0)
            .tcpSocketPort(0)
            .timeoutSeconds(0)
            .build())
        .envs(Map.of("string", "string"))
        .name("string")
        .projectId(0)
        .projectName("string")
        .readinessProbe(InferenceDeploymentReadinessProbeArgs.builder()
            .enabled(false)
            .execCommand("string")
            .failureThreshold(0)
            .httpGetHeaders(Map.of("string", "string"))
            .httpGetHost("string")
            .httpGetPath("string")
            .httpGetPort(0)
            .httpGetSchema("string")
            .initialDelaySeconds(0)
            .periodSeconds(0)
            .successThreshold(0)
            .tcpSocketPort(0)
            .timeoutSeconds(0)
            .build())
        .startupProbe(InferenceDeploymentStartupProbeArgs.builder()
            .enabled(false)
            .execCommand("string")
            .failureThreshold(0)
            .httpGetHeaders(Map.of("string", "string"))
            .httpGetHost("string")
            .httpGetPath("string")
            .httpGetPort(0)
            .httpGetSchema("string")
            .initialDelaySeconds(0)
            .periodSeconds(0)
            .successThreshold(0)
            .tcpSocketPort(0)
            .timeoutSeconds(0)
            .build())
        .timeout(0)
        .build());
    
    inference_deployment_resource = gcore.InferenceDeployment("inferenceDeploymentResource",
        flavor_name="string",
        listening_port=0,
        containers=[{
            "scale_max": 0,
            "cooldown_period": 0,
            "scale_min": 0,
            "region_id": 0,
            "triggers_http_rate": 0,
            "triggers_sqs_activation_queue_length": 0,
            "total_containers": 0,
            "triggers_cpu_threshold": 0,
            "triggers_gpu_memory_threshold": 0,
            "triggers_gpu_utilization_threshold": 0,
            "polling_interval": 0,
            "triggers_http_window": 0,
            "triggers_memory_threshold": 0,
            "ready_containers": 0,
            "triggers_sqs_aws_endpoint": "string",
            "triggers_sqs_aws_region": "string",
            "triggers_sqs_queue_length": 0,
            "triggers_sqs_queue_url": "string",
            "triggers_sqs_scale_on_delayed": False,
            "triggers_sqs_scale_on_flight": False,
            "triggers_sqs_secret_name": "string",
        }],
        image="string",
        inference_deployment_id="string",
        logging={
            "destination_region_id": 0,
            "enabled": False,
            "retention_policy_period": 0,
            "topic_name": "string",
        },
        description="string",
        credentials_name="string",
        auth_enabled=False,
        command="string",
        liveness_probe={
            "enabled": False,
            "exec_command": "string",
            "failure_threshold": 0,
            "http_get_headers": {
                "string": "string",
            },
            "http_get_host": "string",
            "http_get_path": "string",
            "http_get_port": 0,
            "http_get_schema": "string",
            "initial_delay_seconds": 0,
            "period_seconds": 0,
            "success_threshold": 0,
            "tcp_socket_port": 0,
            "timeout_seconds": 0,
        },
        envs={
            "string": "string",
        },
        name="string",
        project_id=0,
        project_name="string",
        readiness_probe={
            "enabled": False,
            "exec_command": "string",
            "failure_threshold": 0,
            "http_get_headers": {
                "string": "string",
            },
            "http_get_host": "string",
            "http_get_path": "string",
            "http_get_port": 0,
            "http_get_schema": "string",
            "initial_delay_seconds": 0,
            "period_seconds": 0,
            "success_threshold": 0,
            "tcp_socket_port": 0,
            "timeout_seconds": 0,
        },
        startup_probe={
            "enabled": False,
            "exec_command": "string",
            "failure_threshold": 0,
            "http_get_headers": {
                "string": "string",
            },
            "http_get_host": "string",
            "http_get_path": "string",
            "http_get_port": 0,
            "http_get_schema": "string",
            "initial_delay_seconds": 0,
            "period_seconds": 0,
            "success_threshold": 0,
            "tcp_socket_port": 0,
            "timeout_seconds": 0,
        },
        timeout=0)
    
    const inferenceDeploymentResource = new gcore.InferenceDeployment("inferenceDeploymentResource", {
        flavorName: "string",
        listeningPort: 0,
        containers: [{
            scaleMax: 0,
            cooldownPeriod: 0,
            scaleMin: 0,
            regionId: 0,
            triggersHttpRate: 0,
            triggersSqsActivationQueueLength: 0,
            totalContainers: 0,
            triggersCpuThreshold: 0,
            triggersGpuMemoryThreshold: 0,
            triggersGpuUtilizationThreshold: 0,
            pollingInterval: 0,
            triggersHttpWindow: 0,
            triggersMemoryThreshold: 0,
            readyContainers: 0,
            triggersSqsAwsEndpoint: "string",
            triggersSqsAwsRegion: "string",
            triggersSqsQueueLength: 0,
            triggersSqsQueueUrl: "string",
            triggersSqsScaleOnDelayed: false,
            triggersSqsScaleOnFlight: false,
            triggersSqsSecretName: "string",
        }],
        image: "string",
        inferenceDeploymentId: "string",
        logging: {
            destinationRegionId: 0,
            enabled: false,
            retentionPolicyPeriod: 0,
            topicName: "string",
        },
        description: "string",
        credentialsName: "string",
        authEnabled: false,
        command: "string",
        livenessProbe: {
            enabled: false,
            execCommand: "string",
            failureThreshold: 0,
            httpGetHeaders: {
                string: "string",
            },
            httpGetHost: "string",
            httpGetPath: "string",
            httpGetPort: 0,
            httpGetSchema: "string",
            initialDelaySeconds: 0,
            periodSeconds: 0,
            successThreshold: 0,
            tcpSocketPort: 0,
            timeoutSeconds: 0,
        },
        envs: {
            string: "string",
        },
        name: "string",
        projectId: 0,
        projectName: "string",
        readinessProbe: {
            enabled: false,
            execCommand: "string",
            failureThreshold: 0,
            httpGetHeaders: {
                string: "string",
            },
            httpGetHost: "string",
            httpGetPath: "string",
            httpGetPort: 0,
            httpGetSchema: "string",
            initialDelaySeconds: 0,
            periodSeconds: 0,
            successThreshold: 0,
            tcpSocketPort: 0,
            timeoutSeconds: 0,
        },
        startupProbe: {
            enabled: false,
            execCommand: "string",
            failureThreshold: 0,
            httpGetHeaders: {
                string: "string",
            },
            httpGetHost: "string",
            httpGetPath: "string",
            httpGetPort: 0,
            httpGetSchema: "string",
            initialDelaySeconds: 0,
            periodSeconds: 0,
            successThreshold: 0,
            tcpSocketPort: 0,
            timeoutSeconds: 0,
        },
        timeout: 0,
    });
    
    type: gcore:InferenceDeployment
    properties:
        authEnabled: false
        command: string
        containers:
            - cooldownPeriod: 0
              pollingInterval: 0
              readyContainers: 0
              regionId: 0
              scaleMax: 0
              scaleMin: 0
              totalContainers: 0
              triggersCpuThreshold: 0
              triggersGpuMemoryThreshold: 0
              triggersGpuUtilizationThreshold: 0
              triggersHttpRate: 0
              triggersHttpWindow: 0
              triggersMemoryThreshold: 0
              triggersSqsActivationQueueLength: 0
              triggersSqsAwsEndpoint: string
              triggersSqsAwsRegion: string
              triggersSqsQueueLength: 0
              triggersSqsQueueUrl: string
              triggersSqsScaleOnDelayed: false
              triggersSqsScaleOnFlight: false
              triggersSqsSecretName: string
        credentialsName: string
        description: string
        envs:
            string: string
        flavorName: string
        image: string
        inferenceDeploymentId: string
        listeningPort: 0
        livenessProbe:
            enabled: false
            execCommand: string
            failureThreshold: 0
            httpGetHeaders:
                string: string
            httpGetHost: string
            httpGetPath: string
            httpGetPort: 0
            httpGetSchema: string
            initialDelaySeconds: 0
            periodSeconds: 0
            successThreshold: 0
            tcpSocketPort: 0
            timeoutSeconds: 0
        logging:
            destinationRegionId: 0
            enabled: false
            retentionPolicyPeriod: 0
            topicName: string
        name: string
        projectId: 0
        projectName: string
        readinessProbe:
            enabled: false
            execCommand: string
            failureThreshold: 0
            httpGetHeaders:
                string: string
            httpGetHost: string
            httpGetPath: string
            httpGetPort: 0
            httpGetSchema: string
            initialDelaySeconds: 0
            periodSeconds: 0
            successThreshold: 0
            tcpSocketPort: 0
            timeoutSeconds: 0
        startupProbe:
            enabled: false
            execCommand: string
            failureThreshold: 0
            httpGetHeaders:
                string: string
            httpGetHost: string
            httpGetPath: string
            httpGetPort: 0
            httpGetSchema: string
            initialDelaySeconds: 0
            periodSeconds: 0
            successThreshold: 0
            tcpSocketPort: 0
            timeoutSeconds: 0
        timeout: 0
    

    InferenceDeployment Resource Properties

    To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

    Inputs

    In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.

    The InferenceDeployment resource accepts the following input properties:

    Containers List<InferenceDeploymentContainer>
    A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
    FlavorName string
    Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
    Image string
    The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
    ListeningPort double
    The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
    AuthEnabled bool
    Set to true to enable API key authentication for the inference instance.
    Command string
    Command to be executed when running a container from an image.
    CredentialsName string
    Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
    Description string
    Envs Dictionary<string, string>
    Environment variables for the inference instance.
    InferenceDeploymentId string
    The ID of this resource.
    LivenessProbe InferenceDeploymentLivenessProbe
    Logging InferenceDeploymentLogging
    Name string
    The name of the deployment. This should be unique within the scope of the project.
    ProjectId double
    ProjectName string
    ReadinessProbe InferenceDeploymentReadinessProbe
    StartupProbe InferenceDeploymentStartupProbe
    Timeout double
    Containers []InferenceDeploymentContainerArgs
    A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
    FlavorName string
    Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
    Image string
    The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
    ListeningPort float64
    The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
    AuthEnabled bool
    Set to true to enable API key authentication for the inference instance.
    Command string
    Command to be executed when running a container from an image.
    CredentialsName string
    Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
    Description string
    Envs map[string]string
    Environment variables for the inference instance.
    InferenceDeploymentId string
    The ID of this resource.
    LivenessProbe InferenceDeploymentLivenessProbeArgs
    Logging InferenceDeploymentLoggingArgs
    Name string
    The name of the deployment. This should be unique within the scope of the project.
    ProjectId float64
    ProjectName string
    ReadinessProbe InferenceDeploymentReadinessProbeArgs
    StartupProbe InferenceDeploymentStartupProbeArgs
    Timeout float64
    containers List<InferenceDeploymentContainer>
    A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
    flavorName String
    Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
    image String
    The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
    listeningPort Double
    The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
    authEnabled Boolean
    Set to true to enable API key authentication for the inference instance.
    command String
    Command to be executed when running a container from an image.
    credentialsName String
    Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
    description String
    envs Map<String,String>
    Environment variables for the inference instance.
    inferenceDeploymentId String
    The ID of this resource.
    livenessProbe InferenceDeploymentLivenessProbe
    logging InferenceDeploymentLogging
    name String
    The name of the deployment. This should be unique within the scope of the project.
    projectId Double
    projectName String
    readinessProbe InferenceDeploymentReadinessProbe
    startupProbe InferenceDeploymentStartupProbe
    timeout Double
    containers InferenceDeploymentContainer[]
    A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
    flavorName string
    Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
    image string
    The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
    listeningPort number
    The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
    authEnabled boolean
    Set to true to enable API key authentication for the inference instance.
    command string
    Command to be executed when running a container from an image.
    credentialsName string
    Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
    description string
    envs {[key: string]: string}
    Environment variables for the inference instance.
    inferenceDeploymentId string
    The ID of this resource.
    livenessProbe InferenceDeploymentLivenessProbe
    logging InferenceDeploymentLogging
    name string
    The name of the deployment. This should be unique within the scope of the project.
    projectId number
    projectName string
    readinessProbe InferenceDeploymentReadinessProbe
    startupProbe InferenceDeploymentStartupProbe
    timeout number
    containers Sequence[InferenceDeploymentContainerArgs]
    A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
    flavor_name str
    Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
    image str
    The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
    listening_port float
    The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
    auth_enabled bool
    Set to true to enable API key authentication for the inference instance.
    command str
    Command to be executed when running a container from an image.
    credentials_name str
    Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
    description str
    envs Mapping[str, str]
    Environment variables for the inference instance.
    inference_deployment_id str
    The ID of this resource.
    liveness_probe InferenceDeploymentLivenessProbeArgs
    logging InferenceDeploymentLoggingArgs
    name str
    The name of the deployment. This should be unique within the scope of the project.
    project_id float
    project_name str
    readiness_probe InferenceDeploymentReadinessProbeArgs
    startup_probe InferenceDeploymentStartupProbeArgs
    timeout float
    containers List<Property Map>
    A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
    flavorName String
    Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
    image String
    The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
    listeningPort Number
    The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
    authEnabled Boolean
    Set to true to enable API key authentication for the inference instance.
    command String
    Command to be executed when running a container from an image.
    credentialsName String
    Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
    description String
    envs Map<String>
    Environment variables for the inference instance.
    inferenceDeploymentId String
    The ID of this resource.
    livenessProbe Property Map
    logging Property Map
    name String
    The name of the deployment. This should be unique within the scope of the project.
    projectId Number
    projectName String
    readinessProbe Property Map
    startupProbe Property Map
    timeout Number

    Outputs

    All input properties are implicitly available as output properties. Additionally, the InferenceDeployment resource produces the following output properties:

    Address string
    CreatedAt string
    Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
    Id string
    The provider-assigned unique ID for this managed resource.
    Status string
    Address string
    CreatedAt string
    Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
    Id string
    The provider-assigned unique ID for this managed resource.
    Status string
    address String
    createdAt String
    Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
    id String
    The provider-assigned unique ID for this managed resource.
    status String
    address string
    createdAt string
    Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
    id string
    The provider-assigned unique ID for this managed resource.
    status string
    address str
    created_at str
    Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
    id str
    The provider-assigned unique ID for this managed resource.
    status str
    address String
    createdAt String
    Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
    id String
    The provider-assigned unique ID for this managed resource.
    status String

    Look up Existing InferenceDeployment Resource

    Get an existing InferenceDeployment resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

    public static get(name: string, id: Input<ID>, state?: InferenceDeploymentState, opts?: CustomResourceOptions): InferenceDeployment
    @staticmethod
    def get(resource_name: str,
            id: str,
            opts: Optional[ResourceOptions] = None,
            address: Optional[str] = None,
            auth_enabled: Optional[bool] = None,
            command: Optional[str] = None,
            containers: Optional[Sequence[InferenceDeploymentContainerArgs]] = None,
            created_at: Optional[str] = None,
            credentials_name: Optional[str] = None,
            description: Optional[str] = None,
            envs: Optional[Mapping[str, str]] = None,
            flavor_name: Optional[str] = None,
            image: Optional[str] = None,
            inference_deployment_id: Optional[str] = None,
            listening_port: Optional[float] = None,
            liveness_probe: Optional[InferenceDeploymentLivenessProbeArgs] = None,
            logging: Optional[InferenceDeploymentLoggingArgs] = None,
            name: Optional[str] = None,
            project_id: Optional[float] = None,
            project_name: Optional[str] = None,
            readiness_probe: Optional[InferenceDeploymentReadinessProbeArgs] = None,
            startup_probe: Optional[InferenceDeploymentStartupProbeArgs] = None,
            status: Optional[str] = None,
            timeout: Optional[float] = None) -> InferenceDeployment
    func GetInferenceDeployment(ctx *Context, name string, id IDInput, state *InferenceDeploymentState, opts ...ResourceOption) (*InferenceDeployment, error)
    public static InferenceDeployment Get(string name, Input<string> id, InferenceDeploymentState? state, CustomResourceOptions? opts = null)
    public static InferenceDeployment get(String name, Output<String> id, InferenceDeploymentState state, CustomResourceOptions options)
    resources:
      _:
        type: gcore:InferenceDeployment
        get:
          id: ${id}
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    resource_name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    The following state arguments are supported:
    Address string
    AuthEnabled bool
    Set to true to enable API key authentication for the inference instance.
    Command string
    Command to be executed when running a container from an image.
    Containers List<InferenceDeploymentContainer>
    A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
    CreatedAt string
    Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
    CredentialsName string
    Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
    Description string
    Envs Dictionary<string, string>
    Environment variables for the inference instance.
    FlavorName string
    Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
    Image string
    The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
    InferenceDeploymentId string
    The ID of this resource.
    ListeningPort double
    The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
    LivenessProbe InferenceDeploymentLivenessProbe
    Logging InferenceDeploymentLogging
    Name string
    The name of the deployment. This should be unique within the scope of the project.
    ProjectId double
    ProjectName string
    ReadinessProbe InferenceDeploymentReadinessProbe
    StartupProbe InferenceDeploymentStartupProbe
    Status string
    Timeout double
    Address string
    AuthEnabled bool
    Set to true to enable API key authentication for the inference instance.
    Command string
    Command to be executed when running a container from an image.
    Containers []InferenceDeploymentContainerArgs
    A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
    CreatedAt string
    Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
    CredentialsName string
    Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
    Description string
    Envs map[string]string
    Environment variables for the inference instance.
    FlavorName string
    Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
    Image string
    The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
    InferenceDeploymentId string
    The ID of this resource.
    ListeningPort float64
    The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
    LivenessProbe InferenceDeploymentLivenessProbeArgs
    Logging InferenceDeploymentLoggingArgs
    Name string
    The name of the deployment. This should be unique within the scope of the project.
    ProjectId float64
    ProjectName string
    ReadinessProbe InferenceDeploymentReadinessProbeArgs
    StartupProbe InferenceDeploymentStartupProbeArgs
    Status string
    Timeout float64
    address String
    authEnabled Boolean
    Set to true to enable API key authentication for the inference instance.
    command String
    Command to be executed when running a container from an image.
    containers List<InferenceDeploymentContainer>
    A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
    createdAt String
    Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
    credentialsName String
    Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
    description String
    envs Map<String,String>
    Environment variables for the inference instance.
    flavorName String
    Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
    image String
    The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
    inferenceDeploymentId String
    The ID of this resource.
    listeningPort Double
    The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
    livenessProbe InferenceDeploymentLivenessProbe
    logging InferenceDeploymentLogging
    name String
    The name of the deployment. This should be unique within the scope of the project.
    projectId Double
    projectName String
    readinessProbe InferenceDeploymentReadinessProbe
    startupProbe InferenceDeploymentStartupProbe
    status String
    timeout Double
    address string
    authEnabled boolean
    Set to true to enable API key authentication for the inference instance.
    command string
    Command to be executed when running a container from an image.
    containers InferenceDeploymentContainer[]
    A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
    createdAt string
    Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
    credentialsName string
    Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
    description string
    envs {[key: string]: string}
    Environment variables for the inference instance.
    flavorName string
    Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
    image string
    The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
    inferenceDeploymentId string
    The ID of this resource.
    listeningPort number
    The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
    livenessProbe InferenceDeploymentLivenessProbe
    logging InferenceDeploymentLogging
    name string
    The name of the deployment. This should be unique within the scope of the project.
    projectId number
    projectName string
    readinessProbe InferenceDeploymentReadinessProbe
    startupProbe InferenceDeploymentStartupProbe
    status string
    timeout number
    address str
    auth_enabled bool
    Set to true to enable API key authentication for the inference instance.
    command str
    Command to be executed when running a container from an image.
    containers Sequence[InferenceDeploymentContainerArgs]
    A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
    created_at str
    Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
    credentials_name str
    Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
    description str
    envs Mapping[str, str]
    Environment variables for the inference instance.
    flavor_name str
    Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
    image str
    The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
    inference_deployment_id str
    The ID of this resource.
    listening_port float
    The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
    liveness_probe InferenceDeploymentLivenessProbeArgs
    logging InferenceDeploymentLoggingArgs
    name str
    The name of the deployment. This should be unique within the scope of the project.
    project_id float
    project_name str
    readiness_probe InferenceDeploymentReadinessProbeArgs
    startup_probe InferenceDeploymentStartupProbeArgs
    status str
    timeout float
    address String
    authEnabled Boolean
    Set to true to enable API key authentication for the inference instance.
    command String
    Command to be executed when running a container from an image.
    containers List<Property Map>
    A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
    createdAt String
    Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
    credentialsName String
    Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
    description String
    envs Map<String>
    Environment variables for the inference instance.
    flavorName String
    Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
    image String
    The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
    inferenceDeploymentId String
    The ID of this resource.
    listeningPort Number
    The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
    livenessProbe Property Map
    logging Property Map
    name String
    The name of the deployment. This should be unique within the scope of the project.
    projectId Number
    projectName String
    readinessProbe Property Map
    startupProbe Property Map
    status String
    timeout Number

    Supporting Types

    InferenceDeploymentContainer, InferenceDeploymentContainerArgs

    CooldownPeriod double
    Cooldown period between scaling actions in seconds
    RegionId double
    Region id for the container
    ScaleMax double
    Maximum scale for the container
    ScaleMin double
    Minimum scale for the container. It can be set to 0, in which case the container will be downscaled to 0 when there is no load.
    PollingInterval double
    Polling interval for scaling triggers in seconds
    ReadyContainers double
    Status of the containers deployment. Number of ready instances
    TotalContainers double
    Status of the containers deployment. Total number of instances
    TriggersCpuThreshold double
    CPU trigger threshold configuration
    TriggersGpuMemoryThreshold double
    GPU memory trigger threshold configuration. Calculated by the DCGM_FI_DEV_MEM_COPY_UTIL metric
    TriggersGpuUtilizationThreshold double
    GPU utilization trigger threshold configuration. Calculated by the DCGM_FI_DEV_GPU_UTIL metric
    TriggersHttpRate double
    Request count per 'window' seconds for the http trigger. Required if you use http trigger
    TriggersHttpWindow double
    Time window for rate calculation in seconds. Required if you use http trigger
    TriggersMemoryThreshold double
    Memory trigger threshold configuration
    TriggersSqsActivationQueueLength double
    Number of messages for activation
    TriggersSqsAwsEndpoint string
    Custom AWS endpoint; leave empty to use the default AWS endpoint
    TriggersSqsAwsRegion string
    AWS region. Required if you use SQS trigger
    TriggersSqsQueueLength double
    Number of messages for one replica
    TriggersSqsQueueUrl string
    URL of the SQS queue. Required if you use SQS trigger
    TriggersSqsScaleOnDelayed bool
    Scale on delayed messages
    TriggersSqsScaleOnFlight bool
    Scale on in-flight messages
    TriggersSqsSecretName string
    Name of the secret with AWS credentials. Required if you use SQS trigger
    CooldownPeriod float64
    Cooldown period between scaling actions in seconds
    RegionId float64
    Region id for the container
    ScaleMax float64
    Maximum scale for the container
    ScaleMin float64
    Minimum scale for the container. It can be set to 0, in which case the container will be downscaled to 0 when there is no load.
    PollingInterval float64
    Polling interval for scaling triggers in seconds
    ReadyContainers float64
    Status of the containers deployment. Number of ready instances
    TotalContainers float64
    Status of the containers deployment. Total number of instances
    TriggersCpuThreshold float64
    CPU trigger threshold configuration
    TriggersGpuMemoryThreshold float64
    GPU memory trigger threshold configuration. Calculated by the DCGM_FI_DEV_MEM_COPY_UTIL metric
    TriggersGpuUtilizationThreshold float64
    GPU utilization trigger threshold configuration. Calculated by the DCGM_FI_DEV_GPU_UTIL metric
    TriggersHttpRate float64
    Request count per 'window' seconds for the http trigger. Required if you use http trigger
    TriggersHttpWindow float64
    Time window for rate calculation in seconds. Required if you use http trigger
    TriggersMemoryThreshold float64
    Memory trigger threshold configuration
    TriggersSqsActivationQueueLength float64
    Number of messages for activation
    TriggersSqsAwsEndpoint string
    Custom AWS endpoint; leave empty to use the default AWS endpoint
    TriggersSqsAwsRegion string
    AWS region. Required if you use SQS trigger
    TriggersSqsQueueLength float64
    Number of messages for one replica
    TriggersSqsQueueUrl string
    URL of the SQS queue. Required if you use SQS trigger
    TriggersSqsScaleOnDelayed bool
    Scale on delayed messages
    TriggersSqsScaleOnFlight bool
    Scale on in-flight messages
    TriggersSqsSecretName string
    Name of the secret with AWS credentials. Required if you use SQS trigger
    cooldownPeriod Double
    Cooldown period between scaling actions in seconds
    regionId Double
    Region id for the container
    scaleMax Double
    Maximum scale for the container
    scaleMin Double
    Minimum scale for the container. It can be set to 0, in which case the container will be downscaled to 0 when there is no load.
    pollingInterval Double
    Polling interval for scaling triggers in seconds
    readyContainers Double
    Status of the containers deployment. Number of ready instances
    totalContainers Double
    Status of the containers deployment. Total number of instances
    triggersCpuThreshold Double
    CPU trigger threshold configuration
    triggersGpuMemoryThreshold Double
    GPU memory trigger threshold configuration. Calculated by the DCGM_FI_DEV_MEM_COPY_UTIL metric
    triggersGpuUtilizationThreshold Double
    GPU utilization trigger threshold configuration. Calculated by the DCGM_FI_DEV_GPU_UTIL metric
    triggersHttpRate Double
    Request count per 'window' seconds for the http trigger. Required if you use http trigger
    triggersHttpWindow Double
    Time window for rate calculation in seconds. Required if you use http trigger
    triggersMemoryThreshold Double
    Memory trigger threshold configuration
    triggersSqsActivationQueueLength Double
    Number of messages for activation
    triggersSqsAwsEndpoint String
    Custom AWS endpoint; leave empty to use the default AWS endpoint
    triggersSqsAwsRegion String
    AWS region. Required if you use SQS trigger
    triggersSqsQueueLength Double
    Number of messages for one replica
    triggersSqsQueueUrl String
    URL of the SQS queue. Required if you use SQS trigger
    triggersSqsScaleOnDelayed Boolean
    Scale on delayed messages
    triggersSqsScaleOnFlight Boolean
    Scale on in-flight messages
    triggersSqsSecretName String
    Name of the secret with AWS credentials. Required if you use SQS trigger
    cooldownPeriod number
    Cooldown period between scaling actions in seconds
    regionId number
    Region id for the container
    scaleMax number
    Maximum scale for the container
    scaleMin number
    Minimum scale for the container. It can be set to 0, in which case the container will be downscaled to 0 when there is no load.
    pollingInterval number
    Polling interval for scaling triggers in seconds
    readyContainers number
    Status of the containers deployment. Number of ready instances
    totalContainers number
    Status of the containers deployment. Total number of instances
    triggersCpuThreshold number
    CPU trigger threshold configuration
    triggersGpuMemoryThreshold number
    GPU memory trigger threshold configuration. Calculated by the DCGM_FI_DEV_MEM_COPY_UTIL metric
    triggersGpuUtilizationThreshold number
    GPU utilization trigger threshold configuration. Calculated by the DCGM_FI_DEV_GPU_UTIL metric
    triggersHttpRate number
    Request count per 'window' seconds for the http trigger. Required if you use http trigger
    triggersHttpWindow number
    Time window for rate calculation in seconds. Required if you use http trigger
    triggersMemoryThreshold number
    Memory trigger threshold configuration
    triggersSqsActivationQueueLength number
    Number of messages for activation
    triggersSqsAwsEndpoint string
    Custom AWS endpoint; leave empty to use the default AWS endpoint
    triggersSqsAwsRegion string
    AWS region. Required if you use SQS trigger
    triggersSqsQueueLength number
    Number of messages for one replica
    triggersSqsQueueUrl string
    URL of the SQS queue. Required if you use SQS trigger
    triggersSqsScaleOnDelayed boolean
    Scale on delayed messages
    triggersSqsScaleOnFlight boolean
    Scale on in-flight messages
    triggersSqsSecretName string
    Name of the secret with AWS credentials. Required if you use SQS trigger
    cooldown_period float
    Cooldown period between scaling actions in seconds
    region_id float
    Region id for the container
    scale_max float
    Maximum scale for the container
    scale_min float
    Minimum scale for the container. It can be set to 0, in which case the container will be downscaled to 0 when there is no load.
    polling_interval float
    Polling interval for scaling triggers in seconds
    ready_containers float
    Status of the containers deployment. Number of ready instances
    total_containers float
    Status of the containers deployment. Total number of instances
    triggers_cpu_threshold float
    CPU trigger threshold configuration
    triggers_gpu_memory_threshold float
    GPU memory trigger threshold configuration. Calculated by the DCGM_FI_DEV_MEM_COPY_UTIL metric
    triggers_gpu_utilization_threshold float
    GPU utilization trigger threshold configuration. Calculated by the DCGM_FI_DEV_GPU_UTIL metric
    triggers_http_rate float
    Request count per 'window' seconds for the http trigger. Required if you use http trigger
    triggers_http_window float
    Time window for rate calculation in seconds. Required if you use http trigger
    triggers_memory_threshold float
    Memory trigger threshold configuration
    triggers_sqs_activation_queue_length float
    Number of messages for activation
    triggers_sqs_aws_endpoint str
    Custom AWS endpoint; leave empty to use the default AWS endpoint
    triggers_sqs_aws_region str
    AWS region. Required if you use SQS trigger
    triggers_sqs_queue_length float
    Number of messages for one replica
    triggers_sqs_queue_url str
    URL of the SQS queue. Required if you use SQS trigger
    triggers_sqs_scale_on_delayed bool
    Scale on delayed messages
    triggers_sqs_scale_on_flight bool
    Scale on in-flight messages
    triggers_sqs_secret_name str
    Name of the secret with AWS credentials. Required if you use SQS trigger
    cooldownPeriod Number
    Cooldown period between scaling actions in seconds
    regionId Number
    Region id for the container
    scaleMax Number
    Maximum scale for the container
    scaleMin Number
    Minimum scale for the container. It can be set to 0, in which case the container will be downscaled to 0 when there is no load.
    pollingInterval Number
    Polling interval for scaling triggers in seconds
    readyContainers Number
    Status of the containers deployment. Number of ready instances
    totalContainers Number
    Status of the containers deployment. Total number of instances
    triggersCpuThreshold Number
    CPU trigger threshold configuration
    triggersGpuMemoryThreshold Number
    GPU memory trigger threshold configuration. Calculated by the DCGM_FI_DEV_MEM_COPY_UTIL metric
    triggersGpuUtilizationThreshold Number
    GPU utilization trigger threshold configuration. Calculated by the DCGM_FI_DEV_GPU_UTIL metric
    triggersHttpRate Number
    Request count per 'window' seconds for the http trigger. Required if you use http trigger
    triggersHttpWindow Number
    Time window for rate calculation in seconds. Required if you use http trigger
    triggersMemoryThreshold Number
    Memory trigger threshold configuration
    triggersSqsActivationQueueLength Number
    Number of messages for activation
    triggersSqsAwsEndpoint String
    Custom AWS endpoint; leave empty to use the default AWS endpoint
    triggersSqsAwsRegion String
    AWS region. Required if you use SQS trigger
    triggersSqsQueueLength Number
    Number of messages for one replica
    triggersSqsQueueUrl String
    URL of the SQS queue. Required if you use SQS trigger
    triggersSqsScaleOnDelayed Boolean
    Scale on delayed messages
    triggersSqsScaleOnFlight Boolean
    Scale on in-flight messages
    triggersSqsSecretName String
    Name of the secret with AWS credentials. Required if you use SQS trigger

    InferenceDeploymentLivenessProbe, InferenceDeploymentLivenessProbeArgs

    Enabled bool
    Enable or disable probe
    ExecCommand string
    Command to execute in the container to determine the health
    FailureThreshold double
    Number of failed probes before the container is considered unhealthy
    HttpGetHeaders Dictionary<string, string>
    HTTP headers to use when sending a HTTP GET request, valid only for HTTP probes
    HttpGetHost string
    Host name to connect to, valid only for HTTP probes
    HttpGetPath string
    Path to access on the HTTP server, valid only for HTTP probes
    HttpGetPort double
    Number of the port to access on the HTTP server, valid only for HTTP probes
    HttpGetSchema string
    Scheme to use for connecting to the host, valid only for HTTP probes
    InitialDelaySeconds double
    Number of seconds after the container has started before liveness probes are initiated
    PeriodSeconds double
    How often (in seconds) to perform the probe
    SuccessThreshold double
    Minimum consecutive successes for the probe to be considered successful after having failed
    TcpSocketPort double
    Port to connect to
    TimeoutSeconds double
    Number of seconds after which the probe times out
    Enabled bool
    Enable or disable probe
    ExecCommand string
    Command to execute in the container to determine the health
    FailureThreshold float64
    Number of failed probes before the container is considered unhealthy
    HttpGetHeaders map[string]string
    HTTP headers to use when sending a HTTP GET request, valid only for HTTP probes
    HttpGetHost string
    Host name to connect to, valid only for HTTP probes
    HttpGetPath string
    Path to access on the HTTP server, valid only for HTTP probes
    HttpGetPort float64
    Number of the port to access on the HTTP server, valid only for HTTP probes
    HttpGetSchema string
    Scheme to use for connecting to the host, valid only for HTTP probes
    InitialDelaySeconds float64
    Number of seconds after the container has started before liveness probes are initiated
    PeriodSeconds float64
    How often (in seconds) to perform the probe
    SuccessThreshold float64
    Minimum consecutive successes for the probe to be considered successful after having failed
    TcpSocketPort float64
    Port to connect to
    TimeoutSeconds float64
    Number of seconds after which the probe times out
    enabled Boolean
    Enable or disable probe
    execCommand String
    Command to execute in the container to determine the health
    failureThreshold Double
    Number of failed probes before the container is considered unhealthy
    httpGetHeaders Map<String,String>
    HTTP headers to use when sending a HTTP GET request, valid only for HTTP probes
    httpGetHost String
    Host name to connect to, valid only for HTTP probes
    httpGetPath String
    Path to access on the HTTP server, valid only for HTTP probes
    httpGetPort Double
    Number of the port to access on the HTTP server, valid only for HTTP probes
    httpGetSchema String
    Scheme to use for connecting to the host, valid only for HTTP probes
    initialDelaySeconds Double
    Number of seconds after the container has started before liveness probes are initiated
    periodSeconds Double
    How often (in seconds) to perform the probe
    successThreshold Double
    Minimum consecutive successes for the probe to be considered successful after having failed
    tcpSocketPort Double
    Port to connect to
    timeoutSeconds Double
    Number of seconds after which the probe times out
    enabled boolean
    Enable or disable probe
    execCommand string
    Command to execute in the container to determine the health
    failureThreshold number
    Number of failed probes before the container is considered unhealthy
    httpGetHeaders {[key: string]: string}
    HTTP headers to use when sending a HTTP GET request, valid only for HTTP probes
    httpGetHost string
    Host name to connect to, valid only for HTTP probes
    httpGetPath string
    Path to access on the HTTP server, valid only for HTTP probes
    httpGetPort number
    Number of the port to access on the HTTP server, valid only for HTTP probes
    httpGetSchema string
    Scheme to use for connecting to the host, valid only for HTTP probes
    initialDelaySeconds number
    Number of seconds after the container has started before liveness probes are initiated
    periodSeconds number
    How often (in seconds) to perform the probe
    successThreshold number
    Minimum consecutive successes for the probe to be considered successful after having failed
    tcpSocketPort number
    Port to connect to
    timeoutSeconds number
    Number of seconds after which the probe times out
    enabled bool
    Enable or disable probe
    exec_command str
    Command to execute in the container to determine the health
    failure_threshold float
    Number of failed probes before the container is considered unhealthy
    http_get_headers Mapping[str, str]
    HTTP headers to use when sending a HTTP GET request, valid only for HTTP probes
    http_get_host str
    Host name to connect to, valid only for HTTP probes
    http_get_path str
    Path to access on the HTTP server, valid only for HTTP probes
    http_get_port float
    Number of the port to access on the HTTP server, valid only for HTTP probes
    http_get_schema str
    Scheme to use for connecting to the host, valid only for HTTP probes
    initial_delay_seconds float
    Number of seconds after the container has started before liveness probes are initiated
    period_seconds float
    How often (in seconds) to perform the probe
    success_threshold float
    Minimum consecutive successes for the probe to be considered successful after having failed
    tcp_socket_port float
    Port to connect to
    timeout_seconds float
    Number of seconds after which the probe times out
    enabled Boolean
    Enable or disable probe
    execCommand String
    Command to execute in the container to determine the health
    failureThreshold Number
    Number of failed probes before the container is considered unhealthy
    httpGetHeaders Map<String>
    HTTP headers to use when sending a HTTP GET request, valid only for HTTP probes
    httpGetHost String
    Host name to connect to, valid only for HTTP probes
    httpGetPath String
    Path to access on the HTTP server, valid only for HTTP probes
    httpGetPort Number
    Number of the port to access on the HTTP server, valid only for HTTP probes
    httpGetSchema String
    Scheme to use for connecting to the host, valid only for HTTP probes
    initialDelaySeconds Number
    Number of seconds after the container has started before liveness probes are initiated
    periodSeconds Number
    How often (in seconds) to perform the probe
    successThreshold Number
    Minimum consecutive successes for the probe to be considered successful after having failed
    tcpSocketPort Number
    Port to connect to
    timeoutSeconds Number
    Number of seconds after which the probe times out

    InferenceDeploymentLogging, InferenceDeploymentLoggingArgs

    InferenceDeploymentReadinessProbe, InferenceDeploymentReadinessProbeArgs

    Enabled bool
    Enable or disable probe
    ExecCommand string
    Command to execute in the container to determine the health
    FailureThreshold double
    Number of failed probes before the container is considered unhealthy
    HttpGetHeaders Dictionary<string, string>
    HTTP headers to use when sending a HTTP GET request, valid only for HTTP probes
    HttpGetHost string
    Host name to connect to, valid only for HTTP probes
    HttpGetPath string
    Path to access on the HTTP server, valid only for HTTP probes
    HttpGetPort double
    Number of the port to access on the HTTP server, valid only for HTTP probes
    HttpGetSchema string
    Scheme to use for connecting to the host, valid only for HTTP probes
    InitialDelaySeconds double
    Number of seconds after the container has started before readiness probes are initiated
    PeriodSeconds double
    How often (in seconds) to perform the probe
    SuccessThreshold double
    Minimum consecutive successes for the probe to be considered successful after having failed
    TcpSocketPort double
    Port to connect to
    TimeoutSeconds double
    Number of seconds after which the probe times out
    Enabled bool
    Enable or disable probe
    ExecCommand string
    Command to execute in the container to determine the health
    FailureThreshold float64
    Number of failed probes before the container is considered unhealthy
    HttpGetHeaders map[string]string
    HTTP headers to use when sending a HTTP GET request, valid only for HTTP probes
    HttpGetHost string
    Host name to connect to, valid only for HTTP probes
    HttpGetPath string
    Path to access on the HTTP server, valid only for HTTP probes
    HttpGetPort float64
    Number of the port to access on the HTTP server, valid only for HTTP probes
    HttpGetSchema string
    Scheme to use for connecting to the host, valid only for HTTP probes
    InitialDelaySeconds float64
    Number of seconds after the container has started before readiness probes are initiated
    PeriodSeconds float64
    How often (in seconds) to perform the probe
    SuccessThreshold float64
    Minimum consecutive successes for the probe to be considered successful after having failed
    TcpSocketPort float64
    Port to connect to
    TimeoutSeconds float64
    Number of seconds after which the probe times out
    enabled Boolean
    Enable or disable probe
    execCommand String
    Command to execute in the container to determine the health
    failureThreshold Double
    Number of failed probes before the container is considered unhealthy
    httpGetHeaders Map<String,String>
    HTTP headers to use when sending a HTTP GET request, valid only for HTTP probes
    httpGetHost String
    Host name to connect to, valid only for HTTP probes
    httpGetPath String
    Path to access on the HTTP server, valid only for HTTP probes
    httpGetPort Double
    Number of the port to access on the HTTP server, valid only for HTTP probes
    httpGetSchema String
    Scheme to use for connecting to the host, valid only for HTTP probes
    initialDelaySeconds Double
    Number of seconds after the container has started before readiness probes are initiated
    periodSeconds Double
    How often (in seconds) to perform the probe
    successThreshold Double
    Minimum consecutive successes for the probe to be considered successful after having failed
    tcpSocketPort Double
    Port to connect to
    timeoutSeconds Double
    Number of seconds after which the probe times out
    enabled boolean
    Enable or disable probe
    execCommand string
    Command to execute in the container to determine the health
    failureThreshold number
    Number of failed probes before the container is considered unhealthy
    httpGetHeaders {[key: string]: string}
    HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
    httpGetHost string
    Host name to connect to, valid only for HTTP probes
    httpGetPath string
    Path to access on the HTTP server, valid only for HTTP probes
    httpGetPort number
    Number of the port to access on the HTTP server, valid only for HTTP probes
    httpGetSchema string
    Scheme to use for connecting to the host, valid only for HTTP probes
    initialDelaySeconds number
    Number of seconds after the container has started before liveness probes are initiated
    periodSeconds number
    How often (in seconds) to perform the probe
    successThreshold number
    Minimum consecutive successes for the probe to be considered successful after having failed
    tcpSocketPort number
    Port to connect to
    timeoutSeconds number
    Number of seconds after which the probe times out
    enabled bool
    Enable or disable probe
    exec_command str
    Command to execute in the container to determine the health
    failure_threshold float
    Number of failed probes before the container is considered unhealthy
    http_get_headers Mapping[str, str]
    HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
    http_get_host str
    Host name to connect to, valid only for HTTP probes
    http_get_path str
    Path to access on the HTTP server, valid only for HTTP probes
    http_get_port float
    Number of the port to access on the HTTP server, valid only for HTTP probes
    http_get_schema str
    Scheme to use for connecting to the host, valid only for HTTP probes
    initial_delay_seconds float
    Number of seconds after the container has started before liveness probes are initiated
    period_seconds float
    How often (in seconds) to perform the probe
    success_threshold float
    Minimum consecutive successes for the probe to be considered successful after having failed
    tcp_socket_port float
    Port to connect to
    timeout_seconds float
    Number of seconds after which the probe times out
    enabled Boolean
    Enable or disable probe
    execCommand String
    Command to execute in the container to determine the health
    failureThreshold Number
    Number of failed probes before the container is considered unhealthy
    httpGetHeaders Map<String>
    HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
    httpGetHost String
    Host name to connect to, valid only for HTTP probes
    httpGetPath String
    Path to access on the HTTP server, valid only for HTTP probes
    httpGetPort Number
    Number of the port to access on the HTTP server, valid only for HTTP probes
    httpGetSchema String
    Scheme to use for connecting to the host, valid only for HTTP probes
    initialDelaySeconds Number
    Number of seconds after the container has started before liveness probes are initiated
    periodSeconds Number
    How often (in seconds) to perform the probe
    successThreshold Number
    Minimum consecutive successes for the probe to be considered successful after having failed
    tcpSocketPort Number
    Port to connect to
    timeoutSeconds Number
    Number of seconds after which the probe times out

    InferenceDeploymentStartupProbe, InferenceDeploymentStartupProbeArgs

    Enabled bool
    Enable or disable probe
    ExecCommand string
    Command to execute in the container to determine the health
    FailureThreshold double
    Number of failed probes before the container is considered unhealthy
    HttpGetHeaders Dictionary<string, string>
    HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
    HttpGetHost string
    Host name to connect to, valid only for HTTP probes
    HttpGetPath string
    Path to access on the HTTP server, valid only for HTTP probes
    HttpGetPort double
    Number of the port to access on the HTTP server, valid only for HTTP probes
    HttpGetSchema string
    Scheme to use for connecting to the host, valid only for HTTP probes
    InitialDelaySeconds double
    Number of seconds after the container has started before liveness probes are initiated
    PeriodSeconds double
    How often (in seconds) to perform the probe
    SuccessThreshold double
    Minimum consecutive successes for the probe to be considered successful after having failed
    TcpSocketPort double
    Port to connect to
    TimeoutSeconds double
    Number of seconds after which the probe times out
    Enabled bool
    Enable or disable probe
    ExecCommand string
    Command to execute in the container to determine the health
    FailureThreshold float64
    Number of failed probes before the container is considered unhealthy
    HttpGetHeaders map[string]string
    HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
    HttpGetHost string
    Host name to connect to, valid only for HTTP probes
    HttpGetPath string
    Path to access on the HTTP server, valid only for HTTP probes
    HttpGetPort float64
    Number of the port to access on the HTTP server, valid only for HTTP probes
    HttpGetSchema string
    Scheme to use for connecting to the host, valid only for HTTP probes
    InitialDelaySeconds float64
    Number of seconds after the container has started before liveness probes are initiated
    PeriodSeconds float64
    How often (in seconds) to perform the probe
    SuccessThreshold float64
    Minimum consecutive successes for the probe to be considered successful after having failed
    TcpSocketPort float64
    Port to connect to
    TimeoutSeconds float64
    Number of seconds after which the probe times out
    enabled Boolean
    Enable or disable probe
    execCommand String
    Command to execute in the container to determine the health
    failureThreshold Double
    Number of failed probes before the container is considered unhealthy
    httpGetHeaders Map<String,String>
    HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
    httpGetHost String
    Host name to connect to, valid only for HTTP probes
    httpGetPath String
    Path to access on the HTTP server, valid only for HTTP probes
    httpGetPort Double
    Number of the port to access on the HTTP server, valid only for HTTP probes
    httpGetSchema String
    Scheme to use for connecting to the host, valid only for HTTP probes
    initialDelaySeconds Double
    Number of seconds after the container has started before liveness probes are initiated
    periodSeconds Double
    How often (in seconds) to perform the probe
    successThreshold Double
    Minimum consecutive successes for the probe to be considered successful after having failed
    tcpSocketPort Double
    Port to connect to
    timeoutSeconds Double
    Number of seconds after which the probe times out
    enabled boolean
    Enable or disable probe
    execCommand string
    Command to execute in the container to determine the health
    failureThreshold number
    Number of failed probes before the container is considered unhealthy
    httpGetHeaders {[key: string]: string}
    HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
    httpGetHost string
    Host name to connect to, valid only for HTTP probes
    httpGetPath string
    Path to access on the HTTP server, valid only for HTTP probes
    httpGetPort number
    Number of the port to access on the HTTP server, valid only for HTTP probes
    httpGetSchema string
    Scheme to use for connecting to the host, valid only for HTTP probes
    initialDelaySeconds number
    Number of seconds after the container has started before liveness probes are initiated
    periodSeconds number
    How often (in seconds) to perform the probe
    successThreshold number
    Minimum consecutive successes for the probe to be considered successful after having failed
    tcpSocketPort number
    Port to connect to
    timeoutSeconds number
    Number of seconds after which the probe times out
    enabled bool
    Enable or disable probe
    exec_command str
    Command to execute in the container to determine the health
    failure_threshold float
    Number of failed probes before the container is considered unhealthy
    http_get_headers Mapping[str, str]
    HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
    http_get_host str
    Host name to connect to, valid only for HTTP probes
    http_get_path str
    Path to access on the HTTP server, valid only for HTTP probes
    http_get_port float
    Number of the port to access on the HTTP server, valid only for HTTP probes
    http_get_schema str
    Scheme to use for connecting to the host, valid only for HTTP probes
    initial_delay_seconds float
    Number of seconds after the container has started before liveness probes are initiated
    period_seconds float
    How often (in seconds) to perform the probe
    success_threshold float
    Minimum consecutive successes for the probe to be considered successful after having failed
    tcp_socket_port float
    Port to connect to
    timeout_seconds float
    Number of seconds after which the probe times out
    enabled Boolean
    Enable or disable probe
    execCommand String
    Command to execute in the container to determine the health
    failureThreshold Number
    Number of failed probes before the container is considered unhealthy
    httpGetHeaders Map<String>
    HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
    httpGetHost String
    Host name to connect to, valid only for HTTP probes
    httpGetPath String
    Path to access on the HTTP server, valid only for HTTP probes
    httpGetPort Number
    Number of the port to access on the HTTP server, valid only for HTTP probes
    httpGetSchema String
    Scheme to use for connecting to the host, valid only for HTTP probes
    initialDelaySeconds Number
    Number of seconds after the container has started before liveness probes are initiated
    periodSeconds Number
    How often (in seconds) to perform the probe
    successThreshold Number
    Minimum consecutive successes for the probe to be considered successful after having failed
    tcpSocketPort Number
    Port to connect to
    timeoutSeconds Number
    Number of seconds after which the probe times out

    Import

    Import using the <project_id>:<inference_deployment_name> format:

    $ pulumi import gcore:index/inferenceDeployment:InferenceDeployment inf1 1:my-first-inference
    

    To learn more about importing existing cloud resources, see Importing resources.

    Package Details

    Repository
    gcore g-core/terraform-provider-gcore
    License
    Notes
    This Pulumi package is based on the gcore Terraform Provider.
    gcore logo
    gcore 0.22.0 published on Wednesday, Apr 30, 2025 by g-core