gcp.vertex.AiEndpointWithModelGardenDeployment

Google Cloud v8.40.0 published on Monday, Aug 11, 2025 by Pulumi

    Create an Endpoint and deploy a Model Garden model to it.

    To get more information about EndpointWithModelGardenDeployment, see the Vertex AI API documentation.

    Example Usage

    Vertex Ai Deploy Basic
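
    This minimal example deploys a publisher model from Model Garden to a new endpoint; modelConfig.acceptEula accepts the model's license terms.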

    import * as pulumi from "@pulumi/pulumi";
    import * as gcp from "@pulumi/gcp";
    
    const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
        publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
        location: "us-central1",
        modelConfig: {
            acceptEula: true,
        },
    });
    
    import pulumi
    import pulumi_gcp as gcp
    
    deploy = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy",
        publisher_model_name="publishers/google/models/paligemma@paligemma-224-float32",
        location="us-central1",
        model_config={
            "accept_eula": True,
        })
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/vertex"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy", &vertex.AiEndpointWithModelGardenDeploymentArgs{
    			PublisherModelName: pulumi.String("publishers/google/models/paligemma@paligemma-224-float32"),
    			Location:           pulumi.String("us-central1"),
    			ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
    				AcceptEula: pulumi.Bool(true),
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Gcp = Pulumi.Gcp;
    
    return await Deployment.RunAsync(() => 
    {
        var deploy = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy", new()
        {
            PublisherModelName = "publishers/google/models/paligemma@paligemma-224-float32",
            Location = "us-central1",
            ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
            {
                AcceptEula = true,
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeployment;
    import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeploymentArgs;
    import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var deploy = new AiEndpointWithModelGardenDeployment("deploy", AiEndpointWithModelGardenDeploymentArgs.builder()
                .publisherModelName("publishers/google/models/paligemma@paligemma-224-float32")
                .location("us-central1")
                .modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
                    .acceptEula(true)
                    .build())
                .build());
    
        }
    }
    
    resources:
      deploy:
        type: gcp:vertex:AiEndpointWithModelGardenDeployment
        properties:
          publisherModelName: publishers/google/models/paligemma@paligemma-224-float32
          location: us-central1
          modelConfig:
            acceptEula: true
    

    Vertex Ai Deploy Huggingface Model
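
    This example deploys a model hosted on Hugging Face by setting huggingFaceModelId instead of publisherModelName.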

    import * as pulumi from "@pulumi/pulumi";
    import * as gcp from "@pulumi/gcp";
    
    const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
        huggingFaceModelId: "Qwen/Qwen3-0.6B",
        location: "us-central1",
        modelConfig: {
            acceptEula: true,
        },
    });
    
    import pulumi
    import pulumi_gcp as gcp
    
    deploy = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy",
        hugging_face_model_id="Qwen/Qwen3-0.6B",
        location="us-central1",
        model_config={
            "accept_eula": True,
        })
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/vertex"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy", &vertex.AiEndpointWithModelGardenDeploymentArgs{
    			HuggingFaceModelId: pulumi.String("Qwen/Qwen3-0.6B"),
    			Location:           pulumi.String("us-central1"),
    			ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
    				AcceptEula: pulumi.Bool(true),
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Gcp = Pulumi.Gcp;
    
    return await Deployment.RunAsync(() => 
    {
        var deploy = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy", new()
        {
            HuggingFaceModelId = "Qwen/Qwen3-0.6B",
            Location = "us-central1",
            ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
            {
                AcceptEula = true,
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeployment;
    import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeploymentArgs;
    import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var deploy = new AiEndpointWithModelGardenDeployment("deploy", AiEndpointWithModelGardenDeploymentArgs.builder()
                .huggingFaceModelId("Qwen/Qwen3-0.6B")
                .location("us-central1")
                .modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
                    .acceptEula(true)
                    .build())
                .build());
    
        }
    }
    
    resources:
      deploy:
        type: gcp:vertex:AiEndpointWithModelGardenDeployment
        properties:
          huggingFaceModelId: Qwen/Qwen3-0.6B
          location: us-central1
          modelConfig:
            acceptEula: true
    

    Vertex Ai Deploy With Configs
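
    This example adds a deployConfig that requests dedicated resources: a g2-standard-16 machine with one NVIDIA L4 accelerator and a minimum of one replica.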

    import * as pulumi from "@pulumi/pulumi";
    import * as gcp from "@pulumi/gcp";
    
    const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
        publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
        location: "us-central1",
        modelConfig: {
            acceptEula: true,
        },
        deployConfig: {
            dedicatedResources: {
                machineSpec: {
                    machineType: "g2-standard-16",
                    acceleratorType: "NVIDIA_L4",
                    acceleratorCount: 1,
                },
                minReplicaCount: 1,
            },
        },
    });
    
    import pulumi
    import pulumi_gcp as gcp
    
    deploy = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy",
        publisher_model_name="publishers/google/models/paligemma@paligemma-224-float32",
        location="us-central1",
        model_config={
            "accept_eula": True,
        },
        deploy_config={
            "dedicated_resources": {
                "machine_spec": {
                    "machine_type": "g2-standard-16",
                    "accelerator_type": "NVIDIA_L4",
                    "accelerator_count": 1,
                },
                "min_replica_count": 1,
            },
        })
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/vertex"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy", &vertex.AiEndpointWithModelGardenDeploymentArgs{
    			PublisherModelName: pulumi.String("publishers/google/models/paligemma@paligemma-224-float32"),
    			Location:           pulumi.String("us-central1"),
    			ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
    				AcceptEula: pulumi.Bool(true),
    			},
    			DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
    				DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
    					MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
    						MachineType:      pulumi.String("g2-standard-16"),
    						AcceleratorType:  pulumi.String("NVIDIA_L4"),
    						AcceleratorCount: pulumi.Int(1),
    					},
    					MinReplicaCount: pulumi.Int(1),
    				},
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Gcp = Pulumi.Gcp;
    
    return await Deployment.RunAsync(() => 
    {
        var deploy = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy", new()
        {
            PublisherModelName = "publishers/google/models/paligemma@paligemma-224-float32",
            Location = "us-central1",
            ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
            {
                AcceptEula = true,
            },
            DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
            {
                DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
                {
                    MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
                    {
                        MachineType = "g2-standard-16",
                        AcceleratorType = "NVIDIA_L4",
                        AcceleratorCount = 1,
                    },
                    MinReplicaCount = 1,
                },
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeployment;
    import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeploymentArgs;
    import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs;
    import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs;
    import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs;
    import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var deploy = new AiEndpointWithModelGardenDeployment("deploy", AiEndpointWithModelGardenDeploymentArgs.builder()
                .publisherModelName("publishers/google/models/paligemma@paligemma-224-float32")
                .location("us-central1")
                .modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
                    .acceptEula(true)
                    .build())
                .deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
                    .dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
                        .machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
                            .machineType("g2-standard-16")
                            .acceleratorType("NVIDIA_L4")
                            .acceleratorCount(1)
                            .build())
                        .minReplicaCount(1)
                        .build())
                    .build())
                .build());
    
        }
    }
    
    resources:
      deploy:
        type: gcp:vertex:AiEndpointWithModelGardenDeployment
        properties:
          publisherModelName: publishers/google/models/paligemma@paligemma-224-float32
          location: us-central1
          modelConfig:
            acceptEula: true
          deployConfig:
            dedicatedResources:
              machineSpec:
                machineType: g2-standard-16
                acceleratorType: NVIDIA_L4
                acceleratorCount: 1
              minReplicaCount: 1
    

    Vertex Ai Deploy Multiple Models In Parallel
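
    This example declares three deployments with no dependencies between them, so Pulumi creates them in parallel.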

    import * as pulumi from "@pulumi/pulumi";
    import * as gcp from "@pulumi/gcp";
    
    const deploy_gemma_11_2b_it = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it", {
        publisherModelName: "publishers/google/models/gemma@gemma-1.1-2b-it",
        location: "us-central1",
        modelConfig: {
            acceptEula: true,
        },
        deployConfig: {
            dedicatedResources: {
                machineSpec: {
                    machineType: "g2-standard-12",
                    acceleratorType: "us-central1",
                    acceleratorCount: 1,
                },
                minReplicaCount: 1,
            },
        },
    });
    const deploy_qwen3_06b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b", {
        huggingFaceModelId: "Qwen/Qwen3-0.6B",
        location: "us-central1",
        modelConfig: {
            acceptEula: true,
        },
        deployConfig: {
            dedicatedResources: {
                machineSpec: {
                    machineType: "g2-standard-12",
                    acceleratorType: "NVIDIA_L4",
                    acceleratorCount: 1,
                },
                minReplicaCount: 1,
            },
        },
    });
    const deploy_llama_32_1b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b", {
        publisherModelName: "publishers/meta/models/llama3-2@llama-3.2-1b",
        location: "us-central1",
        modelConfig: {
            acceptEula: true,
        },
        deployConfig: {
            dedicatedResources: {
                machineSpec: {
                    machineType: "g2-standard-12",
                    acceleratorType: "NVIDIA_L4",
                    acceleratorCount: 1,
                },
                minReplicaCount: 1,
            },
        },
    });
    
    import pulumi
    import pulumi_gcp as gcp
    
    deploy_gemma_11_2b_it = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it",
        publisher_model_name="publishers/google/models/gemma@gemma-1.1-2b-it",
        location="us-central1",
        model_config={
            "accept_eula": True,
        },
        deploy_config={
            "dedicated_resources": {
                "machine_spec": {
                    "machine_type": "g2-standard-12",
                    "accelerator_type": "us-central1",
                    "accelerator_count": 1,
                },
                "min_replica_count": 1,
            },
        })
    deploy_qwen3_06b = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b",
        hugging_face_model_id="Qwen/Qwen3-0.6B",
        location="us-central1",
        model_config={
            "accept_eula": True,
        },
        deploy_config={
            "dedicated_resources": {
                "machine_spec": {
                    "machine_type": "g2-standard-12",
                    "accelerator_type": "NVIDIA_L4",
                    "accelerator_count": 1,
                },
                "min_replica_count": 1,
            },
        })
    deploy_llama_32_1b = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b",
        publisher_model_name="publishers/meta/models/llama3-2@llama-3.2-1b",
        location="us-central1",
        model_config={
            "accept_eula": True,
        },
        deploy_config={
            "dedicated_resources": {
                "machine_spec": {
                    "machine_type": "g2-standard-12",
                    "accelerator_type": "NVIDIA_L4",
                    "accelerator_count": 1,
                },
                "min_replica_count": 1,
            },
        })
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/vertex"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-gemma-1_1-2b-it", &vertex.AiEndpointWithModelGardenDeploymentArgs{
    			PublisherModelName: pulumi.String("publishers/google/models/gemma@gemma-1.1-2b-it"),
    			Location:           pulumi.String("us-central1"),
    			ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
    				AcceptEula: pulumi.Bool(true),
    			},
    			DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
    				DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
    					MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
    						MachineType:      pulumi.String("g2-standard-12"),
    						AcceleratorType:  pulumi.String("us-central1"),
    						AcceleratorCount: pulumi.Int(1),
    					},
    					MinReplicaCount: pulumi.Int(1),
    				},
    			},
    		})
    		if err != nil {
    			return err
    		}
    		_, err = vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-qwen3-0_6b", &vertex.AiEndpointWithModelGardenDeploymentArgs{
    			HuggingFaceModelId: pulumi.String("Qwen/Qwen3-0.6B"),
    			Location:           pulumi.String("us-central1"),
    			ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
    				AcceptEula: pulumi.Bool(true),
    			},
    			DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
    				DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
    					MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
    						MachineType:      pulumi.String("g2-standard-12"),
    						AcceleratorType:  pulumi.String("NVIDIA_L4"),
    						AcceleratorCount: pulumi.Int(1),
    					},
    					MinReplicaCount: pulumi.Int(1),
    				},
    			},
    		})
    		if err != nil {
    			return err
    		}
    		_, err = vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-llama-3_2-1b", &vertex.AiEndpointWithModelGardenDeploymentArgs{
    			PublisherModelName: pulumi.String("publishers/meta/models/llama3-2@llama-3.2-1b"),
    			Location:           pulumi.String("us-central1"),
    			ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
    				AcceptEula: pulumi.Bool(true),
    			},
    			DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
    				DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
    					MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
    						MachineType:      pulumi.String("g2-standard-12"),
    						AcceleratorType:  pulumi.String("NVIDIA_L4"),
    						AcceleratorCount: pulumi.Int(1),
    					},
    					MinReplicaCount: pulumi.Int(1),
    				},
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Gcp = Pulumi.Gcp;
    
    return await Deployment.RunAsync(() => 
    {
        var deploy_gemma_11_2b_it = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it", new()
        {
            PublisherModelName = "publishers/google/models/gemma@gemma-1.1-2b-it",
            Location = "us-central1",
            ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
            {
                AcceptEula = true,
            },
            DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
            {
                DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
                {
                    MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
                    {
                        MachineType = "g2-standard-12",
                        AcceleratorType = "us-central1",
                        AcceleratorCount = 1,
                    },
                    MinReplicaCount = 1,
                },
            },
        });
    
        var deploy_qwen3_06b = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b", new()
        {
            HuggingFaceModelId = "Qwen/Qwen3-0.6B",
            Location = "us-central1",
            ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
            {
                AcceptEula = true,
            },
            DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
            {
                DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
                {
                    MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
                    {
                        MachineType = "g2-standard-12",
                        AcceleratorType = "NVIDIA_L4",
                        AcceleratorCount = 1,
                    },
                    MinReplicaCount = 1,
                },
            },
        });
    
        var deploy_llama_32_1b = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b", new()
        {
            PublisherModelName = "publishers/meta/models/llama3-2@llama-3.2-1b",
            Location = "us-central1",
            ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
            {
                AcceptEula = true,
            },
            DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
            {
                DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
                {
                    MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
                    {
                        MachineType = "g2-standard-12",
                        AcceleratorType = "NVIDIA_L4",
                        AcceleratorCount = 1,
                    },
                    MinReplicaCount = 1,
                },
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeployment;
    import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeploymentArgs;
    import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs;
    import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs;
    import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs;
    import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var deploy_gemma_11_2b_it = new AiEndpointWithModelGardenDeployment("deploy-gemma-11-2b-it", AiEndpointWithModelGardenDeploymentArgs.builder()
                .publisherModelName("publishers/google/models/gemma@gemma-1.1-2b-it")
                .location("us-central1")
                .modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
                    .acceptEula(true)
                    .build())
                .deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
                    .dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
                        .machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
                            .machineType("g2-standard-12")
                            .acceleratorType("us-central1")
                            .acceleratorCount(1)
                            .build())
                        .minReplicaCount(1)
                        .build())
                    .build())
                .build());
    
            var deploy_qwen3_06b = new AiEndpointWithModelGardenDeployment("deploy-qwen3-06b", AiEndpointWithModelGardenDeploymentArgs.builder()
                .huggingFaceModelId("Qwen/Qwen3-0.6B")
                .location("us-central1")
                .modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
                    .acceptEula(true)
                    .build())
                .deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
                    .dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
                        .machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
                            .machineType("g2-standard-12")
                            .acceleratorType("NVIDIA_L4")
                            .acceleratorCount(1)
                            .build())
                        .minReplicaCount(1)
                        .build())
                    .build())
                .build());
    
            var deploy_llama_32_1b = new AiEndpointWithModelGardenDeployment("deploy-llama-32-1b", AiEndpointWithModelGardenDeploymentArgs.builder()
                .publisherModelName("publishers/meta/models/llama3-2@llama-3.2-1b")
                .location("us-central1")
                .modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
                    .acceptEula(true)
                    .build())
                .deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
                    .dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
                        .machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
                            .machineType("g2-standard-12")
                            .acceleratorType("NVIDIA_L4")
                            .acceleratorCount(1)
                            .build())
                        .minReplicaCount(1)
                        .build())
                    .build())
                .build());
    
        }
    }
    
    resources:
      deploy-gemma-11-2b-it:
        type: gcp:vertex:AiEndpointWithModelGardenDeployment
        name: deploy-gemma-1_1-2b-it
        properties:
          publisherModelName: publishers/google/models/gemma@gemma-1.1-2b-it
          location: us-central1
          modelConfig:
            acceptEula: true
          deployConfig:
            dedicatedResources:
              machineSpec:
                machineType: g2-standard-12
                acceleratorType: NVIDIA_L4
                acceleratorCount: 1
              minReplicaCount: 1
      deploy-qwen3-06b:
        type: gcp:vertex:AiEndpointWithModelGardenDeployment
        name: deploy-qwen3-0_6b
        properties:
          huggingFaceModelId: Qwen/Qwen3-0.6B
          location: us-central1
          modelConfig:
            acceptEula: true
          deployConfig:
            dedicatedResources:
              machineSpec:
                machineType: g2-standard-12
                acceleratorType: NVIDIA_L4
                acceleratorCount: 1
              minReplicaCount: 1
      deploy-llama-32-1b:
        type: gcp:vertex:AiEndpointWithModelGardenDeployment
        name: deploy-llama-3_2-1b
        properties:
          publisherModelName: publishers/meta/models/llama3-2@llama-3.2-1b
          location: us-central1
          modelConfig:
            acceptEula: true
          deployConfig:
            dedicatedResources:
              machineSpec:
                machineType: g2-standard-12
                acceleratorType: NVIDIA_L4
                acceleratorCount: 1
              minReplicaCount: 1
    

    Vertex Ai Deploy Multiple Models In Sequence
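
    This example chains the three deployments with the dependsOn resource option, so each model is deployed only after the previous deployment completes.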

    import * as pulumi from "@pulumi/pulumi";
    import * as gcp from "@pulumi/gcp";
    
    const deploy_gemma_11_2b_it = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it", {
        publisherModelName: "publishers/google/models/gemma@gemma-1.1-2b-it",
        location: "us-central1",
        modelConfig: {
            acceptEula: true,
        },
        deployConfig: {
            dedicatedResources: {
                machineSpec: {
                    machineType: "g2-standard-12",
                    acceleratorType: "NVIDIA_L4",
                    acceleratorCount: 1,
                },
                minReplicaCount: 1,
            },
        },
    });
    const deploy_qwen3_06b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b", {
        huggingFaceModelId: "Qwen/Qwen3-0.6B",
        location: "us-central1",
        modelConfig: {
            acceptEula: true,
        },
        deployConfig: {
            dedicatedResources: {
                machineSpec: {
                    machineType: "g2-standard-12",
                    acceleratorType: "NVIDIA_L4",
                    acceleratorCount: 1,
                },
                minReplicaCount: 1,
            },
        },
    }, {
        dependsOn: [deploy_gemma_11_2b_it],
    });
    const deploy_llama_32_1b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b", {
        publisherModelName: "publishers/meta/models/llama3-2@llama-3.2-1b",
        location: "us-central1",
        modelConfig: {
            acceptEula: true,
        },
        deployConfig: {
            dedicatedResources: {
                machineSpec: {
                    machineType: "g2-standard-12",
                    acceleratorType: "NVIDIA_L4",
                    acceleratorCount: 1,
                },
                minReplicaCount: 1,
            },
        },
    }, {
        dependsOn: [deploy_qwen3_06b],
    });
    
    import pulumi
    import pulumi_gcp as gcp
    
    deploy_gemma_11_2b_it = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it",
        publisher_model_name="publishers/google/models/gemma@gemma-1.1-2b-it",
        location="us-central1",
        model_config={
            "accept_eula": True,
        },
        deploy_config={
            "dedicated_resources": {
                "machine_spec": {
                    "machine_type": "g2-standard-12",
                    "accelerator_type": "NVIDIA_L4",
                    "accelerator_count": 1,
                },
                "min_replica_count": 1,
            },
        })
    deploy_qwen3_06b = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b",
        hugging_face_model_id="Qwen/Qwen3-0.6B",
        location="us-central1",
        model_config={
            "accept_eula": True,
        },
        deploy_config={
            "dedicated_resources": {
                "machine_spec": {
                    "machine_type": "g2-standard-12",
                    "accelerator_type": "NVIDIA_L4",
                    "accelerator_count": 1,
                },
                "min_replica_count": 1,
            },
        },
        opts = pulumi.ResourceOptions(depends_on=[deploy_gemma_11_2b_it]))
    deploy_llama_32_1b = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b",
        publisher_model_name="publishers/meta/models/llama3-2@llama-3.2-1b",
        location="us-central1",
        model_config={
            "accept_eula": True,
        },
        deploy_config={
            "dedicated_resources": {
                "machine_spec": {
                    "machine_type": "g2-standard-12",
                    "accelerator_type": "NVIDIA_L4",
                    "accelerator_count": 1,
                },
                "min_replica_count": 1,
            },
        },
        opts = pulumi.ResourceOptions(depends_on=[deploy_qwen3_06b]))
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/vertex"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		deploy_gemma_11_2b_it, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-gemma-1_1-2b-it", &vertex.AiEndpointWithModelGardenDeploymentArgs{
    			PublisherModelName: pulumi.String("publishers/google/models/gemma@gemma-1.1-2b-it"),
    			Location:           pulumi.String("us-central1"),
    			ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
    				AcceptEula: pulumi.Bool(true),
    			},
    			DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
    				DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
    					MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
    						MachineType:      pulumi.String("g2-standard-12"),
    						AcceleratorType:  pulumi.String("NVIDIA_L4"),
    						AcceleratorCount: pulumi.Int(1),
    					},
    					MinReplicaCount: pulumi.Int(1),
    				},
    			},
    		})
    		if err != nil {
    			return err
    		}
    		deploy_qwen3_06b, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-qwen3-0_6b", &vertex.AiEndpointWithModelGardenDeploymentArgs{
    			HuggingFaceModelId: pulumi.String("Qwen/Qwen3-0.6B"),
    			Location:           pulumi.String("us-central1"),
    			ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
    				AcceptEula: pulumi.Bool(true),
    			},
    			DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
    				DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
    					MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
    						MachineType:      pulumi.String("g2-standard-12"),
    						AcceleratorType:  pulumi.String("NVIDIA_L4"),
    						AcceleratorCount: pulumi.Int(1),
    					},
    					MinReplicaCount: pulumi.Int(1),
    				},
    			},
    		}, pulumi.DependsOn([]pulumi.Resource{
    			deploy_gemma_11_2b_it,
    		}))
    		if err != nil {
    			return err
    		}
    		_, err = vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-llama-3_2-1b", &vertex.AiEndpointWithModelGardenDeploymentArgs{
    			PublisherModelName: pulumi.String("publishers/meta/models/llama3-2@llama-3.2-1b"),
    			Location:           pulumi.String("us-central1"),
    			ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
    				AcceptEula: pulumi.Bool(true),
    			},
    			DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
    				DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
    					MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
    						MachineType:      pulumi.String("g2-standard-12"),
    						AcceleratorType:  pulumi.String("NVIDIA_L4"),
    						AcceleratorCount: pulumi.Int(1),
    					},
    					MinReplicaCount: pulumi.Int(1),
    				},
    			},
    		}, pulumi.DependsOn([]pulumi.Resource{
    			deploy_qwen3_06b,
    		}))
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Gcp = Pulumi.Gcp;
    
    return await Deployment.RunAsync(() => 
    {
        var deploy_gemma_11_2b_it = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it", new()
        {
            PublisherModelName = "publishers/google/models/gemma@gemma-1.1-2b-it",
            Location = "us-central1",
            ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
            {
                AcceptEula = true,
            },
            DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
            {
                DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
                {
                    MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
                    {
                        MachineType = "g2-standard-12",
                        AcceleratorType = "NVIDIA_L4",
                        AcceleratorCount = 1,
                    },
                    MinReplicaCount = 1,
                },
            },
        });
    
        var deploy_qwen3_06b = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b", new()
        {
            HuggingFaceModelId = "Qwen/Qwen3-0.6B",
            Location = "us-central1",
            ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
            {
                AcceptEula = true,
            },
            DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
            {
                DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
                {
                    MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
                    {
                        MachineType = "g2-standard-12",
                        AcceleratorType = "NVIDIA_L4",
                        AcceleratorCount = 1,
                    },
                    MinReplicaCount = 1,
                },
            },
        }, new CustomResourceOptions
        {
            DependsOn =
            {
                deploy_gemma_11_2b_it,
            },
        });
    
        var deploy_llama_32_1b = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b", new()
        {
            PublisherModelName = "publishers/meta/models/llama3-2@llama-3.2-1b",
            Location = "us-central1",
            ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
            {
                AcceptEula = true,
            },
            DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
            {
                DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
                {
                    MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
                    {
                        MachineType = "g2-standard-12",
                        AcceleratorType = "NVIDIA_L4",
                        AcceleratorCount = 1,
                    },
                    MinReplicaCount = 1,
                },
            },
        }, new CustomResourceOptions
        {
            DependsOn =
            {
                deploy_qwen3_06b,
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeployment;
    import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeploymentArgs;
    import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs;
    import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs;
    import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs;
    import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs;
    import com.pulumi.resources.CustomResourceOptions;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var deploy_gemma_11_2b_it = new AiEndpointWithModelGardenDeployment("deploy-gemma-11-2b-it", AiEndpointWithModelGardenDeploymentArgs.builder()
                .publisherModelName("publishers/google/models/gemma@gemma-1.1-2b-it")
                .location("us-central1")
                .modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
                    .acceptEula(true)
                    .build())
                .deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
                    .dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
                        .machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
                            .machineType("g2-standard-12")
                            .acceleratorType("NVIDIA_L4")
                            .acceleratorCount(1)
                            .build())
                        .minReplicaCount(1)
                        .build())
                    .build())
                .build());
    
            var deploy_qwen3_06b = new AiEndpointWithModelGardenDeployment("deploy-qwen3-06b", AiEndpointWithModelGardenDeploymentArgs.builder()
                .huggingFaceModelId("Qwen/Qwen3-0.6B")
                .location("us-central1")
                .modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
                    .acceptEula(true)
                    .build())
                .deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
                    .dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
                        .machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
                            .machineType("g2-standard-12")
                            .acceleratorType("NVIDIA_L4")
                            .acceleratorCount(1)
                            .build())
                        .minReplicaCount(1)
                        .build())
                    .build())
                .build(), CustomResourceOptions.builder()
                    .dependsOn(deploy_gemma_11_2b_it)
                    .build());
    
            var deploy_llama_32_1b = new AiEndpointWithModelGardenDeployment("deploy-llama-32-1b", AiEndpointWithModelGardenDeploymentArgs.builder()
                .publisherModelName("publishers/meta/models/llama3-2@llama-3.2-1b")
                .location("us-central1")
                .modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
                    .acceptEula(true)
                    .build())
                .deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
                    .dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
                        .machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
                            .machineType("g2-standard-12")
                            .acceleratorType("NVIDIA_L4")
                            .acceleratorCount(1)
                            .build())
                        .minReplicaCount(1)
                        .build())
                    .build())
                .build(), CustomResourceOptions.builder()
                    .dependsOn(deploy_qwen3_06b)
                    .build());
    
        }
    }
    
    resources:
      deploy-gemma-11-2b-it:
        type: gcp:vertex:AiEndpointWithModelGardenDeployment
        name: deploy-gemma-1_1-2b-it
        properties:
          publisherModelName: publishers/google/models/gemma@gemma-1.1-2b-it
          location: us-central1
          modelConfig:
            acceptEula: true
          deployConfig:
            dedicatedResources:
              machineSpec:
                machineType: g2-standard-12
                acceleratorType: NVIDIA_L4
                acceleratorCount: 1
              minReplicaCount: 1
      deploy-qwen3-06b:
        type: gcp:vertex:AiEndpointWithModelGardenDeployment
        name: deploy-qwen3-0_6b
        properties:
          huggingFaceModelId: Qwen/Qwen3-0.6B
          location: us-central1
          modelConfig:
            acceptEula: true
          deployConfig:
            dedicatedResources:
              machineSpec:
                machineType: g2-standard-12
                acceleratorType: NVIDIA_L4
                acceleratorCount: 1
              minReplicaCount: 1
        options:
          dependsOn:
            - ${["deploy-gemma-11-2b-it"]}
      deploy-llama-32-1b:
        type: gcp:vertex:AiEndpointWithModelGardenDeployment
        name: deploy-llama-3_2-1b
        properties:
          publisherModelName: publishers/meta/models/llama3-2@llama-3.2-1b
          location: us-central1
          modelConfig:
            acceptEula: true
          deployConfig:
            dedicatedResources:
              machineSpec:
                machineType: g2-standard-12
                acceleratorType: NVIDIA_L4
                acceleratorCount: 1
              minReplicaCount: 1
        options:
          dependsOn:
            - ${["deploy-qwen3-06b"]}
    

    Create AiEndpointWithModelGardenDeployment Resource

    Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.

    Constructor syntax

    new AiEndpointWithModelGardenDeployment(name: string, args: AiEndpointWithModelGardenDeploymentArgs, opts?: CustomResourceOptions);
    @overload
    def AiEndpointWithModelGardenDeployment(resource_name: str,
                                            args: AiEndpointWithModelGardenDeploymentArgs,
                                            opts: Optional[ResourceOptions] = None)
    
    @overload
    def AiEndpointWithModelGardenDeployment(resource_name: str,
                                            opts: Optional[ResourceOptions] = None,
                                            location: Optional[str] = None,
                                            deploy_config: Optional[AiEndpointWithModelGardenDeploymentDeployConfigArgs] = None,
                                            endpoint_config: Optional[AiEndpointWithModelGardenDeploymentEndpointConfigArgs] = None,
                                            hugging_face_model_id: Optional[str] = None,
                                            model_config: Optional[AiEndpointWithModelGardenDeploymentModelConfigArgs] = None,
                                            project: Optional[str] = None,
                                            publisher_model_name: Optional[str] = None)
    func NewAiEndpointWithModelGardenDeployment(ctx *Context, name string, args AiEndpointWithModelGardenDeploymentArgs, opts ...ResourceOption) (*AiEndpointWithModelGardenDeployment, error)
    public AiEndpointWithModelGardenDeployment(string name, AiEndpointWithModelGardenDeploymentArgs args, CustomResourceOptions? opts = null)
    public AiEndpointWithModelGardenDeployment(String name, AiEndpointWithModelGardenDeploymentArgs args)
    public AiEndpointWithModelGardenDeployment(String name, AiEndpointWithModelGardenDeploymentArgs args, CustomResourceOptions options)
    
    type: gcp:vertex:AiEndpointWithModelGardenDeployment
    properties: # The arguments to resource properties.
    options: # Bag of options to control resource's behavior.
    
    

    Parameters

    TypeScript (and JavaScript):

    name string
    The unique name of the resource.
    args AiEndpointWithModelGardenDeploymentArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.

    Python:

    resource_name str
    The unique name of the resource.
    args AiEndpointWithModelGardenDeploymentArgs
    The arguments to resource properties.
    opts ResourceOptions
    Bag of options to control resource's behavior.

    Go:

    ctx Context
    Context object for the current deployment.
    name string
    The unique name of the resource.
    args AiEndpointWithModelGardenDeploymentArgs
    The arguments to resource properties.
    opts ResourceOption
    Bag of options to control resource's behavior.

    C#:

    name string
    The unique name of the resource.
    args AiEndpointWithModelGardenDeploymentArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.

    Java:

    name String
    The unique name of the resource.
    args AiEndpointWithModelGardenDeploymentArgs
    The arguments to resource properties.
    options CustomResourceOptions
    Bag of options to control resource's behavior.
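
    As a quick illustration of how these parameters line up, here is a hypothetical TypeScript sketch; the resource name "example" and the protect option are illustrative values, not requirements of this resource:

    import * as pulumi from "@pulumi/pulumi";
    import * as gcp from "@pulumi/gcp";

    const example = new gcp.vertex.AiEndpointWithModelGardenDeployment(
        // name: the unique resource name within the stack
        "example",
        // args: the resource's input properties
        {
            publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
            location: "us-central1",
            modelConfig: {
                acceptEula: true,
            },
        },
        // opts: optional bag of resource options; protect is one illustrative example
        { protect: true },
    );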

    Constructor example

    The following reference example uses placeholder values for all input properties.

    var aiEndpointWithModelGardenDeploymentResource = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("aiEndpointWithModelGardenDeploymentResource", new()
    {
        Location = "string",
        DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
        {
            DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
            {
                MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
                {
                    AcceleratorCount = 0,
                    AcceleratorType = "string",
                    MachineType = "string",
                    MultihostGpuNodeCount = 0,
                    ReservationAffinity = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinityArgs
                    {
                        ReservationAffinityType = "string",
                        Key = "string",
                        Values = new[]
                        {
                            "string",
                        },
                    },
                    TpuTopology = "string",
                },
                MinReplicaCount = 0,
                AutoscalingMetricSpecs = new[]
                {
                    new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpecArgs
                    {
                        MetricName = "string",
                        Target = 0,
                    },
                },
                MaxReplicaCount = 0,
                RequiredReplicaCount = 0,
                Spot = false,
            },
            FastTryoutEnabled = false,
            SystemLabels = 
            {
                { "string", "string" },
            },
        },
        EndpointConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentEndpointConfigArgs
        {
            DedicatedEndpointEnabled = false,
            EndpointDisplayName = "string",
        },
        HuggingFaceModelId = "string",
        ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
        {
            AcceptEula = false,
            ContainerSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecArgs
            {
                ImageUri = "string",
                HealthRoute = "string",
                DeploymentTimeout = "string",
                Envs = new[]
                {
                    new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnvArgs
                    {
                        Name = "string",
                        Value = "string",
                    },
                },
                GrpcPorts = new[]
                {
                    new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPortArgs
                    {
                        ContainerPort = 0,
                    },
                },
                HealthProbe = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeArgs
                {
                    Exec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExecArgs
                    {
                        Commands = new[]
                        {
                            "string",
                        },
                    },
                    FailureThreshold = 0,
                    Grpc = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpcArgs
                    {
                        Port = 0,
                        Service = "string",
                    },
                    HttpGet = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetArgs
                    {
                        Host = "string",
                        HttpHeaders = new[]
                        {
                            new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeaderArgs
                            {
                                Name = "string",
                                Value = "string",
                            },
                        },
                        Path = "string",
                        Port = 0,
                        Scheme = "string",
                    },
                    InitialDelaySeconds = 0,
                    PeriodSeconds = 0,
                    SuccessThreshold = 0,
                    TcpSocket = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocketArgs
                    {
                        Host = "string",
                        Port = 0,
                    },
                    TimeoutSeconds = 0,
                },
                Args = new[]
                {
                    "string",
                },
                Commands = new[]
                {
                    "string",
                },
                LivenessProbe = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeArgs
                {
                    Exec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExecArgs
                    {
                        Commands = new[]
                        {
                            "string",
                        },
                    },
                    FailureThreshold = 0,
                    Grpc = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpcArgs
                    {
                        Port = 0,
                        Service = "string",
                    },
                    HttpGet = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetArgs
                    {
                        Host = "string",
                        HttpHeaders = new[]
                        {
                            new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeaderArgs
                            {
                                Name = "string",
                                Value = "string",
                            },
                        },
                        Path = "string",
                        Port = 0,
                        Scheme = "string",
                    },
                    InitialDelaySeconds = 0,
                    PeriodSeconds = 0,
                    SuccessThreshold = 0,
                    TcpSocket = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocketArgs
                    {
                        Host = "string",
                        Port = 0,
                    },
                    TimeoutSeconds = 0,
                },
                Ports = new[]
                {
                    new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPortArgs
                    {
                        ContainerPort = 0,
                    },
                },
                PredictRoute = "string",
                SharedMemorySizeMb = "string",
                StartupProbe = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeArgs
                {
                    Exec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExecArgs
                    {
                        Commands = new[]
                        {
                            "string",
                        },
                    },
                    FailureThreshold = 0,
                    Grpc = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpcArgs
                    {
                        Port = 0,
                        Service = "string",
                    },
                    HttpGet = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetArgs
                    {
                        Host = "string",
                        HttpHeaders = new[]
                        {
                            new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeaderArgs
                            {
                                Name = "string",
                                Value = "string",
                            },
                        },
                        Path = "string",
                        Port = 0,
                        Scheme = "string",
                    },
                    InitialDelaySeconds = 0,
                    PeriodSeconds = 0,
                    SuccessThreshold = 0,
                    TcpSocket = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocketArgs
                    {
                        Host = "string",
                        Port = 0,
                    },
                    TimeoutSeconds = 0,
                },
            },
            HuggingFaceAccessToken = "string",
            HuggingFaceCacheEnabled = false,
            ModelDisplayName = "string",
        },
        Project = "string",
        PublisherModelName = "string",
    });
    
    example, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "aiEndpointWithModelGardenDeploymentResource", &vertex.AiEndpointWithModelGardenDeploymentArgs{
    	Location: pulumi.String("string"),
    	DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
    		DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
    			MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
    				AcceleratorCount:      pulumi.Int(0),
    				AcceleratorType:       pulumi.String("string"),
    				MachineType:           pulumi.String("string"),
    				MultihostGpuNodeCount: pulumi.Int(0),
    				ReservationAffinity: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinityArgs{
    					ReservationAffinityType: pulumi.String("string"),
    					Key:                     pulumi.String("string"),
    					Values: pulumi.StringArray{
    						pulumi.String("string"),
    					},
    				},
    				TpuTopology: pulumi.String("string"),
    			},
    			MinReplicaCount: pulumi.Int(0),
    			AutoscalingMetricSpecs: vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpecArray{
    				&vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpecArgs{
    					MetricName: pulumi.String("string"),
    					Target:     pulumi.Int(0),
    				},
    			},
    			MaxReplicaCount:      pulumi.Int(0),
    			RequiredReplicaCount: pulumi.Int(0),
    			Spot:                 pulumi.Bool(false),
    		},
    		FastTryoutEnabled: pulumi.Bool(false),
    		SystemLabels: pulumi.StringMap{
    			"string": pulumi.String("string"),
    		},
    	},
    	EndpointConfig: &vertex.AiEndpointWithModelGardenDeploymentEndpointConfigArgs{
    		DedicatedEndpointEnabled: pulumi.Bool(false),
    		EndpointDisplayName:      pulumi.String("string"),
    	},
    	HuggingFaceModelId: pulumi.String("string"),
    	ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
    		AcceptEula: pulumi.Bool(false),
    		ContainerSpec: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecArgs{
    			ImageUri:          pulumi.String("string"),
    			HealthRoute:       pulumi.String("string"),
    			DeploymentTimeout: pulumi.String("string"),
    			Envs: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnvArray{
    				&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnvArgs{
    					Name:  pulumi.String("string"),
    					Value: pulumi.String("string"),
    				},
    			},
    			GrpcPorts: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPortArray{
    				&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPortArgs{
    					ContainerPort: pulumi.Int(0),
    				},
    			},
    			HealthProbe: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeArgs{
    				Exec: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExecArgs{
    					Commands: pulumi.StringArray{
    						pulumi.String("string"),
    					},
    				},
    				FailureThreshold: pulumi.Int(0),
    				Grpc: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpcArgs{
    					Port:    pulumi.Int(0),
    					Service: pulumi.String("string"),
    				},
    				HttpGet: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetArgs{
    					Host: pulumi.String("string"),
    					HttpHeaders: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeaderArray{
    						&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeaderArgs{
    							Name:  pulumi.String("string"),
    							Value: pulumi.String("string"),
    						},
    					},
    					Path:   pulumi.String("string"),
    					Port:   pulumi.Int(0),
    					Scheme: pulumi.String("string"),
    				},
    				InitialDelaySeconds: pulumi.Int(0),
    				PeriodSeconds:       pulumi.Int(0),
    				SuccessThreshold:    pulumi.Int(0),
    				TcpSocket: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocketArgs{
    					Host: pulumi.String("string"),
    					Port: pulumi.Int(0),
    				},
    				TimeoutSeconds: pulumi.Int(0),
    			},
    			Args: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    			Commands: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    			LivenessProbe: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeArgs{
    				Exec: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExecArgs{
    					Commands: pulumi.StringArray{
    						pulumi.String("string"),
    					},
    				},
    				FailureThreshold: pulumi.Int(0),
    				Grpc: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpcArgs{
    					Port:    pulumi.Int(0),
    					Service: pulumi.String("string"),
    				},
    				HttpGet: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetArgs{
    					Host: pulumi.String("string"),
    					HttpHeaders: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeaderArray{
    						&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeaderArgs{
    							Name:  pulumi.String("string"),
    							Value: pulumi.String("string"),
    						},
    					},
    					Path:   pulumi.String("string"),
    					Port:   pulumi.Int(0),
    					Scheme: pulumi.String("string"),
    				},
    				InitialDelaySeconds: pulumi.Int(0),
    				PeriodSeconds:       pulumi.Int(0),
    				SuccessThreshold:    pulumi.Int(0),
    				TcpSocket: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocketArgs{
    					Host: pulumi.String("string"),
    					Port: pulumi.Int(0),
    				},
    				TimeoutSeconds: pulumi.Int(0),
    			},
    			Ports: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPortArray{
    				&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPortArgs{
    					ContainerPort: pulumi.Int(0),
    				},
    			},
    			PredictRoute:       pulumi.String("string"),
    			SharedMemorySizeMb: pulumi.String("string"),
    			StartupProbe: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeArgs{
    				Exec: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExecArgs{
    					Commands: pulumi.StringArray{
    						pulumi.String("string"),
    					},
    				},
    				FailureThreshold: pulumi.Int(0),
    				Grpc: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpcArgs{
    					Port:    pulumi.Int(0),
    					Service: pulumi.String("string"),
    				},
    				HttpGet: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetArgs{
    					Host: pulumi.String("string"),
    					HttpHeaders: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeaderArray{
    						&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeaderArgs{
    							Name:  pulumi.String("string"),
    							Value: pulumi.String("string"),
    						},
    					},
    					Path:   pulumi.String("string"),
    					Port:   pulumi.Int(0),
    					Scheme: pulumi.String("string"),
    				},
    				InitialDelaySeconds: pulumi.Int(0),
    				PeriodSeconds:       pulumi.Int(0),
    				SuccessThreshold:    pulumi.Int(0),
    				TcpSocket: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocketArgs{
    					Host: pulumi.String("string"),
    					Port: pulumi.Int(0),
    				},
    				TimeoutSeconds: pulumi.Int(0),
    			},
    		},
    		HuggingFaceAccessToken:  pulumi.String("string"),
    		HuggingFaceCacheEnabled: pulumi.Bool(false),
    		ModelDisplayName:        pulumi.String("string"),
    	},
    	Project:            pulumi.String("string"),
    	PublisherModelName: pulumi.String("string"),
    })
    
    var aiEndpointWithModelGardenDeploymentResource = new AiEndpointWithModelGardenDeployment("aiEndpointWithModelGardenDeploymentResource", AiEndpointWithModelGardenDeploymentArgs.builder()
        .location("string")
        .deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
            .dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
                .machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
                    .acceleratorCount(0)
                    .acceleratorType("string")
                    .machineType("string")
                    .multihostGpuNodeCount(0)
                    .reservationAffinity(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinityArgs.builder()
                        .reservationAffinityType("string")
                        .key("string")
                        .values("string")
                        .build())
                    .tpuTopology("string")
                    .build())
                .minReplicaCount(0)
                .autoscalingMetricSpecs(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpecArgs.builder()
                    .metricName("string")
                    .target(0)
                    .build())
                .maxReplicaCount(0)
                .requiredReplicaCount(0)
                .spot(false)
                .build())
            .fastTryoutEnabled(false)
            .systemLabels(Map.of("string", "string"))
            .build())
        .endpointConfig(AiEndpointWithModelGardenDeploymentEndpointConfigArgs.builder()
            .dedicatedEndpointEnabled(false)
            .endpointDisplayName("string")
            .build())
        .huggingFaceModelId("string")
        .modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
            .acceptEula(false)
            .containerSpec(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecArgs.builder()
                .imageUri("string")
                .healthRoute("string")
                .deploymentTimeout("string")
                .envs(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnvArgs.builder()
                    .name("string")
                    .value("string")
                    .build())
                .grpcPorts(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPortArgs.builder()
                    .containerPort(0)
                    .build())
                .healthProbe(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeArgs.builder()
                    .exec(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExecArgs.builder()
                        .commands("string")
                        .build())
                    .failureThreshold(0)
                    .grpc(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpcArgs.builder()
                        .port(0)
                        .service("string")
                        .build())
                    .httpGet(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetArgs.builder()
                        .host("string")
                        .httpHeaders(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeaderArgs.builder()
                            .name("string")
                            .value("string")
                            .build())
                        .path("string")
                        .port(0)
                        .scheme("string")
                        .build())
                    .initialDelaySeconds(0)
                    .periodSeconds(0)
                    .successThreshold(0)
                    .tcpSocket(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocketArgs.builder()
                        .host("string")
                        .port(0)
                        .build())
                    .timeoutSeconds(0)
                    .build())
                .args("string")
                .commands("string")
                .livenessProbe(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeArgs.builder()
                    .exec(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExecArgs.builder()
                        .commands("string")
                        .build())
                    .failureThreshold(0)
                    .grpc(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpcArgs.builder()
                        .port(0)
                        .service("string")
                        .build())
                    .httpGet(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetArgs.builder()
                        .host("string")
                        .httpHeaders(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeaderArgs.builder()
                            .name("string")
                            .value("string")
                            .build())
                        .path("string")
                        .port(0)
                        .scheme("string")
                        .build())
                    .initialDelaySeconds(0)
                    .periodSeconds(0)
                    .successThreshold(0)
                    .tcpSocket(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocketArgs.builder()
                        .host("string")
                        .port(0)
                        .build())
                    .timeoutSeconds(0)
                    .build())
                .ports(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPortArgs.builder()
                    .containerPort(0)
                    .build())
                .predictRoute("string")
                .sharedMemorySizeMb("string")
                .startupProbe(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeArgs.builder()
                    .exec(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExecArgs.builder()
                        .commands("string")
                        .build())
                    .failureThreshold(0)
                    .grpc(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpcArgs.builder()
                        .port(0)
                        .service("string")
                        .build())
                    .httpGet(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetArgs.builder()
                        .host("string")
                        .httpHeaders(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeaderArgs.builder()
                            .name("string")
                            .value("string")
                            .build())
                        .path("string")
                        .port(0)
                        .scheme("string")
                        .build())
                    .initialDelaySeconds(0)
                    .periodSeconds(0)
                    .successThreshold(0)
                    .tcpSocket(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocketArgs.builder()
                        .host("string")
                        .port(0)
                        .build())
                    .timeoutSeconds(0)
                    .build())
                .build())
            .huggingFaceAccessToken("string")
            .huggingFaceCacheEnabled(false)
            .modelDisplayName("string")
            .build())
        .project("string")
        .publisherModelName("string")
        .build());
    
    ai_endpoint_with_model_garden_deployment_resource = gcp.vertex.AiEndpointWithModelGardenDeployment("aiEndpointWithModelGardenDeploymentResource",
        location="string",
        deploy_config={
            "dedicated_resources": {
                "machine_spec": {
                    "accelerator_count": 0,
                    "accelerator_type": "string",
                    "machine_type": "string",
                    "multihost_gpu_node_count": 0,
                    "reservation_affinity": {
                        "reservation_affinity_type": "string",
                        "key": "string",
                        "values": ["string"],
                    },
                    "tpu_topology": "string",
                },
                "min_replica_count": 0,
                "autoscaling_metric_specs": [{
                    "metric_name": "string",
                    "target": 0,
                }],
                "max_replica_count": 0,
                "required_replica_count": 0,
                "spot": False,
            },
            "fast_tryout_enabled": False,
            "system_labels": {
                "string": "string",
            },
        },
        endpoint_config={
            "dedicated_endpoint_enabled": False,
            "endpoint_display_name": "string",
        },
        hugging_face_model_id="string",
        model_config={
            "accept_eula": False,
            "container_spec": {
                "image_uri": "string",
                "health_route": "string",
                "deployment_timeout": "string",
                "envs": [{
                    "name": "string",
                    "value": "string",
                }],
                "grpc_ports": [{
                    "container_port": 0,
                }],
                "health_probe": {
                    "exec_": {
                        "commands": ["string"],
                    },
                    "failure_threshold": 0,
                    "grpc": {
                        "port": 0,
                        "service": "string",
                    },
                    "http_get": {
                        "host": "string",
                        "http_headers": [{
                            "name": "string",
                            "value": "string",
                        }],
                        "path": "string",
                        "port": 0,
                        "scheme": "string",
                    },
                    "initial_delay_seconds": 0,
                    "period_seconds": 0,
                    "success_threshold": 0,
                    "tcp_socket": {
                        "host": "string",
                        "port": 0,
                    },
                    "timeout_seconds": 0,
                },
                "args": ["string"],
                "commands": ["string"],
                "liveness_probe": {
                    "exec_": {
                        "commands": ["string"],
                    },
                    "failure_threshold": 0,
                    "grpc": {
                        "port": 0,
                        "service": "string",
                    },
                    "http_get": {
                        "host": "string",
                        "http_headers": [{
                            "name": "string",
                            "value": "string",
                        }],
                        "path": "string",
                        "port": 0,
                        "scheme": "string",
                    },
                    "initial_delay_seconds": 0,
                    "period_seconds": 0,
                    "success_threshold": 0,
                    "tcp_socket": {
                        "host": "string",
                        "port": 0,
                    },
                    "timeout_seconds": 0,
                },
                "ports": [{
                    "container_port": 0,
                }],
                "predict_route": "string",
                "shared_memory_size_mb": "string",
                "startup_probe": {
                    "exec_": {
                        "commands": ["string"],
                    },
                    "failure_threshold": 0,
                    "grpc": {
                        "port": 0,
                        "service": "string",
                    },
                    "http_get": {
                        "host": "string",
                        "http_headers": [{
                            "name": "string",
                            "value": "string",
                        }],
                        "path": "string",
                        "port": 0,
                        "scheme": "string",
                    },
                    "initial_delay_seconds": 0,
                    "period_seconds": 0,
                    "success_threshold": 0,
                    "tcp_socket": {
                        "host": "string",
                        "port": 0,
                    },
                    "timeout_seconds": 0,
                },
            },
            "hugging_face_access_token": "string",
            "hugging_face_cache_enabled": False,
            "model_display_name": "string",
        },
        project="string",
        publisher_model_name="string")
    
    const aiEndpointWithModelGardenDeploymentResource = new gcp.vertex.AiEndpointWithModelGardenDeployment("aiEndpointWithModelGardenDeploymentResource", {
        location: "string",
        deployConfig: {
            dedicatedResources: {
                machineSpec: {
                    acceleratorCount: 0,
                    acceleratorType: "string",
                    machineType: "string",
                    multihostGpuNodeCount: 0,
                    reservationAffinity: {
                        reservationAffinityType: "string",
                        key: "string",
                        values: ["string"],
                    },
                    tpuTopology: "string",
                },
                minReplicaCount: 0,
                autoscalingMetricSpecs: [{
                    metricName: "string",
                    target: 0,
                }],
                maxReplicaCount: 0,
                requiredReplicaCount: 0,
                spot: false,
            },
            fastTryoutEnabled: false,
            systemLabels: {
                string: "string",
            },
        },
        endpointConfig: {
            dedicatedEndpointEnabled: false,
            endpointDisplayName: "string",
        },
        huggingFaceModelId: "string",
        modelConfig: {
            acceptEula: false,
            containerSpec: {
                imageUri: "string",
                healthRoute: "string",
                deploymentTimeout: "string",
                envs: [{
                    name: "string",
                    value: "string",
                }],
                grpcPorts: [{
                    containerPort: 0,
                }],
                healthProbe: {
                    exec: {
                        commands: ["string"],
                    },
                    failureThreshold: 0,
                    grpc: {
                        port: 0,
                        service: "string",
                    },
                    httpGet: {
                        host: "string",
                        httpHeaders: [{
                            name: "string",
                            value: "string",
                        }],
                        path: "string",
                        port: 0,
                        scheme: "string",
                    },
                    initialDelaySeconds: 0,
                    periodSeconds: 0,
                    successThreshold: 0,
                    tcpSocket: {
                        host: "string",
                        port: 0,
                    },
                    timeoutSeconds: 0,
                },
                args: ["string"],
                commands: ["string"],
                livenessProbe: {
                    exec: {
                        commands: ["string"],
                    },
                    failureThreshold: 0,
                    grpc: {
                        port: 0,
                        service: "string",
                    },
                    httpGet: {
                        host: "string",
                        httpHeaders: [{
                            name: "string",
                            value: "string",
                        }],
                        path: "string",
                        port: 0,
                        scheme: "string",
                    },
                    initialDelaySeconds: 0,
                    periodSeconds: 0,
                    successThreshold: 0,
                    tcpSocket: {
                        host: "string",
                        port: 0,
                    },
                    timeoutSeconds: 0,
                },
                ports: [{
                    containerPort: 0,
                }],
                predictRoute: "string",
                sharedMemorySizeMb: "string",
                startupProbe: {
                    exec: {
                        commands: ["string"],
                    },
                    failureThreshold: 0,
                    grpc: {
                        port: 0,
                        service: "string",
                    },
                    httpGet: {
                        host: "string",
                        httpHeaders: [{
                            name: "string",
                            value: "string",
                        }],
                        path: "string",
                        port: 0,
                        scheme: "string",
                    },
                    initialDelaySeconds: 0,
                    periodSeconds: 0,
                    successThreshold: 0,
                    tcpSocket: {
                        host: "string",
                        port: 0,
                    },
                    timeoutSeconds: 0,
                },
            },
            huggingFaceAccessToken: "string",
            huggingFaceCacheEnabled: false,
            modelDisplayName: "string",
        },
        project: "string",
        publisherModelName: "string",
    });
    
    type: gcp:vertex:AiEndpointWithModelGardenDeployment
    properties:
        deployConfig:
            dedicatedResources:
                autoscalingMetricSpecs:
                    - metricName: string
                      target: 0
                machineSpec:
                    acceleratorCount: 0
                    acceleratorType: string
                    machineType: string
                    multihostGpuNodeCount: 0
                    reservationAffinity:
                        key: string
                        reservationAffinityType: string
                        values:
                            - string
                    tpuTopology: string
                maxReplicaCount: 0
                minReplicaCount: 0
                requiredReplicaCount: 0
                spot: false
            fastTryoutEnabled: false
            systemLabels:
                string: string
        endpointConfig:
            dedicatedEndpointEnabled: false
            endpointDisplayName: string
        huggingFaceModelId: string
        location: string
        modelConfig:
            acceptEula: false
            containerSpec:
                args:
                    - string
                commands:
                    - string
                deploymentTimeout: string
                envs:
                    - name: string
                      value: string
                grpcPorts:
                    - containerPort: 0
                healthProbe:
                    exec:
                        commands:
                            - string
                    failureThreshold: 0
                    grpc:
                        port: 0
                        service: string
                    httpGet:
                        host: string
                        httpHeaders:
                            - name: string
                              value: string
                        path: string
                        port: 0
                        scheme: string
                    initialDelaySeconds: 0
                    periodSeconds: 0
                    successThreshold: 0
                    tcpSocket:
                        host: string
                        port: 0
                    timeoutSeconds: 0
                healthRoute: string
                imageUri: string
                livenessProbe:
                    exec:
                        commands:
                            - string
                    failureThreshold: 0
                    grpc:
                        port: 0
                        service: string
                    httpGet:
                        host: string
                        httpHeaders:
                            - name: string
                              value: string
                        path: string
                        port: 0
                        scheme: string
                    initialDelaySeconds: 0
                    periodSeconds: 0
                    successThreshold: 0
                    tcpSocket:
                        host: string
                        port: 0
                    timeoutSeconds: 0
                ports:
                    - containerPort: 0
                predictRoute: string
                sharedMemorySizeMb: string
                startupProbe:
                    exec:
                        commands:
                            - string
                    failureThreshold: 0
                    grpc:
                        port: 0
                        service: string
                    httpGet:
                        host: string
                        httpHeaders:
                            - name: string
                              value: string
                        path: string
                        port: 0
                        scheme: string
                    initialDelaySeconds: 0
                    periodSeconds: 0
                    successThreshold: 0
                    tcpSocket:
                        host: string
                        port: 0
                    timeoutSeconds: 0
            huggingFaceAccessToken: string
            huggingFaceCacheEnabled: false
            modelDisplayName: string
        project: string
        publisherModelName: string
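
    The constructor skeletons above enumerate every accepted field, but a real deployment sets only a handful of them. The following TypeScript sketch shows a more typical custom-container configuration; the image URI, routes, port, probe timings, and model name are all illustrative placeholders, not values confirmed by this page.

    import * as gcp from "@pulumi/gcp";

    // A minimal sketch, not a complete recipe: the image, routes, and probe
    // settings below are hypothetical, and the model name is a placeholder in
    // the publishers/{publisher}/models/{publisher_model}@{version_id} format.
    const custom = new gcp.vertex.AiEndpointWithModelGardenDeployment("custom", {
        location: "us-central1",
        publisherModelName: "publishers/example/models/example-model@001",
        modelConfig: {
            acceptEula: true,
            containerSpec: {
                imageUri: "us-docker.pkg.dev/my-project/serving/my-model:latest",
                predictRoute: "/predict",
                healthRoute: "/health",
                ports: [{ containerPort: 8080 }],
                // Give the server time to load model weights before it has to
                // report healthy.
                startupProbe: {
                    httpGet: { path: "/health", port: 8080 },
                    initialDelaySeconds: 30,
                    periodSeconds: 10,
                    failureThreshold: 6,
                },
            },
        },
    });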
    

    AiEndpointWithModelGardenDeployment Resource Properties

    To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

    Inputs

    In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.

    The AiEndpointWithModelGardenDeployment resource accepts the following input properties:

    Location string
    Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
    DeployConfig AiEndpointWithModelGardenDeploymentDeployConfig
    The deploy config to use for the deployment. Structure is documented below.
    EndpointConfig AiEndpointWithModelGardenDeploymentEndpointConfig
    The endpoint config to use for the deployment. Structure is documented below.
    HuggingFaceModelId string
The Hugging Face model to deploy. Format: a Hugging Face model ID such as google/gemma-2-2b-it.
    ModelConfig AiEndpointWithModelGardenDeploymentModelConfig
    The model config to use for the deployment. Structure is documented below.
    Project string
    The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
    PublisherModelName string
    The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
    The same input properties are available in every language SDK. Only per-language conventions differ: names are camelCase in TypeScript and Java (deployConfig), snake_case in Python (deploy_config), and PascalCase in Go and C# (DeployConfig); nested objects use the Args-suffixed input types where the SDK defines them, and Pulumi YAML accepts plain property maps.
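
    A deployment typically sets either publisherModelName or huggingFaceModelId. A minimal sketch in TypeScript, assuming huggingFaceModelId alone identifies the model (the display name is illustrative):

    import * as gcp from "@pulumi/gcp";

    // Deploy a Hugging Face model rather than a Model Garden publisher model.
    // The model ID follows the google/gemma-2-2b-it format documented above.
    const hfDeploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("hf-deploy", {
        location: "us-central1",
        huggingFaceModelId: "google/gemma-2-2b-it",
        modelConfig: {
            acceptEula: true,
        },
        endpointConfig: {
            endpointDisplayName: "gemma-2-2b-it-endpoint",
        },
    });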

    Outputs

    All input properties are implicitly available as output properties. Additionally, the AiEndpointWithModelGardenDeployment resource produces the following output properties:

    DeployedModelDisplayName string
    Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
    DeployedModelId string
    Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
    Endpoint string
    Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
    Id string
    The provider-assigned unique ID for this managed resource.
    These outputs are identical across the language SDKs, modulo the same casing conventions (for example deployedModelId in TypeScript, deployed_model_id in Python).
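
    These properties surface directly on the resource instance once the deployment completes; deployedModelId in particular is what the provider needs to undeploy the model on delete. A sketch of exporting them from a TypeScript program (reusing the hypothetical Gemma deployment shown under Inputs):

    import * as gcp from "@pulumi/gcp";

    const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
        location: "us-central1",
        huggingFaceModelId: "google/gemma-2-2b-it",
        modelConfig: { acceptEula: true },
    });

    // Output-only properties resolve after the deploy operation finishes.
    export const endpoint = deploy.endpoint;
    export const deployedModelId = deploy.deployedModelId;
    export const deployedModelDisplayName = deploy.deployedModelDisplayName;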

    Look up Existing AiEndpointWithModelGardenDeployment Resource

    Get an existing AiEndpointWithModelGardenDeployment resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

    public static get(name: string, id: Input<ID>, state?: AiEndpointWithModelGardenDeploymentState, opts?: CustomResourceOptions): AiEndpointWithModelGardenDeployment
    @staticmethod
    def get(resource_name: str,
            id: str,
            opts: Optional[ResourceOptions] = None,
            deploy_config: Optional[AiEndpointWithModelGardenDeploymentDeployConfigArgs] = None,
            deployed_model_display_name: Optional[str] = None,
            deployed_model_id: Optional[str] = None,
            endpoint: Optional[str] = None,
            endpoint_config: Optional[AiEndpointWithModelGardenDeploymentEndpointConfigArgs] = None,
            hugging_face_model_id: Optional[str] = None,
            location: Optional[str] = None,
            model_config: Optional[AiEndpointWithModelGardenDeploymentModelConfigArgs] = None,
            project: Optional[str] = None,
            publisher_model_name: Optional[str] = None) -> AiEndpointWithModelGardenDeployment
    func GetAiEndpointWithModelGardenDeployment(ctx *Context, name string, id IDInput, state *AiEndpointWithModelGardenDeploymentState, opts ...ResourceOption) (*AiEndpointWithModelGardenDeployment, error)
    public static AiEndpointWithModelGardenDeployment Get(string name, Input<string> id, AiEndpointWithModelGardenDeploymentState? state, CustomResourceOptions? opts = null)
    public static AiEndpointWithModelGardenDeployment get(String name, Output<String> id, AiEndpointWithModelGardenDeploymentState state, CustomResourceOptions options)
    resources:
      _:
        type: gcp:vertex:AiEndpointWithModelGardenDeployment
        get:
          id: ${id}
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    resource_name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
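
    A sketch of the lookup in TypeScript; the resource ID below is a hypothetical placeholder, since the real value comes from existing stack state or an import:

    import * as gcp from "@pulumi/gcp";

    // Hypothetical ID; substitute the provider ID of the actual deployment.
    const existing = gcp.vertex.AiEndpointWithModelGardenDeployment.get(
        "existing-deployment",
        "projects/my-project/locations/us-central1/endpoints/1234567890");

    export const existingEndpoint = existing.endpoint;
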
    The following state arguments are supported:
    DeployConfig AiEndpointWithModelGardenDeploymentDeployConfig
    The deploy config to use for the deployment. Structure is documented below.
    DeployedModelDisplayName string
    Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
    DeployedModelId string
    Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
    Endpoint string
    Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
    EndpointConfig AiEndpointWithModelGardenDeploymentEndpointConfig
    The endpoint config to use for the deployment. Structure is documented below.
    HuggingFaceModelId string
The Hugging Face model to deploy. Format: a Hugging Face model ID such as google/gemma-2-2b-it.
    Location string
    Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
    ModelConfig AiEndpointWithModelGardenDeploymentModelConfig
    The model config to use for the deployment. Structure is documented below.
    Project string
    The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
    PublisherModelName string
    The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
    DeployConfig AiEndpointWithModelGardenDeploymentDeployConfigArgs
    The deploy config to use for the deployment. Structure is documented below.
    DeployedModelDisplayName string
    Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
    DeployedModelId string
    Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
    Endpoint string
    Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
    EndpointConfig AiEndpointWithModelGardenDeploymentEndpointConfigArgs
    The endpoint config to use for the deployment. Structure is documented below.
    HuggingFaceModelId string
    The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
    Location string
    Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
    ModelConfig AiEndpointWithModelGardenDeploymentModelConfigArgs
    The model config to use for the deployment. Structure is documented below.
    Project string
    The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
    PublisherModelName string
    The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
    deployConfig AiEndpointWithModelGardenDeploymentDeployConfig
    The deploy config to use for the deployment. Structure is documented below.
    deployedModelDisplayName String
    Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
    deployedModelId String
    Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
    endpoint String
    Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
    endpointConfig AiEndpointWithModelGardenDeploymentEndpointConfig
    The endpoint config to use for the deployment. Structure is documented below.
    huggingFaceModelId String
    The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
    location String
    Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
    modelConfig AiEndpointWithModelGardenDeploymentModelConfig
    The model config to use for the deployment. Structure is documented below.
    project String
    The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
    publisherModelName String
    The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
    deployConfig AiEndpointWithModelGardenDeploymentDeployConfig
    The deploy config to use for the deployment. Structure is documented below.
    deployedModelDisplayName string
    Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
    deployedModelId string
    Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
    endpoint string
    Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
    endpointConfig AiEndpointWithModelGardenDeploymentEndpointConfig
    The endpoint config to use for the deployment. Structure is documented below.
    huggingFaceModelId string
    The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
    location string
    Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
    modelConfig AiEndpointWithModelGardenDeploymentModelConfig
    The model config to use for the deployment. Structure is documented below.
    project string
    The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
    publisherModelName string
    The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
    deploy_config AiEndpointWithModelGardenDeploymentDeployConfigArgs
    The deploy config to use for the deployment. Structure is documented below.
    deployed_model_display_name str
    Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
    deployed_model_id str
    Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
    endpoint str
    Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
    endpoint_config AiEndpointWithModelGardenDeploymentEndpointConfigArgs
    The endpoint config to use for the deployment. Structure is documented below.
    hugging_face_model_id str
    The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
    location str
    Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
    model_config AiEndpointWithModelGardenDeploymentModelConfigArgs
    The model config to use for the deployment. Structure is documented below.
    project str
    The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
    publisher_model_name str
    The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
    deployConfig Property Map
    The deploy config to use for the deployment. Structure is documented below.
    deployedModelDisplayName String
    Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
    deployedModelId String
    Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
    endpoint String
    Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
    endpointConfig Property Map
    The endpoint config to use for the deployment. Structure is documented below.
    huggingFaceModelId String
    The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
    location String
    Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
    modelConfig Property Map
    The model config to use for the deployment. Structure is documented below.
    project String
    The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
    publisherModelName String
    The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
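
    For example, instead of a publisher model, you can deploy a model directly from Hugging Face by ID. The following TypeScript sketch is a minimal illustration (the resource name deploy-hf is ours, not part of the API; the model ID comes from the huggingFaceModelId description above):

    import * as pulumi from "@pulumi/pulumi";
    import * as gcp from "@pulumi/gcp";

    // Deploy a Hugging Face model by its model ID instead of a publisher model name.
    const deployHf = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-hf", {
        huggingFaceModelId: "google/gemma-2-2b-it",
        location: "us-central1",
        modelConfig: {
            acceptEula: true,
        },
    });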

    Supporting Types

    AiEndpointWithModelGardenDeploymentDeployConfig, AiEndpointWithModelGardenDeploymentDeployConfigArgs

    DedicatedResources AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources
    A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
    FastTryoutEnabled bool
    If true, enable the QMT fast tryout feature for this model if possible.
    SystemLabels Dictionary<string, string>
    System labels for Model Garden deployments. These labels are managed by Google and are used for tracking purposes only.
    DedicatedResources AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources
    A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
    FastTryoutEnabled bool
    If true, enable the QMT fast tryout feature for this model if possible.
    SystemLabels map[string]string
    System labels for Model Garden deployments. These labels are managed by Google and are used for tracking purposes only.
    dedicatedResources AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources
    A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
    fastTryoutEnabled Boolean
    If true, enable the QMT fast tryout feature for this model if possible.
    systemLabels Map<String,String>
    System labels for Model Garden deployments. These labels are managed by Google and are used for tracking purposes only.
    dedicatedResources AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources
    A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
    fastTryoutEnabled boolean
    If true, enable the QMT fast tryout feature for this model if possible.
    systemLabels {[key: string]: string}
    System labels for Model Garden deployments. These labels are managed by Google and are used for tracking purposes only.
    dedicated_resources AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources
    A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
    fast_tryout_enabled bool
    If true, enable the QMT fast tryout feature for this model if possible.
    system_labels Mapping[str, str]
    System labels for Model Garden deployments. These labels are managed by Google and are used for tracking purposes only.
    dedicatedResources Property Map
    A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
    fastTryoutEnabled Boolean
    If true, enable the QMT fast tryout feature for this model if possible.
    systemLabels Map<String>
    System labels for Model Garden deployments. These labels are managed by Google and are used for tracking purposes only.
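
    As a minimal sketch of this block, the following TypeScript fragment enables the fast tryout feature for a deployment (the resource name deploy-fast-tryout is illustrative):

    const deployFastTryout = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-fast-tryout", {
        publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
        location: "us-central1",
        modelConfig: {
            acceptEula: true,
        },
        deployConfig: {
            fastTryoutEnabled: true,
        },
    });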

    AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources, AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs

    MachineSpec AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec
    Specification of a single machine. Structure is documented below.
    MinReplicaCount int
    The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, the model may be dynamically deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
    AutoscalingMetricSpecs List<AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec>
    The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator duty cycle, and so on; the default target is 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, autoscaling is based on both the CPU utilization and accelerator duty cycle metrics: the deployment scales up when either metric exceeds its target value and scales down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, autoscaling is based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, for Online Prediction, to override the target CPU utilization to 80, set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
    MaxReplicaCount int
    The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds, the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas: specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
    RequiredReplicaCount int
    Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, the default required_replica_count will be min_replica_count.
    Spot bool
    If true, schedule the deployment workload on spot VMs.
    MachineSpec AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec
    Specification of a single machine. Structure is documented below.
    MinReplicaCount int
    The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, the model may be dynamically deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
    AutoscalingMetricSpecs []AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec
    The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator duty cycle, and so on; the default target is 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, autoscaling is based on both the CPU utilization and accelerator duty cycle metrics: the deployment scales up when either metric exceeds its target value and scales down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, autoscaling is based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, for Online Prediction, to override the target CPU utilization to 80, set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
    MaxReplicaCount int
    The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds, the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas: specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
    RequiredReplicaCount int
    Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, the default required_replica_count will be min_replica_count.
    Spot bool
    If true, schedule the deployment workload on spot VMs.
    machineSpec AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec
    Specification of a single machine. Structure is documented below.
    minReplicaCount Integer
    The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, the model may be dynamically deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
    autoscalingMetricSpecs List<AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec>
    The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator duty cycle, and so on; the default target is 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, autoscaling is based on both the CPU utilization and accelerator duty cycle metrics: the deployment scales up when either metric exceeds its target value and scales down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, autoscaling is based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, for Online Prediction, to override the target CPU utilization to 80, set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
    maxReplicaCount Integer
    The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds, the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas: specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
    requiredReplicaCount Integer
    Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, the default required_replica_count will be min_replica_count.
    spot Boolean
    If true, schedule the deployment workload on spot VMs.
    machineSpec AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec
    Specification of a single machine. Structure is documented below.
    minReplicaCount number
    The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, the model may be dynamically deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
    autoscalingMetricSpecs AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec[]
    The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator duty cycle, and so on; the default target is 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, autoscaling is based on both the CPU utilization and accelerator duty cycle metrics: the deployment scales up when either metric exceeds its target value and scales down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, autoscaling is based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, for Online Prediction, to override the target CPU utilization to 80, set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
    maxReplicaCount number
    The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds, the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas: specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
    requiredReplicaCount number
    Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, the default required_replica_count will be min_replica_count.
    spot boolean
    If true, schedule the deployment workload on spot VMs.
    machine_spec AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec
    Specification of a single machine. Structure is documented below.
    min_replica_count int
    The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, the model may be dynamically deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
    autoscaling_metric_specs Sequence[AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec]
    The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator duty cycle, and so on; the default target is 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, autoscaling is based on both the CPU utilization and accelerator duty cycle metrics: the deployment scales up when either metric exceeds its target value and scales down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, autoscaling is based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, for Online Prediction, to override the target CPU utilization to 80, set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
    max_replica_count int
    The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds, the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas: specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
    required_replica_count int
    Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, the default required_replica_count will be min_replica_count.
    spot bool
    If true, schedule the deployment workload on spot VMs.
    machineSpec Property Map
    Specification of a single machine. Structure is documented below.
    minReplicaCount Number
    The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, the model may be dynamically deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
    autoscalingMetricSpecs List<Property Map>
    The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator duty cycle, and so on; the default target is 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, autoscaling is based on both the CPU utilization and accelerator duty cycle metrics: the deployment scales up when either metric exceeds its target value and scales down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, autoscaling is based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, for Online Prediction, to override the target CPU utilization to 80, set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
    maxReplicaCount Number
    The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds, the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas: specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
    requiredReplicaCount Number
    Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, the default required_replica_count will be min_replica_count.
    spot Boolean
    If true, schedule the deployment workload on spot VMs.
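
    As an illustrative TypeScript sketch of a dedicated-resources deployment, consider the following; the machine type, accelerator, and replica counts are our assumptions and must be valid for the model you deploy:

    const deployDedicated = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-dedicated", {
        publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
        location: "us-central1",
        modelConfig: {
            acceptEula: true,
        },
        deployConfig: {
            dedicatedResources: {
                machineSpec: {
                    machineType: "g2-standard-12",
                    acceleratorType: "NVIDIA_L4",
                    acceleratorCount: 1,
                },
                minReplicaCount: 1, // always keep at least one replica
                maxReplicaCount: 2, // cap scale-out (and the quota charge)
                spot: true,         // schedule the workload on Spot VMs
            },
        },
    });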

    AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec, AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpecArgs

    MetricName string
    The resource metric name. Supported metrics:

    • For Online Prediction:
    • aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle
    • aiplatform.googleapis.com/prediction/online/cpu/utilization
    Target int
    The target resource utilization, as a percentage (1%–100%), for the given metric; once real usage deviates from the target by a certain percentage, the number of machine replicas changes. The default value is 60 (representing 60%) if not provided.
    MetricName string
    The resource metric name. Supported metrics:

    • For Online Prediction:
    • aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle
    • aiplatform.googleapis.com/prediction/online/cpu/utilization
    Target int
    The target resource utilization, as a percentage (1%–100%), for the given metric; once real usage deviates from the target by a certain percentage, the number of machine replicas changes. The default value is 60 (representing 60%) if not provided.
    metricName String
    The resource metric name. Supported metrics:

    • For Online Prediction:
    • aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle
    • aiplatform.googleapis.com/prediction/online/cpu/utilization
    target Integer
    The target resource utilization, as a percentage (1%–100%), for the given metric; once real usage deviates from the target by a certain percentage, the number of machine replicas changes. The default value is 60 (representing 60%) if not provided.
    metricName string
    The resource metric name. Supported metrics:

    • For Online Prediction:
    • aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle
    • aiplatform.googleapis.com/prediction/online/cpu/utilization
    target number
    The target resource utilization, as a percentage (1%–100%), for the given metric; once real usage deviates from the target by a certain percentage, the number of machine replicas changes. The default value is 60 (representing 60%) if not provided.
    metric_name str
    The resource metric name. Supported metrics:

    • For Online Prediction:
    • aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle
    • aiplatform.googleapis.com/prediction/online/cpu/utilization
    target int
    The target resource utilization, as a percentage (1%–100%), for the given metric; once real usage deviates from the target by a certain percentage, the number of machine replicas changes. The default value is 60 (representing 60%) if not provided.
    metricName String
    The resource metric name. Supported metrics:

    • For Online Prediction:
    • aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle
    • aiplatform.googleapis.com/prediction/online/cpu/utilization
    target Number
    The target resource utilization, as a percentage (1%–100%), for the given metric; once real usage deviates from the target by a certain percentage, the number of machine replicas changes. The default value is 60 (representing 60%) if not provided.
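
    Following the CPU-utilization example in the description above, the TypeScript fragment below overrides the target to 80 inside deployConfig.dedicatedResources (the machine type and replica counts are illustrative):

    dedicatedResources: {
        machineSpec: {
            machineType: "n1-standard-8",
        },
        minReplicaCount: 1,
        maxReplicaCount: 4,
        autoscalingMetricSpecs: [{
            // Override the default CPU utilization target of 60.
            metricName: "aiplatform.googleapis.com/prediction/online/cpu/utilization",
            target: 80,
        }],
    },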

    AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec, AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs

    AcceleratorCount int
    The number of accelerators to attach to the machine.
    AcceleratorType string
    Possible values: ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, NVIDIA_TESLA_A100, NVIDIA_A100_80GB, NVIDIA_L4, NVIDIA_H100_80GB, NVIDIA_H100_MEGA_80GB, NVIDIA_H200_141GB, NVIDIA_B200, TPU_V2, TPU_V3, TPU_V4_POD, TPU_V5_LITEPOD
    MachineType string
    The type of the machine. See the list of machine types supported for prediction and the list of machine types supported for custom training. For DeployedModel this field is optional, and the default value is n1-standard-2. For BatchPredictionJob or as part of WorkerPoolSpec this field is required.
    MultihostGpuNodeCount int
    The number of nodes per replica for multihost GPU deployments.
    ReservationAffinity AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinity
    A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
    TpuTopology string
    The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
    AcceleratorCount int
    The number of accelerators to attach to the machine.
    AcceleratorType string
    Possible values: ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, NVIDIA_TESLA_A100, NVIDIA_A100_80GB, NVIDIA_L4, NVIDIA_H100_80GB, NVIDIA_H100_MEGA_80GB, NVIDIA_H200_141GB, NVIDIA_B200, TPU_V2, TPU_V3, TPU_V4_POD, TPU_V5_LITEPOD
    MachineType string
    The type of the machine. See the list of machine types supported for prediction and the list of machine types supported for custom training. For DeployedModel this field is optional, and the default value is n1-standard-2. For BatchPredictionJob or as part of WorkerPoolSpec this field is required.
    MultihostGpuNodeCount int
    The number of nodes per replica for multihost GPU deployments.
    ReservationAffinity AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinity
    A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
    TpuTopology string
    The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
    acceleratorCount Integer
    The number of accelerators to attach to the machine.
    acceleratorType String
    Possible values: ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, NVIDIA_TESLA_A100, NVIDIA_A100_80GB, NVIDIA_L4, NVIDIA_H100_80GB, NVIDIA_H100_MEGA_80GB, NVIDIA_H200_141GB, NVIDIA_B200, TPU_V2, TPU_V3, TPU_V4_POD, TPU_V5_LITEPOD
    machineType String
    The type of the machine. See the list of machine types supported for prediction and the list of machine types supported for custom training. For DeployedModel this field is optional, and the default value is n1-standard-2. For BatchPredictionJob or as part of WorkerPoolSpec this field is required.
    multihostGpuNodeCount Integer
    The number of nodes per replica for multihost GPU deployments.
    reservationAffinity AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinity
    A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
    tpuTopology String
    The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
    acceleratorCount number
    The number of accelerators to attach to the machine.
    acceleratorType string
    Possible values: ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, NVIDIA_TESLA_A100, NVIDIA_A100_80GB, NVIDIA_L4, NVIDIA_H100_80GB, NVIDIA_H100_MEGA_80GB, NVIDIA_H200_141GB, NVIDIA_B200, TPU_V2, TPU_V3, TPU_V4_POD, TPU_V5_LITEPOD
    machineType string
    The type of the machine. See the list of machine types supported for prediction and the list of machine types supported for custom training. For DeployedModel this field is optional, and the default value is n1-standard-2. For BatchPredictionJob or as part of WorkerPoolSpec this field is required.
    multihostGpuNodeCount number
    The number of nodes per replica for multihost GPU deployments.
    reservationAffinity AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinity
    A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
    tpuTopology string
    The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
    accelerator_count int
    The number of accelerators to attach to the machine.
    accelerator_type str
    Possible values: ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, NVIDIA_TESLA_A100, NVIDIA_A100_80GB, NVIDIA_L4, NVIDIA_H100_80GB, NVIDIA_H100_MEGA_80GB, NVIDIA_H200_141GB, NVIDIA_B200, TPU_V2, TPU_V3, TPU_V4_POD, TPU_V5_LITEPOD
    machine_type str
    The type of the machine. See the list of machine types supported for prediction and the list of machine types supported for custom training. For DeployedModel this field is optional, and the default value is n1-standard-2. For BatchPredictionJob or as part of WorkerPoolSpec this field is required.
    multihost_gpu_node_count int
    The number of nodes per replica for multihost GPU deployments.
    reservation_affinity AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinity
    A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
    tpu_topology str
    The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
    acceleratorCount Number
    The number of accelerators to attach to the machine.
    acceleratorType String
    Possible values: ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, NVIDIA_TESLA_A100, NVIDIA_A100_80GB, NVIDIA_L4, NVIDIA_H100_80GB, NVIDIA_H100_MEGA_80GB, NVIDIA_H200_141GB, NVIDIA_B200, TPU_V2, TPU_V3, TPU_V4_POD, TPU_V5_LITEPOD
    machineType String
    The type of the machine. See the list of machine types supported for prediction and the list of machine types supported for custom training. For DeployedModel this field is optional, and the default value is n1-standard-2. For BatchPredictionJob or as part of WorkerPoolSpec this field is required.
    multihostGpuNodeCount Number
    The number of nodes per replica for multihost GPU deployments.
    reservationAffinity Property Map
    A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
    tpuTopology String
    The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
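
    For instance, a GPU machine spec pairing one NVIDIA A100 with its matching A2 machine type might look like the TypeScript fragment below (an illustrative combination; confirm the machine type and accelerator are supported in your region):

    machineSpec: {
        machineType: "a2-highgpu-1g",
        acceleratorType: "NVIDIA_TESLA_A100",
        acceleratorCount: 1,
    },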

    AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinity, AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinityArgs

    ReservationAffinityType string
    Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION
    Key string
    Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
    Values List<string>
    Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
    ReservationAffinityType string
    Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION
    Key string
    Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
    Values []string
    Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
    reservationAffinityType String
    Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION
    key String
    Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
    values List<String>
    Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
    reservationAffinityType string
    Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION
    key string
    Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
    values string[]
    Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
    reservation_affinity_type str
    Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION
    key str
    Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
    values Sequence[str]
    Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
    reservationAffinityType String
    Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION
    key String
    Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
    values List<String>
    Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
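
    To draw capacity from a specific shared reservation, a fragment like the following TypeScript sketch can be used (the project, zone, and reservation name are placeholders):

    reservationAffinity: {
        reservationAffinityType: "SPECIFIC_RESERVATION",
        key: "compute.googleapis.com/reservation-name",
        values: ["projects/my-project/zones/us-central1-a/reservations/my-reservation"],
    },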

    AiEndpointWithModelGardenDeploymentEndpointConfig, AiEndpointWithModelGardenDeploymentEndpointConfigArgs

    DedicatedEndpointEnabled bool
    If true, the endpoint will be exposed through a dedicated DNS [Endpoint.dedicated_endpoint_dns]. Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you will no longer be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. These limitations will be removed soon.
    EndpointDisplayName string
    The user-specified display name of the endpoint. If not set, a default name will be used.
    DedicatedEndpointEnabled bool
    If true, the endpoint will be exposed through a dedicated DNS [Endpoint.dedicated_endpoint_dns]. Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you will no longer be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. These limitations will be removed soon.
    EndpointDisplayName string
    The user-specified display name of the endpoint. If not set, a default name will be used.
    dedicatedEndpointEnabled Boolean
    If true, the endpoint will be exposed through a dedicated DNS [Endpoint.dedicated_endpoint_dns]. Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you will no longer be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. These limitations will be removed soon.
    endpointDisplayName String
    The user-specified display name of the endpoint. If not set, a default name will be used.
    dedicatedEndpointEnabled boolean
    If true, the endpoint will be exposed through a dedicated DNS [Endpoint.dedicated_endpoint_dns]. Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you will no longer be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. These limitations will be removed soon.
    endpointDisplayName string
    The user-specified display name of the endpoint. If not set, a default name will be used.
    dedicated_endpoint_enabled bool
    If true, the endpoint will be exposed through a dedicated DNS [Endpoint.dedicated_endpoint_dns]. Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you will no longer be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. These limitations will be removed soon.
    endpoint_display_name str
    The user-specified display name of the endpoint. If not set, a default name will be used.
    dedicatedEndpointEnabled Boolean
    If true, the endpoint will be exposed through a dedicated DNS [Endpoint.dedicated_endpoint_dns]. Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you will no longer be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. These limitations will be removed soon.
    endpointDisplayName String
    The user-specified display name of the endpoint. If not set, a default name will be used.
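
    A minimal TypeScript sketch of this block enables the dedicated endpoint and names it (the display name is illustrative):

    endpointConfig: {
        dedicatedEndpointEnabled: true,
        endpointDisplayName: "my-dedicated-endpoint",
    },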

    AiEndpointWithModelGardenDeploymentModelConfig, AiEndpointWithModelGardenDeploymentModelConfigArgs

    AcceptEula bool
    Whether the user accepts the End User License Agreement (EULA) for the model.
    ContainerSpec AiEndpointWithModelGardenDeploymentModelConfigContainerSpec
    Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
    HuggingFaceAccessToken string
    The Hugging Face read access token used to access the model artifacts of gated models.
    HuggingFaceCacheEnabled bool
    If true, the model is deployed with a cached version of the model artifacts instead of downloading them directly from Hugging Face. This is suitable for VPC-SC users with limited internet access.
    ModelDisplayName string
    The user-specified display name of the uploaded model. If not set, a default name will be used.
    AcceptEula bool
    Whether the user accepts the End User License Agreement (EULA) for the model.
    ContainerSpec AiEndpointWithModelGardenDeploymentModelConfigContainerSpec
    Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
    HuggingFaceAccessToken string
    The Hugging Face read access token used to access the model artifacts of gated models.
    HuggingFaceCacheEnabled bool
    If true, the model is deployed with a cached version of the model artifacts instead of downloading them directly from Hugging Face. This is suitable for VPC-SC users with limited internet access.
    ModelDisplayName string
    The user-specified display name of the uploaded model. If not set, a default name will be used.
    acceptEula Boolean
    Whether the user accepts the End User License Agreement (EULA) for the model.
    containerSpec AiEndpointWithModelGardenDeploymentModelConfigContainerSpec
    Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
    huggingFaceAccessToken String
    The Hugging Face read access token used to access the model artifacts of gated models.
    huggingFaceCacheEnabled Boolean
    If true, the model is deployed with a cached version of the model artifacts instead of downloading them directly from Hugging Face. This is suitable for VPC-SC users with limited internet access.
    modelDisplayName String
    The user-specified display name of the uploaded model. If not set, a default name will be used.
    acceptEula boolean
    Whether the user accepts the End User License Agreement (EULA) for the model.
    containerSpec AiEndpointWithModelGardenDeploymentModelConfigContainerSpec
    Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
    huggingFaceAccessToken string
    The Hugging Face read access token used to access the model artifacts of gated models.
    huggingFaceCacheEnabled boolean
    If true, the model is deployed with a cached version of the model artifacts instead of downloading them directly from Hugging Face. This is suitable for VPC-SC users with limited internet access.
    modelDisplayName string
    The user-specified display name of the uploaded model. If not set, a default name will be used.
    accept_eula bool
    Whether the user accepts the End User License Agreement (EULA) for the model.
    container_spec AiEndpointWithModelGardenDeploymentModelConfigContainerSpec
    Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
    hugging_face_access_token str
    The Hugging Face read access token used to access the model artifacts of gated models.
    hugging_face_cache_enabled bool
    If true, the model is deployed with a cached version of the model artifacts instead of downloading them directly from Hugging Face. This is suitable for VPC-SC users with limited internet access.
    model_display_name str
    The user-specified display name of the uploaded model. If not set, a default name will be used.
    acceptEula Boolean
    Whether the user accepts the End User License Agreement (EULA) for the model.
    containerSpec Property Map
    Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
    huggingFaceAccessToken String
    The Hugging Face read access token used to access the model artifacts of gated models.
    huggingFaceCacheEnabled Boolean
    If true, the model is deployed with a cached version of the model artifacts instead of downloading them directly from Hugging Face. This is suitable for VPC-SC users with limited internet access.
    modelDisplayName String
    The user-specified display name of the uploaded model. If not set, a default name will be used.
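
    For a gated Hugging Face model, the access token should be kept out of source control. The TypeScript sketch below reads it from Pulumi config as a secret (the config key huggingFaceToken and the resource name are our assumptions):

    import * as pulumi from "@pulumi/pulumi";
    import * as gcp from "@pulumi/gcp";

    const cfg = new pulumi.Config();

    const deployGated = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gated", {
        huggingFaceModelId: "google/gemma-2-2b-it",
        location: "us-central1",
        modelConfig: {
            acceptEula: true,
            // Secret set with: pulumi config set --secret huggingFaceToken <token>
            huggingFaceAccessToken: cfg.requireSecret("huggingFaceToken"),
            huggingFaceCacheEnabled: true,
            modelDisplayName: "gemma-2-2b-it",
        },
    });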

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpec, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecArgs

    ImageUri string
    URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including the permissions required for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and the original path is not used afterwards. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI of one of Vertex AI's pre-built container images for prediction in this field.
    Args List<string>
    Specifies arguments for the command that runs when the container starts. This overrides the container's CMD. Specify this field as an array of executable and arguments, similar to a Docker CMD's "default parameters" form. If you don't specify this field but do specify the command field, then the command from the command field runs without any additional arguments. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. If you don't specify this field and don't specify the command field, then the container's ENTRYPOINT and CMD determine what runs based on their default behavior. See the Docker documentation about how CMD and ENTRYPOINT interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the args field of the Kubernetes Containers v1 core API.
    Commands List<string>
    Specifies the command that runs when the container starts. This overrides the container's ENTRYPOINT. Specify this field as an array of executable and arguments, similar to a Docker ENTRYPOINT's "exec" form, not its "shell" form. If you do not specify this field, then the container's ENTRYPOINT runs, in conjunction with the args field or the container's CMD, if either exists. If this field is not specified and the container does not have an ENTRYPOINT, then refer to the Docker documentation about how CMD and ENTRYPOINT interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
    DeploymentTimeout string
    Deployment timeout. The maximum allowed deployment timeout is 2 hours.
    Envs List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv>

    List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following sets the variable VAR_2 to the value foo bar:

    [
      {
        "name": "VAR_1",
        "value": "foo"
      },
      {
        "name": "VAR_2",
        "value": "$(VAR_1) bar"
      }
    ]
    

    If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.

    GrpcPorts List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort>
    List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
    HealthProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    HealthRoute string
    HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict The placeholders in this value are replaced as follows:

    • ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
    • DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
    LivenessProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    Ports List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort>

    List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value:

    [
      {
        "containerPort": 8080
      }
    ]
    

    Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.

    PredictRoute string
    HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict The placeholders in this value are replaced as follows:

    • ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
    • DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
    SharedMemorySizeMb string
    The amount of the VM memory to reserve as the shared memory for the model in megabytes.
    StartupProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    ImageUri string
    URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI to one of Vertex AI's pre-built container images for prediction in this field.
    Args []string
    Specifies arguments for the command that runs when the container starts. This overrides the container's CMD. Specify this field as an array of executable and arguments, similar to a Docker CMD's "default parameters" form. If you don't specify this field but do specify the command field, then the command from the command field runs without any additional arguments. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. If you don't specify this field and don't specify the command field, then the container's ENTRYPOINT and CMD determine what runs based on their default behavior. See the Docker documentation about how CMD and ENTRYPOINT interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax:$(VARIABLE_NAME) Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example:$$(VARIABLE_NAME) This field corresponds to the args field of the Kubernetes Containers v1 core API.
    Commands []string
    Specifies the command that runs when the container starts. This overrides the container's ENTRYPOINT. Specify this field as an array of executable and arguments, similar to a Docker ENTRYPOINT's "exec" form, not its "shell" form. If you do not specify this field, then the container's ENTRYPOINT runs, in conjunction with the args field or the container's CMD, if either exists. If this field is not specified and the container does not have an ENTRYPOINT, then refer to the Docker documentation about how CMD and ENTRYPOINT interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
    DeploymentTimeout string
    Deployment timeout. The maximum allowed timeout is 2 hours.
    Envs []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv

    List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following sets the variable VAR_2 to the value foo bar:

    [
      {
        "name": "VAR_1",
        "value": "foo"
      },
      {
        "name": "VAR_2",
        "value": "$(VAR_1) bar"
      }
    ]

    If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.

    GrpcPorts []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort
    List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
    HealthProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    HealthRoute string
    HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:

    • ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
    • DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
    LivenessProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    Ports []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort

    List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value:

    [
      {
        "containerPort": 8080
      }
    ]

    Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.

    PredictRoute string
    HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:

    • ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
    • DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
    SharedMemorySizeMb string
    The amount of VM memory, in megabytes, to reserve as shared memory for the model.
    StartupProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    imageUri String
    URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI of one of Vertex AI's pre-built container images for prediction in this field.
    args List<String>
    Specifies arguments for the command that runs when the container starts. This overrides the container's CMD. Specify this field as an array of executable and arguments, similar to a Docker CMD's "default parameters" form. If you don't specify this field but do specify the command field, then the command from the command field runs without any additional arguments. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. If you don't specify this field and don't specify the command field, then the container's ENTRYPOINT and CMD determine what runs based on their default behavior. See the Docker documentation about how CMD and ENTRYPOINT interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the args field of the Kubernetes Containers v1 core API.
    commands List<String>
    Specifies the command that runs when the container starts. This overrides the container's ENTRYPOINT. Specify this field as an array of executable and arguments, similar to a Docker ENTRYPOINT's "exec" form, not its "shell" form. If you do not specify this field, then the container's ENTRYPOINT runs, in conjunction with the args field or the container's CMD, if either exists. If this field is not specified and the container does not have an ENTRYPOINT, then refer to the Docker documentation about how CMD and ENTRYPOINT interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
    deploymentTimeout String
    Deployment timeout. The maximum allowed timeout is 2 hours.
    envs List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv>

    List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following sets the variable VAR_2 to the value foo bar:

    [
      {
        "name": "VAR_1",
        "value": "foo"
      },
      {
        "name": "VAR_2",
        "value": "$(VAR_1) bar"
      }
    ]

    If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.

    grpcPorts List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort>
    List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
    healthProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    healthRoute String
    HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:

    • ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
    • DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
    livenessProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    ports List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort>

    List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value:

    [
      {
        "containerPort": 8080
      }
    ]

    Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.

    predictRoute String
    HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:

    • ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
    • DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
    sharedMemorySizeMb String
    The amount of VM memory, in megabytes, to reserve as shared memory for the model.
    startupProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    imageUri string
    URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI of one of Vertex AI's pre-built container images for prediction in this field.
    args string[]
    Specifies arguments for the command that runs when the container starts. This overrides the container's CMD. Specify this field as an array of executable and arguments, similar to a Docker CMD's "default parameters" form. If you don't specify this field but do specify the command field, then the command from the command field runs without any additional arguments. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. If you don't specify this field and don't specify the command field, then the container's ENTRYPOINT and CMD determine what runs based on their default behavior. See the Docker documentation about how CMD and ENTRYPOINT interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the args field of the Kubernetes Containers v1 core API.
    commands string[]
    Specifies the command that runs when the container starts. This overrides the container's ENTRYPOINT. Specify this field as an array of executable and arguments, similar to a Docker ENTRYPOINT's "exec" form, not its "shell" form. If you do not specify this field, then the container's ENTRYPOINT runs, in conjunction with the args field or the container's CMD, if either exists. If this field is not specified and the container does not have an ENTRYPOINT, then refer to the Docker documentation about how CMD and ENTRYPOINT interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
    deploymentTimeout string
    Deployment timeout. The maximum allowed timeout is 2 hours.
    envs AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv[]

    List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following sets the variable VAR_2 to the value foo bar:

    [
      {
        "name": "VAR_1",
        "value": "foo"
      },
      {
        "name": "VAR_2",
        "value": "$(VAR_1) bar"
      }
    ]

    If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.

    grpcPorts AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort[]
    List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
    healthProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    healthRoute string
    HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:

    • ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
    • DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
    livenessProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    ports AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort[]

    List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value:

    [
      {
        "containerPort": 8080
      }
    ]

    Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.

    predictRoute string
    HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:

    • ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
    • DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
    sharedMemorySizeMb string
    The amount of VM memory, in megabytes, to reserve as shared memory for the model.
    startupProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    image_uri str
    URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI of one of Vertex AI's pre-built container images for prediction in this field.
    args Sequence[str]
    Specifies arguments for the command that runs when the container starts. This overrides the container's CMD. Specify this field as an array of executable and arguments, similar to a Docker CMD's "default parameters" form. If you don't specify this field but do specify the command field, then the command from the command field runs without any additional arguments. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. If you don't specify this field and don't specify the command field, then the container's ENTRYPOINT and CMD determine what runs based on their default behavior. See the Docker documentation about how CMD and ENTRYPOINT interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the args field of the Kubernetes Containers v1 core API.
    commands Sequence[str]
    Specifies the command that runs when the container starts. This overrides the container's ENTRYPOINT. Specify this field as an array of executable and arguments, similar to a Docker ENTRYPOINT's "exec" form, not its "shell" form. If you do not specify this field, then the container's ENTRYPOINT runs, in conjunction with the args field or the container's CMD, if either exists. If this field is not specified and the container does not have an ENTRYPOINT, then refer to the Docker documentation about how CMD and ENTRYPOINT interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
    deployment_timeout str
    Deployment timeout. The maximum allowed timeout is 2 hours.
    envs Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv]

    List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following sets the variable VAR_2 to the value foo bar:

    [
      {
        "name": "VAR_1",
        "value": "foo"
      },
      {
        "name": "VAR_2",
        "value": "$(VAR_1) bar"
      }
    ]

    If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.

    grpc_ports Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort]
    List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
    health_probe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    health_route str
    HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:

    • ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
    • DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
    liveness_probe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    ports Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort]

    List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value:

    [
      {
        "containerPort": 8080
      }
    ]

    Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.

    predict_route str
    HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:

    • ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
    • DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
    shared_memory_size_mb str
    The amount of VM memory, in megabytes, to reserve as shared memory for the model.
    startup_probe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    imageUri String
    URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI of one of Vertex AI's pre-built container images for prediction in this field.
    args List<String>
    Specifies arguments for the command that runs when the container starts. This overrides the container's CMD. Specify this field as an array of executable and arguments, similar to a Docker CMD's "default parameters" form. If you don't specify this field but do specify the command field, then the command from the command field runs without any additional arguments. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. If you don't specify this field and don't specify the command field, then the container's ENTRYPOINT and CMD determine what runs based on their default behavior. See the Docker documentation about how CMD and ENTRYPOINT interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the args field of the Kubernetes Containers v1 core API.
    commands List<String>
    Specifies the command that runs when the container starts. This overrides the container's ENTRYPOINT. Specify this field as an array of executable and arguments, similar to a Docker ENTRYPOINT's "exec" form, not its "shell" form. If you do not specify this field, then the container's ENTRYPOINT runs, in conjunction with the args field or the container's CMD, if either exists. If this field is not specified and the container does not have an ENTRYPOINT, then refer to the Docker documentation about how CMD and ENTRYPOINT interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
    deploymentTimeout String
    Deployment timeout. The maximum allowed timeout is 2 hours.
    envs List<Property Map>

    List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following sets the variable VAR_2 to the value foo bar:

    [
      {
        "name": "VAR_1",
        "value": "foo"
      },
      {
        "name": "VAR_2",
        "value": "$(VAR_1) bar"
      }
    ]

    If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.

    grpcPorts List<Property Map>
    List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
    healthProbe Property Map
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    healthRoute String
    HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:

    • ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
    • DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
    livenessProbe Property Map
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
    ports List<Property Map>

    List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value:

    [
      {
        "containerPort": 8080
      }
    ]

    Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.

    predictRoute String
    HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:

    • ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
    • DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
    sharedMemorySizeMb String
    The amount of VM memory, in megabytes, to reserve as shared memory for the model.
    startupProbe Property Map
    Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.

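    Taken together, the container fields above mirror the Kubernetes Containers v1 core API. As a rough illustration of how they fit into this resource, the TypeScript sketch below wires a custom serving container into the deployment; the image URI, routes, port, and variable names are illustrative placeholders, not values from this page.

    import * as gcp from "@pulumi/gcp";
    
    // Sketch only: assumes a custom image that serves predictions on port 8080
    // at /predict and reports health at /health.
    const deployCustom = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-custom", {
        publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
        location: "us-central1",
        modelConfig: {
            acceptEula: true,
            containerSpec: {
                imageUri: "us-docker.pkg.dev/my-project/my-repo/my-server:latest", // hypothetical image
                predictRoute: "/predict",
                healthRoute: "/health",
                ports: [{ containerPort: 8080 }], // Vertex AI only uses the first port listed
                envs: [
                    { name: "VAR_1", value: "foo" },
                    { name: "VAR_2", value: "$(VAR_1) bar" }, // later entries may reference earlier ones
                ],
            },
        },
    });
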
    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnvArgs

    Name string
    Name of the environment variable. Must be a valid C identifier.
    Value string
    Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
    Name string
    Name of the environment variable. Must be a valid C identifier.
    Value string
    Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
    name String
    Name of the environment variable. Must be a valid C identifier.
    value String
    Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
    name string
    Name of the environment variable. Must be a valid C identifier.
    value string
    Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
    name str
    Name of the environment variable. Must be a valid C identifier.
    value str
    Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
    name String
    Name of the environment variable. Must be a valid C identifier.
    value String
    Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.

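    To make the expansion and escaping rules concrete, here is a small hedged sketch in TypeScript (the variable names are illustrative):

    // Values referencing $(VAR_NAME) expand against earlier entries;
    // $$(VAR_NAME) escapes the syntax and is never expanded.
    const envs = [
        { name: "BASE", value: "/models" },
        { name: "MODEL_PATH", value: "$(BASE)/v1" }, // resolves to "/models/v1"
        { name: "LITERAL", value: "$$(BASE)/v1" },   // escaped; left unexpanded
    ];
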
    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPortArgs

    ContainerPort int
    The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
    ContainerPort int
    The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
    containerPort Integer
    The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
    containerPort number
    The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
    container_port int
    The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
    containerPort Number
    The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.

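    As a minimal sketch, a grpcPorts entry for the containerSpec might look like the following in TypeScript (the port value is illustrative):

    // Enables gRPC traffic to the container; omitting grpcPorts disables gRPC.
    // Only the first entry is used by Vertex AI.
    const grpcPorts = [{ containerPort: 8500 }];
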
    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeArgs

    Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec
    ExecAction specifies a command to execute. Structure is documented below.
    FailureThreshold int
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    InitialDelaySeconds int
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    PeriodSeconds int
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    SuccessThreshold int
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    TimeoutSeconds int
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
    Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec
    ExecAction specifies a command to execute. Structure is documented below.
    FailureThreshold int
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    InitialDelaySeconds int
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    PeriodSeconds int
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    SuccessThreshold int
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    TimeoutSeconds int
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
    exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec
    ExecAction specifies a command to execute. Structure is documented below.
    failureThreshold Integer
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    initialDelaySeconds Integer
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    periodSeconds Integer
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    successThreshold Integer
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    timeoutSeconds Integer
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
    exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec
    ExecAction specifies a command to execute. Structure is documented below.
    failureThreshold number
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    initialDelaySeconds number
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    periodSeconds number
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    successThreshold number
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    timeoutSeconds number
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
    exec_ AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec
    ExecAction specifies a command to execute. Structure is documented below.
    failure_threshold int
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    http_get AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    initial_delay_seconds int
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    period_seconds int
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    success_threshold int
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    tcp_socket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    timeout_seconds int
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
    exec Property Map
    ExecAction specifies a command to execute. Structure is documented below.
    failureThreshold Number
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    grpc Property Map
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    httpGet Property Map
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    initialDelaySeconds Number
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    periodSeconds Number
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    successThreshold Number
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    tcpSocket Property Map
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    timeoutSeconds Number
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.

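    A hedged sketch of one such probe in TypeScript, using the gRPC action documented below (the port, service name, and timing values are illustrative and would slot into containerSpec.healthProbe, livenessProbe, or startupProbe):

    const healthProbe = {
        grpc: { port: 8080, service: "serving" }, // hypothetical health service name
        initialDelaySeconds: 30, // wait before the first check
        periodSeconds: 10,       // must be less than timeoutSeconds per the field docs
        timeoutSeconds: 15,
        failureThreshold: 3,     // consecutive failures before the probe fails
        successThreshold: 1,
    };
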
    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExecArgs

    Commands List<string>
    Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy, and non-zero as unhealthy.
    Commands []string
    Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy, and non-zero as unhealthy.
    commands List<String>
    Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
    commands string[]
    Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
    commands Sequence[str]
    Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
    commands List<String>
    Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
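
    Because the command is exec'd directly rather than run in a shell, pipes and other shell syntax must be wrapped in an explicit shell invocation. A minimal TypeScript sketch using the Node.js SDK's types.input namespace; the probe command itself is a hypothetical example:

    import * as gcp from "@pulumi/gcp";

    // The argv array is exec'd as-is; "/bin/sh -c" supplies the shell explicitly.
    const execAction: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec = {
        commands: ["/bin/sh", "-c", "curl -sf http://localhost:8080/health"],
    };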

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpcArgs

    Port int
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    Service string
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
    Port int
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    Service string
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
    port Integer
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    service String
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
    port number
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    service string
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
    port int
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    service str
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
    port Number
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    service String
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
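
    As a hedged TypeScript sketch of the gRPC action (the port and service name below are hypothetical; omitting service falls back to gRPC's default health-checking behavior):

    import * as gcp from "@pulumi/gcp";

    const grpcAction: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc = {
        port: 8500,                      // any port in the range 1 to 65535
        service: "serving.ModelService", // name placed in the gRPC HealthCheckRequest
    };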

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetArgs

    Host string
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    HttpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader>
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    Path string
    Path to access on the HTTP server.
    Port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    Scheme string
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
    Host string
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    HttpHeaders []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    Path string
    Path to access on the HTTP server.
    Port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    Scheme string
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
    host String
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    httpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader>
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    path String
    Path to access on the HTTP server.
    port Integer
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    scheme String
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
    host string
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    httpHeaders AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader[]
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    path string
    Path to access on the HTTP server.
    port number
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    scheme string
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
    host str
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    http_headers Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader]
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    path str
    Path to access on the HTTP server.
    port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    scheme str
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
    host String
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    httpHeaders List<Property Map>
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    path String
    Path to access on the HTTP server.
    port Number
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    scheme String
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
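
    The TypeScript sketch below illustrates the HTTP GET action together with the header structure documented next; the path, port, and host value are hypothetical. Per the field notes above, the "Host" header is set via httpHeaders rather than the top-level host field:

    import * as gcp from "@pulumi/gcp";

    const httpGetAction: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet = {
        path: "/health",
        port: 8080,
        scheme: "HTTP",
        httpHeaders: [
            { name: "Host", value: "model.internal.example" }, // hypothetical host
        ],
    };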

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeaderArgs

    Name string
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    Value string
    The header field value.
    Name string
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    Value string
    The header field value.
    name String
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    value String
    The header field value.
    name string
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    value string
    The header field value.
    name str
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    value str
    The header field value.
    name String
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    value String
    The header field value.

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocketArgs

    Host string
    Optional: Host name to connect to; defaults to the model serving container's IP.
    Port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    Host string
    Optional: Host name to connect to; defaults to the model serving container's IP.
    Port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    host String
    Optional: Host name to connect to; defaults to the model serving container's IP.
    port Integer
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    host string
    Optional: Host name to connect to; defaults to the model serving container's IP.
    port number
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    host str
    Optional: Host name to connect to; defaults to the model serving container's IP.
    port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    host String
    Optional: Host name to connect to; defaults to the model serving container's IP.
    port Number
    Number of the port to access on the container. Number must be in the range 1 to 65535.
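
    A minimal TypeScript sketch of the TCP socket action (the port is hypothetical; host is omitted, so the probe connects to the serving container's IP):

    import * as gcp from "@pulumi/gcp";

    const tcpAction: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket = {
        port: 8080,
    };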

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeArgs

    Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec
    ExecAction specifies a command to execute. Structure is documented below.
    FailureThreshold int
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    InitialDelaySeconds int
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    PeriodSeconds int
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    SuccessThreshold int
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    TimeoutSeconds int
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
    Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec
    ExecAction specifies a command to execute. Structure is documented below.
    FailureThreshold int
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    InitialDelaySeconds int
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    PeriodSeconds int
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    SuccessThreshold int
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    TimeoutSeconds int
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
    exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec
    ExecAction specifies a command to execute. Structure is documented below.
    failureThreshold Integer
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    initialDelaySeconds Integer
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    periodSeconds Integer
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    successThreshold Integer
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    timeoutSeconds Integer
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
    exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec
    ExecAction specifies a command to execute. Structure is documented below.
    failureThreshold number
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    initialDelaySeconds number
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    periodSeconds number
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    successThreshold number
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    timeoutSeconds number
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
    exec_ AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec
    ExecAction specifies a command to execute. Structure is documented below.
    failure_threshold int
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    http_get AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    initial_delay_seconds int
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    period_seconds int
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    success_threshold int
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    tcp_socket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    timeout_seconds int
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
    exec Property Map
    ExecAction specifies a command to execute. Structure is documented below.
    failureThreshold Number
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    grpc Property Map
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    httpGet Property Map
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    initialDelaySeconds Number
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    periodSeconds Number
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    successThreshold Number
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    tcpSocket Property Map
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    timeoutSeconds Number
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
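
    The liveness probe takes the same action and timing fields as the health probe above. A hedged TypeScript sketch with illustrative values, sized so that roughly a minute of consecutive failures (6 failures at a 10-second period) marks the container unhealthy:

    import * as gcp from "@pulumi/gcp";

    const livenessProbe: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe = {
        tcpSocket: { port: 8080 }, // hypothetical serving port
        periodSeconds: 10,         // must be less than timeoutSeconds
        timeoutSeconds: 15,
        failureThreshold: 6,
    };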

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExecArgs

    Commands List<string>
    Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy; non-zero is treated as unhealthy.
    Commands []string
    Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy; non-zero is treated as unhealthy.
    commands List<String>
    Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy; non-zero is treated as unhealthy.
    commands string[]
    Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy; non-zero is treated as unhealthy.
    commands Sequence[str]
    Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy; non-zero is treated as unhealthy.
    commands List<String>
    Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy; non-zero is treated as unhealthy.

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpcArgs

    Port int
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    Service string
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
    Port int
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    Service string
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
    port Integer
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    service String
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
    port number
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    service string
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
    port int
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    service str
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
    port Number
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    service String
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetArgs

    Host string
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    HttpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader>
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    Path string
    Path to access on the HTTP server.
    Port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    Scheme string
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
    Host string
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    HttpHeaders []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    Path string
    Path to access on the HTTP server.
    Port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    Scheme string
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
    host String
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    httpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader>
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    path String
    Path to access on the HTTP server.
    port Integer
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    scheme String
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
    host string
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    httpHeaders AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader[]
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    path string
    Path to access on the HTTP server.
    port number
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    scheme string
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
    host str
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    http_headers Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader]
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    path str
    Path to access on the HTTP server.
    port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    scheme str
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
    host String
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    httpHeaders List<Property Map>
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    path String
    Path to access on the HTTP server.
    port Number
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    scheme String
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeaderArgs

    Name string
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    Value string
    The header field value.
    Name string
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    Value string
    The header field value.
    name String
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    value String
    The header field value.
    name string
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    value string
    The header field value.
    name str
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    value str
    The header field value.
    name String
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    value String
    The header field value.

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocketArgs

    Host string
    Optional: Host name to connect to; defaults to the model serving container's IP.
    Port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    Host string
    Optional: Host name to connect to; defaults to the model serving container's IP.
    Port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    host String
    Optional: Host name to connect to; defaults to the model serving container's IP.
    port Integer
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    host string
    Optional: Host name to connect to; defaults to the model serving container's IP.
    port number
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    host str
    Optional: Host name to connect to; defaults to the model serving container's IP.
    port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    host String
    Optional: Host name to connect to; defaults to the model serving container's IP.
    port Number
    Number of the port to access on the container. Number must be in the range 1 to 65535.

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPortArgs

    ContainerPort int
    The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
    ContainerPort int
    The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
    containerPort Integer
    The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
    containerPort number
    The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
    container_port int
    The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
    containerPort Number
    The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
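
    In TypeScript, the port list is a simple array of objects; 8080 below is a hypothetical choice:

    import * as gcp from "@pulumi/gcp";

    const ports: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort[] = [
        { containerPort: 8080 }, // exposed on the pod's IP address
    ];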

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeArgs

    Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec
    ExecAction specifies a command to execute. Structure is documented below.
    FailureThreshold int
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    InitialDelaySeconds int
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    PeriodSeconds int
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    SuccessThreshold int
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    TimeoutSeconds int
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
    Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec
    ExecAction specifies a command to execute. Structure is documented below.
    FailureThreshold int
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    InitialDelaySeconds int
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    PeriodSeconds int
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    SuccessThreshold int
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    TimeoutSeconds int
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
    exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec
    ExecAction specifies a command to execute. Structure is documented below.
    failureThreshold Integer
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    initialDelaySeconds Integer
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    periodSeconds Integer
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    successThreshold Integer
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    timeoutSeconds Integer
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
    exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec
    ExecAction specifies a command to execute. Structure is documented below.
    failureThreshold number
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    initialDelaySeconds number
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    periodSeconds number
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    successThreshold number
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    timeoutSeconds number
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
    exec_ AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec
    ExecAction specifies a command to execute. Structure is documented below.
    failure_threshold int
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    http_get AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    initial_delay_seconds int
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    period_seconds int
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    success_threshold int
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    tcp_socket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    timeout_seconds int
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
    exec Property Map
    ExecAction specifies a command to execute. Structure is documented below.
    failureThreshold Number
    Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
    grpc Property Map
    GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
    httpGet Property Map
    HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
    initialDelaySeconds Number
    Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
    periodSeconds Number
    How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
    successThreshold Number
    Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
    tcpSocket Property Map
    TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
    timeoutSeconds Number
    Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
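
    A startup probe is typically tuned more generously than the health and liveness probes, since large models can take minutes to load. A hedged TypeScript sketch with hypothetical values: up to 30 failures at a 10-second period allows roughly five minutes for startup:

    import * as gcp from "@pulumi/gcp";

    const startupProbe: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe = {
        httpGet: { path: "/ready", port: 8080 }, // hypothetical readiness endpoint
        periodSeconds: 10,                       // must be less than timeoutSeconds
        timeoutSeconds: 15,
        failureThreshold: 30,
    };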

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExecArgs

    Commands List<string>
    Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy; non-zero is treated as unhealthy.
    Commands []string
    Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy; non-zero is treated as unhealthy.
    commands List<String>
    Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy; non-zero is treated as unhealthy.
    commands string[]
    Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy; non-zero is treated as unhealthy.
    commands Sequence[str]
    Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy; non-zero is treated as unhealthy.
    commands List<String>
    Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy; non-zero is treated as unhealthy.

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpcArgs

    Port int
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    Service string
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
    Port int
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    Service string
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
    port Integer
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    service String
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
    port number
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    service string
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
    port int
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    service str
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
    port Number
    Port number of the gRPC service. Number must be in the range 1 to 65535.
    service String
    Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetArgs

    Host string
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    HttpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader>
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    Path string
    Path to access on the HTTP server.
    Port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    Scheme string
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
    Host string
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    HttpHeaders []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    Path string
    Path to access on the HTTP server.
    Port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    Scheme string
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
    host String
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    httpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader>
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    path String
    Path to access on the HTTP server.
    port Integer
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    scheme String
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
    host string
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    httpHeaders AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader[]
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    path string
    Path to access on the HTTP server.
    port number
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    scheme string
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
    host str
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    http_headers Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader]
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    path str
    Path to access on the HTTP server.
    port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    scheme str
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
    host String
    Host name to connect to; defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
    httpHeaders List<Property Map>
    Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
    path String
    Path to access on the HTTP server.
    port Number
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    scheme String
    Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeaderArgs

    Name string
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    Value string
    The header field value.
    Name string
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    Value string
    The header field value.
    name String
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    value String
    The header field value.
    name string
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    value string
    The header field value.
    name str
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    value str
    The header field value.
    name String
    The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.
    value String
    The header field value.

    AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocketArgs

    Host string
    Optional: Host name to connect to; defaults to the model serving container's IP.
    Port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    Host string
    Optional: Host name to connect to; defaults to the model serving container's IP.
    Port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    host String
    Optional: Host name to connect to; defaults to the model serving container's IP.
    port Integer
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    host string
    Optional: Host name to connect to; defaults to the model serving container's IP.
    port number
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    host str
    Optional: Host name to connect to; defaults to the model serving container's IP.
    port int
    Number of the port to access on the container. Number must be in the range 1 to 65535.
    host String
    Optional: Host name to connect to; defaults to the model serving container's IP.
    port Number
    Number of the port to access on the container. Number must be in the range 1 to 65535.

    Import

    This resource does not support import.

    To learn more about importing existing cloud resources, see Importing resources.

    Package Details

    Repository
    Google Cloud (GCP) Classic pulumi/pulumi-gcp
    License
    Apache-2.0
    Notes
    This Pulumi package is based on the google-beta Terraform Provider.