gcp.vertex.AiEndpointWithModelGardenDeployment
Create an Endpoint and deploy a Model Garden model to it.
To get more information about AiEndpointWithModelGardenDeployment, see:
- API documentation
- How-to Guides
Example Usage
Vertex AI Deploy Basic
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
});
import pulumi
import pulumi_gcp as gcp
deploy = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy",
publisher_model_name="publishers/google/models/paligemma@paligemma-224-float32",
location="us-central1",
model_config={
"accept_eula": True,
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/vertex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy", &vertex.AiEndpointWithModelGardenDeploymentArgs{
PublisherModelName: pulumi.String("publishers/google/models/paligemma@paligemma-224-float32"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var deploy = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy", new()
{
PublisherModelName = "publishers/google/models/paligemma@paligemma-224-float32",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeployment;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeploymentArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var deploy = new AiEndpointWithModelGardenDeployment("deploy", AiEndpointWithModelGardenDeploymentArgs.builder()
.publisherModelName("publishers/google/models/paligemma@paligemma-224-float32")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.build());
}
}
resources:
deploy:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
properties:
publisherModelName: publishers/google/models/paligemma@paligemma-224-float32
location: us-central1
modelConfig:
acceptEula: true
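Every Pulumi resource also exposes a provider-assigned id output; a minimal TypeScript follow-up to the basic example above (the export name is illustrative):
// Export the deployment's ID so it appears in `pulumi stack output`.
export const deploymentId = deploy.id;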
Vertex AI Deploy Hugging Face Model
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
huggingFaceModelId: "Qwen/Qwen3-0.6B",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
});
import pulumi
import pulumi_gcp as gcp
deploy = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy",
hugging_face_model_id="Qwen/Qwen3-0.6B",
location="us-central1",
model_config={
"accept_eula": True,
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/vertex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy", &vertex.AiEndpointWithModelGardenDeploymentArgs{
HuggingFaceModelId: pulumi.String("Qwen/Qwen3-0.6B"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var deploy = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy", new()
{
HuggingFaceModelId = "Qwen/Qwen3-0.6B",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeployment;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeploymentArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var deploy = new AiEndpointWithModelGardenDeployment("deploy", AiEndpointWithModelGardenDeploymentArgs.builder()
.huggingFaceModelId("Qwen/Qwen3-0.6B")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.build());
}
}
resources:
deploy:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
properties:
huggingFaceModelId: Qwen/Qwen3-0.6B
location: us-central1
modelConfig:
acceptEula: true
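For gated Hugging Face models, the model config also accepts an access token (huggingFaceAccessToken, listed in the constructor reference below). A minimal TypeScript sketch, assuming the token is stored as a Pulumi config secret; the config key and resource name are illustrative:
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const cfg = new pulumi.Config();
// Read the token as a secret so it stays encrypted in stack state.
const hfToken = cfg.requireSecret("huggingFaceAccessToken");
const deployGated = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gated", {
    huggingFaceModelId: "Qwen/Qwen3-0.6B",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
        huggingFaceAccessToken: hfToken,
    },
});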
Vertex AI Deploy With Configs
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
deployConfig: {
dedicatedResources: {
machineSpec: {
machineType: "g2-standard-16",
acceleratorType: "NVIDIA_L4",
acceleratorCount: 1,
},
minReplicaCount: 1,
},
},
});
import pulumi
import pulumi_gcp as gcp
deploy = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy",
publisher_model_name="publishers/google/models/paligemma@paligemma-224-float32",
location="us-central1",
model_config={
"accept_eula": True,
},
deploy_config={
"dedicated_resources": {
"machine_spec": {
"machine_type": "g2-standard-16",
"accelerator_type": "NVIDIA_L4",
"accelerator_count": 1,
},
"min_replica_count": 1,
},
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/vertex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy", &vertex.AiEndpointWithModelGardenDeploymentArgs{
PublisherModelName: pulumi.String("publishers/google/models/paligemma@paligemma-224-float32"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
MachineType: pulumi.String("g2-standard-16"),
AcceleratorType: pulumi.String("NVIDIA_L4"),
AcceleratorCount: pulumi.Int(1),
},
MinReplicaCount: pulumi.Int(1),
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var deploy = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy", new()
{
PublisherModelName = "publishers/google/models/paligemma@paligemma-224-float32",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
{
DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
{
MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
{
MachineType = "g2-standard-16",
AcceleratorType = "NVIDIA_L4",
AcceleratorCount = 1,
},
MinReplicaCount = 1,
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeployment;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeploymentArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var deploy = new AiEndpointWithModelGardenDeployment("deploy", AiEndpointWithModelGardenDeploymentArgs.builder()
.publisherModelName("publishers/google/models/paligemma@paligemma-224-float32")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
.dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
.machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
.machineType("g2-standard-16")
.acceleratorType("NVIDIA_L4")
.acceleratorCount(1)
.build())
.minReplicaCount(1)
.build())
.build())
.build());
}
}
resources:
deploy:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
properties:
publisherModelName: publishers/google/models/paligemma@paligemma-224-float32
location: us-central1
modelConfig:
acceptEula: true
deployConfig:
dedicatedResources:
machineSpec:
machineType: g2-standard-16
acceleratorType: NVIDIA_L4
acceleratorCount: 1
minReplicaCount: 1
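deployConfig.dedicatedResources also supports autoscaling through maxReplicaCount and autoscalingMetricSpecs (both shown in the constructor reference below). A hedged TypeScript sketch; the metric name is an assumption based on Vertex AI's autoscaling metrics and should be verified against the API documentation:
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const deployAutoscaled = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-autoscaled", {
    publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
    },
    deployConfig: {
        dedicatedResources: {
            machineSpec: {
                machineType: "g2-standard-16",
                acceleratorType: "NVIDIA_L4",
                acceleratorCount: 1,
            },
            minReplicaCount: 1,
            maxReplicaCount: 3,
            autoscalingMetricSpecs: [{
                // Assumed metric name -- verify against the Vertex AI autoscaling docs.
                metricName: "aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle",
                target: 60,
            }],
        },
    },
});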
Vertex AI Deploy Multiple Models In Parallel
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const deploy_gemma_11_2b_it = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it", {
publisherModelName: "publishers/google/models/gemma@gemma-1.1-2b-it",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
deployConfig: {
dedicatedResources: {
machineSpec: {
machineType: "g2-standard-12",
acceleratorType: "us-central1",
acceleratorCount: 1,
},
minReplicaCount: 1,
},
},
});
const deploy_qwen3_06b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b", {
huggingFaceModelId: "Qwen/Qwen3-0.6B",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
deployConfig: {
dedicatedResources: {
machineSpec: {
machineType: "g2-standard-12",
acceleratorType: "NVIDIA_L4",
acceleratorCount: 1,
},
minReplicaCount: 1,
},
},
});
const deploy_llama_32_1b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b", {
publisherModelName: "publishers/meta/models/llama3-2@llama-3.2-1b",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
deployConfig: {
dedicatedResources: {
machineSpec: {
machineType: "g2-standard-12",
acceleratorType: "NVIDIA_L4",
acceleratorCount: 1,
},
minReplicaCount: 1,
},
},
});
import pulumi
import pulumi_gcp as gcp
deploy_gemma_11_2b_it = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it",
publisher_model_name="publishers/google/models/gemma@gemma-1.1-2b-it",
location="us-central1",
model_config={
"accept_eula": True,
},
deploy_config={
"dedicated_resources": {
"machine_spec": {
"machine_type": "g2-standard-12",
"accelerator_type": "us-central1",
"accelerator_count": 1,
},
"min_replica_count": 1,
},
})
deploy_qwen3_06b = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b",
hugging_face_model_id="Qwen/Qwen3-0.6B",
location="us-central1",
model_config={
"accept_eula": True,
},
deploy_config={
"dedicated_resources": {
"machine_spec": {
"machine_type": "g2-standard-12",
"accelerator_type": "NVIDIA_L4",
"accelerator_count": 1,
},
"min_replica_count": 1,
},
})
deploy_llama_32_1b = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b",
publisher_model_name="publishers/meta/models/llama3-2@llama-3.2-1b",
location="us-central1",
model_config={
"accept_eula": True,
},
deploy_config={
"dedicated_resources": {
"machine_spec": {
"machine_type": "g2-standard-12",
"accelerator_type": "NVIDIA_L4",
"accelerator_count": 1,
},
"min_replica_count": 1,
},
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/vertex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-gemma-1_1-2b-it", &vertex.AiEndpointWithModelGardenDeploymentArgs{
PublisherModelName: pulumi.String("publishers/google/models/gemma@gemma-1.1-2b-it"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
MachineType: pulumi.String("g2-standard-12"),
AcceleratorType: pulumi.String("us-central1"),
AcceleratorCount: pulumi.Int(1),
},
MinReplicaCount: pulumi.Int(1),
},
},
})
if err != nil {
return err
}
_, err = vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-qwen3-0_6b", &vertex.AiEndpointWithModelGardenDeploymentArgs{
HuggingFaceModelId: pulumi.String("Qwen/Qwen3-0.6B"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
MachineType: pulumi.String("g2-standard-12"),
AcceleratorType: pulumi.String("NVIDIA_L4"),
AcceleratorCount: pulumi.Int(1),
},
MinReplicaCount: pulumi.Int(1),
},
},
})
if err != nil {
return err
}
_, err = vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-llama-3_2-1b", &vertex.AiEndpointWithModelGardenDeploymentArgs{
PublisherModelName: pulumi.String("publishers/meta/models/llama3-2@llama-3.2-1b"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
MachineType: pulumi.String("g2-standard-12"),
AcceleratorType: pulumi.String("NVIDIA_L4"),
AcceleratorCount: pulumi.Int(1),
},
MinReplicaCount: pulumi.Int(1),
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var deploy_gemma_11_2b_it = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it", new()
{
PublisherModelName = "publishers/google/models/gemma@gemma-1.1-2b-it",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
{
DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
{
MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
{
MachineType = "g2-standard-12",
AcceleratorType = "us-central1",
AcceleratorCount = 1,
},
MinReplicaCount = 1,
},
},
});
var deploy_qwen3_06b = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b", new()
{
HuggingFaceModelId = "Qwen/Qwen3-0.6B",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
{
DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
{
MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
{
MachineType = "g2-standard-12",
AcceleratorType = "NVIDIA_L4",
AcceleratorCount = 1,
},
MinReplicaCount = 1,
},
},
});
var deploy_llama_32_1b = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b", new()
{
PublisherModelName = "publishers/meta/models/llama3-2@llama-3.2-1b",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
{
DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
{
MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
{
MachineType = "g2-standard-12",
AcceleratorType = "NVIDIA_L4",
AcceleratorCount = 1,
},
MinReplicaCount = 1,
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeployment;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeploymentArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var deploy_gemma_11_2b_it = new AiEndpointWithModelGardenDeployment("deploy-gemma-11-2b-it", AiEndpointWithModelGardenDeploymentArgs.builder()
.publisherModelName("publishers/google/models/gemma@gemma-1.1-2b-it")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
.dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
.machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
.machineType("g2-standard-12")
.acceleratorType("us-central1")
.acceleratorCount(1)
.build())
.minReplicaCount(1)
.build())
.build())
.build());
var deploy_qwen3_06b = new AiEndpointWithModelGardenDeployment("deploy-qwen3-06b", AiEndpointWithModelGardenDeploymentArgs.builder()
.huggingFaceModelId("Qwen/Qwen3-0.6B")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
.dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
.machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
.machineType("g2-standard-12")
.acceleratorType("NVIDIA_L4")
.acceleratorCount(1)
.build())
.minReplicaCount(1)
.build())
.build())
.build());
var deploy_llama_32_1b = new AiEndpointWithModelGardenDeployment("deploy-llama-32-1b", AiEndpointWithModelGardenDeploymentArgs.builder()
.publisherModelName("publishers/meta/models/llama3-2@llama-3.2-1b")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
.dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
.machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
.machineType("g2-standard-12")
.acceleratorType("NVIDIA_L4")
.acceleratorCount(1)
.build())
.minReplicaCount(1)
.build())
.build())
.build());
}
}
resources:
deploy-gemma-11-2b-it:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
name: deploy-gemma-1_1-2b-it
properties:
publisherModelName: publishers/google/models/gemma@gemma-1.1-2b-it
location: us-central1
modelConfig:
acceptEula: true
deployConfig:
dedicatedResources:
machineSpec:
machineType: g2-standard-12
acceleratorType: NVIDIA_L4
acceleratorCount: 1
minReplicaCount: 1
deploy-qwen3-06b:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
name: deploy-qwen3-0_6b
properties:
huggingFaceModelId: Qwen/Qwen3-0.6B
location: us-central1
modelConfig:
acceptEula: true
deployConfig:
dedicatedResources:
machineSpec:
machineType: g2-standard-12
acceleratorType: NVIDIA_L4
acceleratorCount: 1
minReplicaCount: 1
deploy-llama-32-1b:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
name: deploy-llama-3_2-1b
properties:
publisherModelName: publishers/meta/models/llama3-2@llama-3.2-1b
location: us-central1
modelConfig:
acceptEula: true
deployConfig:
dedicatedResources:
machineSpec:
machineType: g2-standard-12
acceleratorType: NVIDIA_L4
acceleratorCount: 1
minReplicaCount: 1
Vertex AI Deploy Multiple Models In Sequence
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const deploy_gemma_11_2b_it = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it", {
publisherModelName: "publishers/google/models/gemma@gemma-1.1-2b-it",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
deployConfig: {
dedicatedResources: {
machineSpec: {
machineType: "g2-standard-12",
acceleratorType: "NVIDIA_L4",
acceleratorCount: 1,
},
minReplicaCount: 1,
},
},
});
const deploy_qwen3_06b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b", {
huggingFaceModelId: "Qwen/Qwen3-0.6B",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
deployConfig: {
dedicatedResources: {
machineSpec: {
machineType: "g2-standard-12",
acceleratorType: "NVIDIA_L4",
acceleratorCount: 1,
},
minReplicaCount: 1,
},
},
}, {
dependsOn: [deploy_gemma_11_2b_it],
});
const deploy_llama_32_1b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b", {
publisherModelName: "publishers/meta/models/llama3-2@llama-3.2-1b",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
deployConfig: {
dedicatedResources: {
machineSpec: {
machineType: "g2-standard-12",
acceleratorType: "NVIDIA_L4",
acceleratorCount: 1,
},
minReplicaCount: 1,
},
},
}, {
dependsOn: [deploy_qwen3_06b],
});
import pulumi
import pulumi_gcp as gcp
deploy_gemma_11_2b_it = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it",
publisher_model_name="publishers/google/models/gemma@gemma-1.1-2b-it",
location="us-central1",
model_config={
"accept_eula": True,
},
deploy_config={
"dedicated_resources": {
"machine_spec": {
"machine_type": "g2-standard-12",
"accelerator_type": "NVIDIA_L4",
"accelerator_count": 1,
},
"min_replica_count": 1,
},
})
deploy_qwen3_06b = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b",
hugging_face_model_id="Qwen/Qwen3-0.6B",
location="us-central1",
model_config={
"accept_eula": True,
},
deploy_config={
"dedicated_resources": {
"machine_spec": {
"machine_type": "g2-standard-12",
"accelerator_type": "NVIDIA_L4",
"accelerator_count": 1,
},
"min_replica_count": 1,
},
},
opts = pulumi.ResourceOptions(depends_on=[deploy_gemma_11_2b_it]))
deploy_llama_32_1b = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b",
publisher_model_name="publishers/meta/models/llama3-2@llama-3.2-1b",
location="us-central1",
model_config={
"accept_eula": True,
},
deploy_config={
"dedicated_resources": {
"machine_spec": {
"machine_type": "g2-standard-12",
"accelerator_type": "NVIDIA_L4",
"accelerator_count": 1,
},
"min_replica_count": 1,
},
},
opts = pulumi.ResourceOptions(depends_on=[deploy_qwen3_06b]))
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/vertex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
deploy_gemma_11_2b_it, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-gemma-1_1-2b-it", &vertex.AiEndpointWithModelGardenDeploymentArgs{
PublisherModelName: pulumi.String("publishers/google/models/gemma@gemma-1.1-2b-it"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
MachineType: pulumi.String("g2-standard-12"),
AcceleratorType: pulumi.String("NVIDIA_L4"),
AcceleratorCount: pulumi.Int(1),
},
MinReplicaCount: pulumi.Int(1),
},
},
})
if err != nil {
return err
}
deploy_qwen3_06b, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-qwen3-0_6b", &vertex.AiEndpointWithModelGardenDeploymentArgs{
HuggingFaceModelId: pulumi.String("Qwen/Qwen3-0.6B"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
MachineType: pulumi.String("g2-standard-12"),
AcceleratorType: pulumi.String("NVIDIA_L4"),
AcceleratorCount: pulumi.Int(1),
},
MinReplicaCount: pulumi.Int(1),
},
},
}, pulumi.DependsOn([]pulumi.Resource{
deploy_gemma_11_2b_it,
}))
if err != nil {
return err
}
_, err = vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-llama-3_2-1b", &vertex.AiEndpointWithModelGardenDeploymentArgs{
PublisherModelName: pulumi.String("publishers/meta/models/llama3-2@llama-3.2-1b"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
MachineType: pulumi.String("g2-standard-12"),
AcceleratorType: pulumi.String("NVIDIA_L4"),
AcceleratorCount: pulumi.Int(1),
},
MinReplicaCount: pulumi.Int(1),
},
},
}, pulumi.DependsOn([]pulumi.Resource{
deploy_qwen3_06b,
}))
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var deploy_gemma_11_2b_it = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it", new()
{
PublisherModelName = "publishers/google/models/gemma@gemma-1.1-2b-it",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
{
DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
{
MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
{
MachineType = "g2-standard-12",
AcceleratorType = "NVIDIA_L4",
AcceleratorCount = 1,
},
MinReplicaCount = 1,
},
},
});
var deploy_qwen3_06b = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b", new()
{
HuggingFaceModelId = "Qwen/Qwen3-0.6B",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
{
DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
{
MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
{
MachineType = "g2-standard-12",
AcceleratorType = "NVIDIA_L4",
AcceleratorCount = 1,
},
MinReplicaCount = 1,
},
},
}, new CustomResourceOptions
{
DependsOn =
{
deploy_gemma_11_2b_it,
},
});
var deploy_llama_32_1b = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b", new()
{
PublisherModelName = "publishers/meta/models/llama3-2@llama-3.2-1b",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
{
DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
{
MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
{
MachineType = "g2-standard-12",
AcceleratorType = "NVIDIA_L4",
AcceleratorCount = 1,
},
MinReplicaCount = 1,
},
},
}, new CustomResourceOptions
{
DependsOn =
{
deploy_qwen3_06b,
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeployment;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeploymentArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs;
import com.pulumi.resources.CustomResourceOptions;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var deploy_gemma_11_2b_it = new AiEndpointWithModelGardenDeployment("deploy-gemma-11-2b-it", AiEndpointWithModelGardenDeploymentArgs.builder()
.publisherModelName("publishers/google/models/gemma@gemma-1.1-2b-it")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
.dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
.machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
.machineType("g2-standard-12")
.acceleratorType("NVIDIA_L4")
.acceleratorCount(1)
.build())
.minReplicaCount(1)
.build())
.build())
.build());
var deploy_qwen3_06b = new AiEndpointWithModelGardenDeployment("deploy-qwen3-06b", AiEndpointWithModelGardenDeploymentArgs.builder()
.huggingFaceModelId("Qwen/Qwen3-0.6B")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
.dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
.machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
.machineType("g2-standard-12")
.acceleratorType("NVIDIA_L4")
.acceleratorCount(1)
.build())
.minReplicaCount(1)
.build())
.build())
.build(), CustomResourceOptions.builder()
.dependsOn(deploy_gemma_11_2b_it)
.build());
var deploy_llama_32_1b = new AiEndpointWithModelGardenDeployment("deploy-llama-32-1b", AiEndpointWithModelGardenDeploymentArgs.builder()
.publisherModelName("publishers/meta/models/llama3-2@llama-3.2-1b")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
.dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
.machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
.machineType("g2-standard-12")
.acceleratorType("NVIDIA_L4")
.acceleratorCount(1)
.build())
.minReplicaCount(1)
.build())
.build())
.build(), CustomResourceOptions.builder()
.dependsOn(deploy_qwen3_06b)
.build());
}
}
resources:
deploy-gemma-11-2b-it:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
name: deploy-gemma-1_1-2b-it
properties:
publisherModelName: publishers/google/models/gemma@gemma-1.1-2b-it
location: us-central1
modelConfig:
acceptEula: true
deployConfig:
dedicatedResources:
machineSpec:
machineType: g2-standard-12
acceleratorType: NVIDIA_L4
acceleratorCount: 1
minReplicaCount: 1
deploy-qwen3-06b:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
name: deploy-qwen3-0_6b
properties:
huggingFaceModelId: Qwen/Qwen3-0.6B
location: us-central1
modelConfig:
acceptEula: true
deployConfig:
dedicatedResources:
machineSpec:
machineType: g2-standard-12
acceleratorType: NVIDIA_L4
acceleratorCount: 1
minReplicaCount: 1
options:
dependsOn:
- ${["deploy-gemma-11-2b-it"]}
deploy-llama-32-1b:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
name: deploy-llama-3_2-1b
properties:
publisherModelName: publishers/meta/models/llama3-2@llama-3.2-1b
location: us-central1
modelConfig:
acceptEula: true
deployConfig:
dedicatedResources:
machineSpec:
machineType: g2-standard-12
acceleratorType: NVIDIA_L4
acceleratorCount: 1
minReplicaCount: 1
options:
dependsOn:
- ${["deploy-qwen3-06b"]}
Create AiEndpointWithModelGardenDeployment Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new AiEndpointWithModelGardenDeployment(name: string, args: AiEndpointWithModelGardenDeploymentArgs, opts?: CustomResourceOptions);
@overload
def AiEndpointWithModelGardenDeployment(resource_name: str,
args: AiEndpointWithModelGardenDeploymentArgs,
opts: Optional[ResourceOptions] = None)
@overload
def AiEndpointWithModelGardenDeployment(resource_name: str,
opts: Optional[ResourceOptions] = None,
location: Optional[str] = None,
deploy_config: Optional[AiEndpointWithModelGardenDeploymentDeployConfigArgs] = None,
endpoint_config: Optional[AiEndpointWithModelGardenDeploymentEndpointConfigArgs] = None,
hugging_face_model_id: Optional[str] = None,
model_config: Optional[AiEndpointWithModelGardenDeploymentModelConfigArgs] = None,
project: Optional[str] = None,
publisher_model_name: Optional[str] = None)
func NewAiEndpointWithModelGardenDeployment(ctx *Context, name string, args AiEndpointWithModelGardenDeploymentArgs, opts ...ResourceOption) (*AiEndpointWithModelGardenDeployment, error)
public AiEndpointWithModelGardenDeployment(string name, AiEndpointWithModelGardenDeploymentArgs args, CustomResourceOptions? opts = null)
public AiEndpointWithModelGardenDeployment(String name, AiEndpointWithModelGardenDeploymentArgs args)
public AiEndpointWithModelGardenDeployment(String name, AiEndpointWithModelGardenDeploymentArgs args, CustomResourceOptions options)
type: gcp:vertex:AiEndpointWithModelGardenDeployment
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource.
- args AiEndpointWithModelGardenDeploymentArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args AiEndpointWithModelGardenDeploymentArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args AiEndpointWithModelGardenDeploymentArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args AiEndpointWithModelGardenDeploymentArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args AiEndpointWithModelGardenDeploymentArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
Constructor example
The following reference example uses placeholder values for all input properties.
var aiEndpointWithModelGardenDeploymentResource = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("aiEndpointWithModelGardenDeploymentResource", new()
{
Location = "string",
DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
{
DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
{
MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
{
AcceleratorCount = 0,
AcceleratorType = "string",
MachineType = "string",
MultihostGpuNodeCount = 0,
ReservationAffinity = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinityArgs
{
ReservationAffinityType = "string",
Key = "string",
Values = new[]
{
"string",
},
},
TpuTopology = "string",
},
MinReplicaCount = 0,
AutoscalingMetricSpecs = new[]
{
new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpecArgs
{
MetricName = "string",
Target = 0,
},
},
MaxReplicaCount = 0,
RequiredReplicaCount = 0,
Spot = false,
},
FastTryoutEnabled = false,
SystemLabels =
{
{ "string", "string" },
},
},
EndpointConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentEndpointConfigArgs
{
DedicatedEndpointEnabled = false,
EndpointDisplayName = "string",
},
HuggingFaceModelId = "string",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = false,
ContainerSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecArgs
{
ImageUri = "string",
HealthRoute = "string",
DeploymentTimeout = "string",
Envs = new[]
{
new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnvArgs
{
Name = "string",
Value = "string",
},
},
GrpcPorts = new[]
{
new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPortArgs
{
ContainerPort = 0,
},
},
HealthProbe = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeArgs
{
Exec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExecArgs
{
Commands = new[]
{
"string",
},
},
FailureThreshold = 0,
Grpc = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpcArgs
{
Port = 0,
Service = "string",
},
HttpGet = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetArgs
{
Host = "string",
HttpHeaders = new[]
{
new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeaderArgs
{
Name = "string",
Value = "string",
},
},
Path = "string",
Port = 0,
Scheme = "string",
},
InitialDelaySeconds = 0,
PeriodSeconds = 0,
SuccessThreshold = 0,
TcpSocket = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocketArgs
{
Host = "string",
Port = 0,
},
TimeoutSeconds = 0,
},
Args = new[]
{
"string",
},
Commands = new[]
{
"string",
},
LivenessProbe = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeArgs
{
Exec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExecArgs
{
Commands = new[]
{
"string",
},
},
FailureThreshold = 0,
Grpc = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpcArgs
{
Port = 0,
Service = "string",
},
HttpGet = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetArgs
{
Host = "string",
HttpHeaders = new[]
{
new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeaderArgs
{
Name = "string",
Value = "string",
},
},
Path = "string",
Port = 0,
Scheme = "string",
},
InitialDelaySeconds = 0,
PeriodSeconds = 0,
SuccessThreshold = 0,
TcpSocket = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocketArgs
{
Host = "string",
Port = 0,
},
TimeoutSeconds = 0,
},
Ports = new[]
{
new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPortArgs
{
ContainerPort = 0,
},
},
PredictRoute = "string",
SharedMemorySizeMb = "string",
StartupProbe = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeArgs
{
Exec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExecArgs
{
Commands = new[]
{
"string",
},
},
FailureThreshold = 0,
Grpc = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpcArgs
{
Port = 0,
Service = "string",
},
HttpGet = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetArgs
{
Host = "string",
HttpHeaders = new[]
{
new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeaderArgs
{
Name = "string",
Value = "string",
},
},
Path = "string",
Port = 0,
Scheme = "string",
},
InitialDelaySeconds = 0,
PeriodSeconds = 0,
SuccessThreshold = 0,
TcpSocket = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocketArgs
{
Host = "string",
Port = 0,
},
TimeoutSeconds = 0,
},
},
HuggingFaceAccessToken = "string",
HuggingFaceCacheEnabled = false,
ModelDisplayName = "string",
},
Project = "string",
PublisherModelName = "string",
});
example, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "aiEndpointWithModelGardenDeploymentResource", &vertex.AiEndpointWithModelGardenDeploymentArgs{
Location: pulumi.String("string"),
DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
AcceleratorCount: pulumi.Int(0),
AcceleratorType: pulumi.String("string"),
MachineType: pulumi.String("string"),
MultihostGpuNodeCount: pulumi.Int(0),
ReservationAffinity: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinityArgs{
ReservationAffinityType: pulumi.String("string"),
Key: pulumi.String("string"),
Values: pulumi.StringArray{
pulumi.String("string"),
},
},
TpuTopology: pulumi.String("string"),
},
MinReplicaCount: pulumi.Int(0),
AutoscalingMetricSpecs: vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpecArray{
&vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpecArgs{
MetricName: pulumi.String("string"),
Target: pulumi.Int(0),
},
},
MaxReplicaCount: pulumi.Int(0),
RequiredReplicaCount: pulumi.Int(0),
Spot: pulumi.Bool(false),
},
FastTryoutEnabled: pulumi.Bool(false),
SystemLabels: pulumi.StringMap{
"string": pulumi.String("string"),
},
},
EndpointConfig: &vertex.AiEndpointWithModelGardenDeploymentEndpointConfigArgs{
DedicatedEndpointEnabled: pulumi.Bool(false),
EndpointDisplayName: pulumi.String("string"),
},
HuggingFaceModelId: pulumi.String("string"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(false),
ContainerSpec: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecArgs{
ImageUri: pulumi.String("string"),
HealthRoute: pulumi.String("string"),
DeploymentTimeout: pulumi.String("string"),
Envs: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnvArray{
&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnvArgs{
Name: pulumi.String("string"),
Value: pulumi.String("string"),
},
},
GrpcPorts: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPortArray{
&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPortArgs{
ContainerPort: pulumi.Int(0),
},
},
HealthProbe: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeArgs{
Exec: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExecArgs{
Commands: pulumi.StringArray{
pulumi.String("string"),
},
},
FailureThreshold: pulumi.Int(0),
Grpc: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpcArgs{
Port: pulumi.Int(0),
Service: pulumi.String("string"),
},
HttpGet: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetArgs{
Host: pulumi.String("string"),
HttpHeaders: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeaderArray{
&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeaderArgs{
Name: pulumi.String("string"),
Value: pulumi.String("string"),
},
},
Path: pulumi.String("string"),
Port: pulumi.Int(0),
Scheme: pulumi.String("string"),
},
InitialDelaySeconds: pulumi.Int(0),
PeriodSeconds: pulumi.Int(0),
SuccessThreshold: pulumi.Int(0),
TcpSocket: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocketArgs{
Host: pulumi.String("string"),
Port: pulumi.Int(0),
},
TimeoutSeconds: pulumi.Int(0),
},
Args: pulumi.StringArray{
pulumi.String("string"),
},
Commands: pulumi.StringArray{
pulumi.String("string"),
},
LivenessProbe: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeArgs{
Exec: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExecArgs{
Commands: pulumi.StringArray{
pulumi.String("string"),
},
},
FailureThreshold: pulumi.Int(0),
Grpc: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpcArgs{
Port: pulumi.Int(0),
Service: pulumi.String("string"),
},
HttpGet: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetArgs{
Host: pulumi.String("string"),
HttpHeaders: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeaderArray{
&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeaderArgs{
Name: pulumi.String("string"),
Value: pulumi.String("string"),
},
},
Path: pulumi.String("string"),
Port: pulumi.Int(0),
Scheme: pulumi.String("string"),
},
InitialDelaySeconds: pulumi.Int(0),
PeriodSeconds: pulumi.Int(0),
SuccessThreshold: pulumi.Int(0),
TcpSocket: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocketArgs{
Host: pulumi.String("string"),
Port: pulumi.Int(0),
},
TimeoutSeconds: pulumi.Int(0),
},
Ports: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPortArray{
&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPortArgs{
ContainerPort: pulumi.Int(0),
},
},
PredictRoute: pulumi.String("string"),
SharedMemorySizeMb: pulumi.String("string"),
StartupProbe: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeArgs{
Exec: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExecArgs{
Commands: pulumi.StringArray{
pulumi.String("string"),
},
},
FailureThreshold: pulumi.Int(0),
Grpc: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpcArgs{
Port: pulumi.Int(0),
Service: pulumi.String("string"),
},
HttpGet: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetArgs{
Host: pulumi.String("string"),
HttpHeaders: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeaderArray{
&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeaderArgs{
Name: pulumi.String("string"),
Value: pulumi.String("string"),
},
},
Path: pulumi.String("string"),
Port: pulumi.Int(0),
Scheme: pulumi.String("string"),
},
InitialDelaySeconds: pulumi.Int(0),
PeriodSeconds: pulumi.Int(0),
SuccessThreshold: pulumi.Int(0),
TcpSocket: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocketArgs{
Host: pulumi.String("string"),
Port: pulumi.Int(0),
},
TimeoutSeconds: pulumi.Int(0),
},
},
HuggingFaceAccessToken: pulumi.String("string"),
HuggingFaceCacheEnabled: pulumi.Bool(false),
ModelDisplayName: pulumi.String("string"),
},
Project: pulumi.String("string"),
PublisherModelName: pulumi.String("string"),
})
var aiEndpointWithModelGardenDeploymentResource = new AiEndpointWithModelGardenDeployment("aiEndpointWithModelGardenDeploymentResource", AiEndpointWithModelGardenDeploymentArgs.builder()
.location("string")
.deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
.dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
.machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
.acceleratorCount(0)
.acceleratorType("string")
.machineType("string")
.multihostGpuNodeCount(0)
.reservationAffinity(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinityArgs.builder()
.reservationAffinityType("string")
.key("string")
.values("string")
.build())
.tpuTopology("string")
.build())
.minReplicaCount(0)
.autoscalingMetricSpecs(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpecArgs.builder()
.metricName("string")
.target(0)
.build())
.maxReplicaCount(0)
.requiredReplicaCount(0)
.spot(false)
.build())
.fastTryoutEnabled(false)
.systemLabels(Map.of("string", "string"))
.build())
.endpointConfig(AiEndpointWithModelGardenDeploymentEndpointConfigArgs.builder()
.dedicatedEndpointEnabled(false)
.endpointDisplayName("string")
.build())
.huggingFaceModelId("string")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(false)
.containerSpec(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecArgs.builder()
.imageUri("string")
.healthRoute("string")
.deploymentTimeout("string")
.envs(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnvArgs.builder()
.name("string")
.value("string")
.build())
.grpcPorts(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPortArgs.builder()
.containerPort(0)
.build())
.healthProbe(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeArgs.builder()
.exec(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExecArgs.builder()
.commands("string")
.build())
.failureThreshold(0)
.grpc(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpcArgs.builder()
.port(0)
.service("string")
.build())
.httpGet(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetArgs.builder()
.host("string")
.httpHeaders(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeaderArgs.builder()
.name("string")
.value("string")
.build())
.path("string")
.port(0)
.scheme("string")
.build())
.initialDelaySeconds(0)
.periodSeconds(0)
.successThreshold(0)
.tcpSocket(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocketArgs.builder()
.host("string")
.port(0)
.build())
.timeoutSeconds(0)
.build())
.args("string")
.commands("string")
.livenessProbe(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeArgs.builder()
.exec(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExecArgs.builder()
.commands("string")
.build())
.failureThreshold(0)
.grpc(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpcArgs.builder()
.port(0)
.service("string")
.build())
.httpGet(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetArgs.builder()
.host("string")
.httpHeaders(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeaderArgs.builder()
.name("string")
.value("string")
.build())
.path("string")
.port(0)
.scheme("string")
.build())
.initialDelaySeconds(0)
.periodSeconds(0)
.successThreshold(0)
.tcpSocket(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocketArgs.builder()
.host("string")
.port(0)
.build())
.timeoutSeconds(0)
.build())
.ports(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPortArgs.builder()
.containerPort(0)
.build())
.predictRoute("string")
.sharedMemorySizeMb("string")
.startupProbe(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeArgs.builder()
.exec(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExecArgs.builder()
.commands("string")
.build())
.failureThreshold(0)
.grpc(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpcArgs.builder()
.port(0)
.service("string")
.build())
.httpGet(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetArgs.builder()
.host("string")
.httpHeaders(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeaderArgs.builder()
.name("string")
.value("string")
.build())
.path("string")
.port(0)
.scheme("string")
.build())
.initialDelaySeconds(0)
.periodSeconds(0)
.successThreshold(0)
.tcpSocket(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocketArgs.builder()
.host("string")
.port(0)
.build())
.timeoutSeconds(0)
.build())
.build())
.huggingFaceAccessToken("string")
.huggingFaceCacheEnabled(false)
.modelDisplayName("string")
.build())
.project("string")
.publisherModelName("string")
.build());
ai_endpoint_with_model_garden_deployment_resource = gcp.vertex.AiEndpointWithModelGardenDeployment("aiEndpointWithModelGardenDeploymentResource",
location="string",
deploy_config={
"dedicated_resources": {
"machine_spec": {
"accelerator_count": 0,
"accelerator_type": "string",
"machine_type": "string",
"multihost_gpu_node_count": 0,
"reservation_affinity": {
"reservation_affinity_type": "string",
"key": "string",
"values": ["string"],
},
"tpu_topology": "string",
},
"min_replica_count": 0,
"autoscaling_metric_specs": [{
"metric_name": "string",
"target": 0,
}],
"max_replica_count": 0,
"required_replica_count": 0,
"spot": False,
},
"fast_tryout_enabled": False,
"system_labels": {
"string": "string",
},
},
endpoint_config={
"dedicated_endpoint_enabled": False,
"endpoint_display_name": "string",
},
hugging_face_model_id="string",
model_config={
"accept_eula": False,
"container_spec": {
"image_uri": "string",
"health_route": "string",
"deployment_timeout": "string",
"envs": [{
"name": "string",
"value": "string",
}],
"grpc_ports": [{
"container_port": 0,
}],
"health_probe": {
"exec_": {
"commands": ["string"],
},
"failure_threshold": 0,
"grpc": {
"port": 0,
"service": "string",
},
"http_get": {
"host": "string",
"http_headers": [{
"name": "string",
"value": "string",
}],
"path": "string",
"port": 0,
"scheme": "string",
},
"initial_delay_seconds": 0,
"period_seconds": 0,
"success_threshold": 0,
"tcp_socket": {
"host": "string",
"port": 0,
},
"timeout_seconds": 0,
},
"args": ["string"],
"commands": ["string"],
"liveness_probe": {
"exec_": {
"commands": ["string"],
},
"failure_threshold": 0,
"grpc": {
"port": 0,
"service": "string",
},
"http_get": {
"host": "string",
"http_headers": [{
"name": "string",
"value": "string",
}],
"path": "string",
"port": 0,
"scheme": "string",
},
"initial_delay_seconds": 0,
"period_seconds": 0,
"success_threshold": 0,
"tcp_socket": {
"host": "string",
"port": 0,
},
"timeout_seconds": 0,
},
"ports": [{
"container_port": 0,
}],
"predict_route": "string",
"shared_memory_size_mb": "string",
"startup_probe": {
"exec_": {
"commands": ["string"],
},
"failure_threshold": 0,
"grpc": {
"port": 0,
"service": "string",
},
"http_get": {
"host": "string",
"http_headers": [{
"name": "string",
"value": "string",
}],
"path": "string",
"port": 0,
"scheme": "string",
},
"initial_delay_seconds": 0,
"period_seconds": 0,
"success_threshold": 0,
"tcp_socket": {
"host": "string",
"port": 0,
},
"timeout_seconds": 0,
},
},
"hugging_face_access_token": "string",
"hugging_face_cache_enabled": False,
"model_display_name": "string",
},
project="string",
publisher_model_name="string")
const aiEndpointWithModelGardenDeploymentResource = new gcp.vertex.AiEndpointWithModelGardenDeployment("aiEndpointWithModelGardenDeploymentResource", {
location: "string",
deployConfig: {
dedicatedResources: {
machineSpec: {
acceleratorCount: 0,
acceleratorType: "string",
machineType: "string",
multihostGpuNodeCount: 0,
reservationAffinity: {
reservationAffinityType: "string",
key: "string",
values: ["string"],
},
tpuTopology: "string",
},
minReplicaCount: 0,
autoscalingMetricSpecs: [{
metricName: "string",
target: 0,
}],
maxReplicaCount: 0,
requiredReplicaCount: 0,
spot: false,
},
fastTryoutEnabled: false,
systemLabels: {
string: "string",
},
},
endpointConfig: {
dedicatedEndpointEnabled: false,
endpointDisplayName: "string",
},
huggingFaceModelId: "string",
modelConfig: {
acceptEula: false,
containerSpec: {
imageUri: "string",
healthRoute: "string",
deploymentTimeout: "string",
envs: [{
name: "string",
value: "string",
}],
grpcPorts: [{
containerPort: 0,
}],
healthProbe: {
exec: {
commands: ["string"],
},
failureThreshold: 0,
grpc: {
port: 0,
service: "string",
},
httpGet: {
host: "string",
httpHeaders: [{
name: "string",
value: "string",
}],
path: "string",
port: 0,
scheme: "string",
},
initialDelaySeconds: 0,
periodSeconds: 0,
successThreshold: 0,
tcpSocket: {
host: "string",
port: 0,
},
timeoutSeconds: 0,
},
args: ["string"],
commands: ["string"],
livenessProbe: {
exec: {
commands: ["string"],
},
failureThreshold: 0,
grpc: {
port: 0,
service: "string",
},
httpGet: {
host: "string",
httpHeaders: [{
name: "string",
value: "string",
}],
path: "string",
port: 0,
scheme: "string",
},
initialDelaySeconds: 0,
periodSeconds: 0,
successThreshold: 0,
tcpSocket: {
host: "string",
port: 0,
},
timeoutSeconds: 0,
},
ports: [{
containerPort: 0,
}],
predictRoute: "string",
sharedMemorySizeMb: "string",
startupProbe: {
exec: {
commands: ["string"],
},
failureThreshold: 0,
grpc: {
port: 0,
service: "string",
},
httpGet: {
host: "string",
httpHeaders: [{
name: "string",
value: "string",
}],
path: "string",
port: 0,
scheme: "string",
},
initialDelaySeconds: 0,
periodSeconds: 0,
successThreshold: 0,
tcpSocket: {
host: "string",
port: 0,
},
timeoutSeconds: 0,
},
},
huggingFaceAccessToken: "string",
huggingFaceCacheEnabled: false,
modelDisplayName: "string",
},
project: "string",
publisherModelName: "string",
});
type: gcp:vertex:AiEndpointWithModelGardenDeployment
properties:
deployConfig:
dedicatedResources:
autoscalingMetricSpecs:
- metricName: string
target: 0
machineSpec:
acceleratorCount: 0
acceleratorType: string
machineType: string
multihostGpuNodeCount: 0
reservationAffinity:
key: string
reservationAffinityType: string
values:
- string
tpuTopology: string
maxReplicaCount: 0
minReplicaCount: 0
requiredReplicaCount: 0
spot: false
fastTryoutEnabled: false
systemLabels:
string: string
endpointConfig:
dedicatedEndpointEnabled: false
endpointDisplayName: string
huggingFaceModelId: string
location: string
modelConfig:
acceptEula: false
containerSpec:
args:
- string
commands:
- string
deploymentTimeout: string
envs:
- name: string
value: string
grpcPorts:
- containerPort: 0
healthProbe:
exec:
commands:
- string
failureThreshold: 0
grpc:
port: 0
service: string
httpGet:
host: string
httpHeaders:
- name: string
value: string
path: string
port: 0
scheme: string
initialDelaySeconds: 0
periodSeconds: 0
successThreshold: 0
tcpSocket:
host: string
port: 0
timeoutSeconds: 0
healthRoute: string
imageUri: string
livenessProbe:
exec:
commands:
- string
failureThreshold: 0
grpc:
port: 0
service: string
httpGet:
host: string
httpHeaders:
- name: string
value: string
path: string
port: 0
scheme: string
initialDelaySeconds: 0
periodSeconds: 0
successThreshold: 0
tcpSocket:
host: string
port: 0
timeoutSeconds: 0
ports:
- containerPort: 0
predictRoute: string
sharedMemorySizeMb: string
startupProbe:
exec:
commands:
- string
failureThreshold: 0
grpc:
port: 0
service: string
httpGet:
host: string
httpHeaders:
- name: string
value: string
path: string
port: 0
scheme: string
initialDelaySeconds: 0
periodSeconds: 0
successThreshold: 0
tcpSocket:
host: string
port: 0
timeoutSeconds: 0
huggingFaceAccessToken: string
huggingFaceCacheEnabled: false
modelDisplayName: string
project: string
publisherModelName: string
AiEndpointWithModelGardenDeployment Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
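For example, both declarations below are equivalent; this is a minimal sketch that reuses the Hugging Face model ID format shown in these listings:
import pulumi_gcp as gcp

# Object inputs as a dictionary literal.
deploy_from_dict = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-from-dict",
    hugging_face_model_id="google/gemma-2-2b-it",
    location="us-central1",
    model_config={
        "accept_eula": True,
    })

# The same object input as an argument class.
deploy_from_args = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-from-args",
    hugging_face_model_id="google/gemma-2-2b-it",
    location="us-central1",
    model_config=gcp.vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs(
        accept_eula=True,
    ))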
The AiEndpointWithModelGardenDeployment resource accepts the following input properties:
- Location string
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- DeployConfig AiEndpointWithModelGardenDeploymentDeployConfig
- The deploy config to use for the deployment. Structure is documented below.
- EndpointConfig AiEndpointWithModelGardenDeploymentEndpointConfig
- The endpoint config to use for the deployment. Structure is documented below.
- HuggingFaceModelId string
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- ModelConfig AiEndpointWithModelGardenDeploymentModelConfig
- The model config to use for the deployment. Structure is documented below.
- Project string
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- PublisherModelName string
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- Location string
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- DeployConfig AiEndpointWithModelGardenDeploymentDeployConfigArgs
- The deploy config to use for the deployment. Structure is documented below.
- EndpointConfig AiEndpointWithModelGardenDeploymentEndpointConfigArgs
- The endpoint config to use for the deployment. Structure is documented below.
- HuggingFaceModelId string
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- ModelConfig AiEndpointWithModelGardenDeploymentModelConfigArgs
- The model config to use for the deployment. Structure is documented below.
- Project string
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- PublisherModelName string
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- location String
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- deployConfig AiEndpointWithModelGardenDeploymentDeployConfig
- The deploy config to use for the deployment. Structure is documented below.
- endpointConfig AiEndpointWithModelGardenDeploymentEndpointConfig
- The endpoint config to use for the deployment. Structure is documented below.
- huggingFaceModelId String
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- modelConfig AiEndpointWithModelGardenDeploymentModelConfig
- The model config to use for the deployment. Structure is documented below.
- project String
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- publisherModelName String
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- location string
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- deployConfig AiEndpointWithModelGardenDeploymentDeployConfig
- The deploy config to use for the deployment. Structure is documented below.
- endpointConfig AiEndpointWithModelGardenDeploymentEndpointConfig
- The endpoint config to use for the deployment. Structure is documented below.
- huggingFaceModelId string
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- modelConfig AiEndpointWithModelGardenDeploymentModelConfig
- The model config to use for the deployment. Structure is documented below.
- project string
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- publisherModelName string
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- location str
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- deploy_config AiEndpointWithModelGardenDeploymentDeployConfigArgs
- The deploy config to use for the deployment. Structure is documented below.
- endpoint_config AiEndpointWithModelGardenDeploymentEndpointConfigArgs
- The endpoint config to use for the deployment. Structure is documented below.
- hugging_face_model_id str
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- model_config AiEndpointWithModelGardenDeploymentModelConfigArgs
- The model config to use for the deployment. Structure is documented below.
- project str
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- publisher_model_name str
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- location String
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- deployConfig Property Map
- The deploy config to use for the deployment. Structure is documented below.
- endpointConfig Property Map
- The endpoint config to use for the deployment. Structure is documented below.
- huggingFaceModelId String
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- modelConfig Property Map
- The model config to use for the deployment. Structure is documented below.
- project String
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- publisherModelName String
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
Outputs
All input properties are implicitly available as output properties. Additionally, the AiEndpointWithModelGardenDeployment resource produces the following output properties:
- DeployedModelDisplayName string
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- DeployedModelId string
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- Endpoint string
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- Id string
- The provider-assigned unique ID for this managed resource.
- DeployedModelDisplayName string
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- DeployedModelId string
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- Endpoint string
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- Id string
- The provider-assigned unique ID for this managed resource.
- deployedModelDisplayName String
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- deployedModelId String
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- endpoint String
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- id String
- The provider-assigned unique ID for this managed resource.
- deployedModelDisplayName string
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- deployedModelId string
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- endpoint string
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- id string
- The provider-assigned unique ID for this managed resource.
- deployed_model_display_name str
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- deployed_model_id str
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- endpoint str
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- id str
- The provider-assigned unique ID for this managed resource.
- deployedModelDisplayName String
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- deployedModelId String
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- endpoint String
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- id String
- The provider-assigned unique ID for this managed resource.
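As a sketch, the endpoint and deployed model ID can be exported as stack outputs once the deployment completes; the resource arguments here are illustrative:
import pulumi
import pulumi_gcp as gcp

deploy = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy",
    hugging_face_model_id="google/gemma-2-2b-it",
    location="us-central1",
    model_config={"accept_eula": True})

# Output-only properties resolve after the deploy operation finishes.
pulumi.export("endpoint", deploy.endpoint)
pulumi.export("deployedModelId", deploy.deployed_model_id)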
Look up Existing AiEndpointWithModelGardenDeployment Resource
Get an existing AiEndpointWithModelGardenDeployment resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: AiEndpointWithModelGardenDeploymentState, opts?: CustomResourceOptions): AiEndpointWithModelGardenDeployment
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
deploy_config: Optional[AiEndpointWithModelGardenDeploymentDeployConfigArgs] = None,
deployed_model_display_name: Optional[str] = None,
deployed_model_id: Optional[str] = None,
endpoint: Optional[str] = None,
endpoint_config: Optional[AiEndpointWithModelGardenDeploymentEndpointConfigArgs] = None,
hugging_face_model_id: Optional[str] = None,
location: Optional[str] = None,
model_config: Optional[AiEndpointWithModelGardenDeploymentModelConfigArgs] = None,
project: Optional[str] = None,
publisher_model_name: Optional[str] = None) -> AiEndpointWithModelGardenDeployment
func GetAiEndpointWithModelGardenDeployment(ctx *Context, name string, id IDInput, state *AiEndpointWithModelGardenDeploymentState, opts ...ResourceOption) (*AiEndpointWithModelGardenDeployment, error)
public static AiEndpointWithModelGardenDeployment Get(string name, Input<string> id, AiEndpointWithModelGardenDeploymentState? state, CustomResourceOptions? opts = null)
public static AiEndpointWithModelGardenDeployment get(String name, Output<String> id, AiEndpointWithModelGardenDeploymentState state, CustomResourceOptions options)
resources:
  _:
    type: gcp:vertex:AiEndpointWithModelGardenDeployment
    get:
      id: ${id}
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
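For instance, a minimal Python lookup might read as follows; the ID value is illustrative, so pass the ID your provider reports for the resource:
import pulumi_gcp as gcp

# Read the state of an existing deployment without adopting it into this program.
existing = gcp.vertex.AiEndpointWithModelGardenDeployment.get(
    "existing-deploy",
    id="projects/my-project/locations/us-central1/endpoints/1234567890")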
The following state arguments are supported:
- DeployConfig AiEndpointWithModelGardenDeploymentDeployConfig
- The deploy config to use for the deployment. Structure is documented below.
- DeployedModelDisplayName string
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- DeployedModelId string
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- Endpoint string
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- EndpointConfig AiEndpointWithModelGardenDeploymentEndpointConfig
- The endpoint config to use for the deployment. Structure is documented below.
- HuggingFaceModelId string
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- Location string
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- ModelConfig AiEndpointWithModelGardenDeploymentModelConfig
- The model config to use for the deployment. Structure is documented below.
- Project string
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- PublisherModelName string
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- DeployConfig AiEndpointWithModelGardenDeploymentDeployConfigArgs
- The deploy config to use for the deployment. Structure is documented below.
- DeployedModelDisplayName string
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- DeployedModelId string
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- Endpoint string
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- EndpointConfig AiEndpointWithModelGardenDeploymentEndpointConfigArgs
- The endpoint config to use for the deployment. Structure is documented below.
- HuggingFaceModelId string
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- Location string
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- ModelConfig AiEndpointWithModelGardenDeploymentModelConfigArgs
- The model config to use for the deployment. Structure is documented below.
- Project string
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- PublisherModelName string
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- deployConfig AiEndpointWithModelGardenDeploymentDeployConfig
- The deploy config to use for the deployment. Structure is documented below.
- deployedModelDisplayName String
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- deployedModelId String
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- endpoint String
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- endpointConfig AiEndpointWithModelGardenDeploymentEndpointConfig
- The endpoint config to use for the deployment. Structure is documented below.
- huggingFaceModelId String
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- location String
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- modelConfig AiEndpointWithModelGardenDeploymentModelConfig
- The model config to use for the deployment. Structure is documented below.
- project String
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- publisherModelName String
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- deployConfig AiEndpointWithModelGardenDeploymentDeployConfig
- The deploy config to use for the deployment. Structure is documented below.
- deployedModelDisplayName string
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- deployedModelId string
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- endpoint string
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- endpointConfig AiEndpointWithModelGardenDeploymentEndpointConfig
- The endpoint config to use for the deployment. Structure is documented below.
- huggingFaceModelId string
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- location string
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- modelConfig AiEndpointWithModelGardenDeploymentModelConfig
- The model config to use for the deployment. Structure is documented below.
- project string
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- publisherModelName string
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- deploy_config AiEndpointWithModelGardenDeploymentDeployConfigArgs
- The deploy config to use for the deployment. Structure is documented below.
- deployed_model_display_name str
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- deployed_model_id str
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- endpoint str
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- endpoint_config AiEndpointWithModelGardenDeploymentEndpointConfigArgs
- The endpoint config to use for the deployment. Structure is documented below.
- hugging_face_model_id str
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- location str
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- model_config AiEndpointWithModelGardenDeploymentModelConfigArgs
- The model config to use for the deployment. Structure is documented below.
- project str
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- publisher_model_name str
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- deployConfig Property Map
- The deploy config to use for the deployment. Structure is documented below.
- deployedModelDisplayName String
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- deployedModelId String
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- endpoint String
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- endpointConfig Property Map
- The endpoint config to use for the deployment. Structure is documented below.
- huggingFaceModelId String
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- location String
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- modelConfig Property Map
- The model config to use for the deployment. Structure is documented below.
- project String
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- publisherModelName String
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
Supporting Types
AiEndpointWithModelGardenDeploymentDeployConfig, AiEndpointWithModelGardenDeploymentDeployConfigArgs
- DedicatedResources AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources
- A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
- FastTryoutEnabled bool
- If true, enable the QMT fast tryout feature for this model if possible.
- SystemLabels Dictionary<string, string>
- System labels for Model Garden deployments. These labels are managed by Google and for tracking purposes only.
- DedicatedResources AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources
- A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
- FastTryoutEnabled bool
- If true, enable the QMT fast tryout feature for this model if possible.
- SystemLabels map[string]string
- System labels for Model Garden deployments. These labels are managed by Google and for tracking purposes only.
- dedicatedResources AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources
- A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
- fastTryoutEnabled Boolean
- If true, enable the QMT fast tryout feature for this model if possible.
- systemLabels Map<String,String>
- System labels for Model Garden deployments. These labels are managed by Google and for tracking purposes only.
- dedicatedResources AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources
- A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
- fastTryoutEnabled boolean
- If true, enable the QMT fast tryout feature for this model if possible.
- systemLabels {[key: string]: string}
- System labels for Model Garden deployments. These labels are managed by Google and for tracking purposes only.
- dedicated_resources AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources
- A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
- fast_tryout_enabled bool
- If true, enable the QMT fast tryout feature for this model if possible.
- system_labels Mapping[str, str]
- System labels for Model Garden deployments. These labels are managed by Google and for tracking purposes only.
- dedicatedResources Property Map
- A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
- fastTryoutEnabled Boolean
- If true, enable the QMT fast tryout feature for this model if possible.
- systemLabels Map<String>
- System labels for Model Garden deployments. These labels are managed by Google and for tracking purposes only.
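A minimal Python sketch of a deploy config that requests the fast tryout feature; whether fast tryout is actually used depends on the model, so this is best-effort by design:
import pulumi_gcp as gcp

deploy = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-fast-tryout",
    hugging_face_model_id="google/gemma-2-2b-it",
    location="us-central1",
    model_config={"accept_eula": True},
    deploy_config={
        # Enabled only if the model supports QMT fast tryout.
        "fast_tryout_enabled": True,
    })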
AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources, AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
- MachineSpec AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec
- Specification of a single machine. Structure is documented below.
- MinReplicaCount int
- The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
- AutoscalingMetricSpecs List<AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec>
- The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator's duty cycle, and so on; the target defaults to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both the CPU utilization and the accelerator's duty cycle metrics, scaling up when either metric exceeds its target value and scaling down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override the target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
- MaxReplicaCount int
- The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
- RequiredReplicaCount int
- Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, required_replica_count defaults to min_replica_count.
- Spot bool
- If true, schedule the deployment workload on spot VMs.
- MachineSpec AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec
- Specification of a single machine. Structure is documented below.
- MinReplicaCount int
- The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
- AutoscalingMetricSpecs []AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec
- The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator's duty cycle, and so on; the target defaults to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both the CPU utilization and the accelerator's duty cycle metrics, scaling up when either metric exceeds its target value and scaling down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override the target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
- MaxReplicaCount int
- The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
- RequiredReplicaCount int
- Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, required_replica_count defaults to min_replica_count.
- Spot bool
- If true, schedule the deployment workload on spot VMs.
- machineSpec AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec
- Specification of a single machine. Structure is documented below.
- minReplicaCount Integer
- The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
- autoscalingMetricSpecs List<AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec>
- The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator's duty cycle, and so on; the target defaults to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both the CPU utilization and the accelerator's duty cycle metrics, scaling up when either metric exceeds its target value and scaling down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override the target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
- maxReplicaCount Integer
- The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
- requiredReplicaCount Integer
- Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, required_replica_count defaults to min_replica_count.
- spot Boolean
- If true, schedule the deployment workload on spot VMs.
- machineSpec AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec
- Specification of a single machine. Structure is documented below.
- minReplicaCount number
- The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
- autoscalingMetricSpecs AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec[]
- The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator's duty cycle, and so on; the target defaults to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both the CPU utilization and the accelerator's duty cycle metrics, scaling up when either metric exceeds its target value and scaling down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override the target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
- maxReplicaCount number
- The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
- requiredReplicaCount number
- Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, required_replica_count defaults to min_replica_count.
- spot boolean
- If true, schedule the deployment workload on spot VMs.
- machine_spec AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec
- Specification of a single machine. Structure is documented below.
- min_replica_count int
- The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
- autoscaling_metric_specs Sequence[AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec]
- The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator's duty cycle, and so on; the target defaults to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both the CPU utilization and the accelerator's duty cycle metrics, scaling up when either metric exceeds its target value and scaling down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override the target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
- max_replica_count int
- The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
- required_replica_count int
- Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, required_replica_count defaults to min_replica_count.
- spot bool
- If true, schedule the deployment workload on spot VMs.
- machineSpec Property Map
- Specification of a single machine. Structure is documented below.
- minReplicaCount Number
- The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
- autoscalingMetricSpecs List<Property Map>
- The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator's duty cycle, and so on; the target defaults to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both the CPU utilization and the accelerator's duty cycle metrics, scaling up when either metric exceeds its target value and scaling down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override the target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
- maxReplicaCount Number
- The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
- requiredReplicaCount Number
- Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, required_replica_count defaults to min_replica_count.
- spot Boolean
- If true, schedule the deployment workload on spot VMs.
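A minimal Python sketch of dedicated resources follows; the machine type and accelerator pairing are illustrative, so check the machine types supported for prediction before copying them:
import pulumi_gcp as gcp

deploy = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-dedicated",
    hugging_face_model_id="google/gemma-2-2b-it",
    location="us-central1",
    model_config={"accept_eula": True},
    deploy_config={
        "dedicated_resources": {
            "machine_spec": {
                "machine_type": "g2-standard-12",  # illustrative GPU machine type
                "accelerator_type": "NVIDIA_L4",   # one of the documented enum values
                "accelerator_count": 1,
            },
            "min_replica_count": 1,
            "max_replica_count": 2,
            "spot": True,  # schedule replicas on spot VMs
        },
    })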
AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec, AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpecArgs
- MetricName string
- The resource metric name. Supported metrics:
- For Online Prediction: aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle, aiplatform.googleapis.com/prediction/online/cpu/utilization
- Target int
- The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.
- MetricName string
- The resource metric name. Supported metrics:
- For Online Prediction: aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle, aiplatform.googleapis.com/prediction/online/cpu/utilization
- Target int
- The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.
- metricName String
- The resource metric name. Supported metrics:
- For Online Prediction: aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle, aiplatform.googleapis.com/prediction/online/cpu/utilization
- target Integer
- The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.
- metricName string
- The resource metric name. Supported metrics:
- For Online Prediction: aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle, aiplatform.googleapis.com/prediction/online/cpu/utilization
- target number
- The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.
- metric_name str
- The resource metric name. Supported metrics:
- For Online Prediction: aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle, aiplatform.googleapis.com/prediction/online/cpu/utilization
- target int
- The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.
- metricName String
- The resource metric name. Supported metrics:
- For Online Prediction: aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle, aiplatform.googleapis.com/prediction/online/cpu/utilization
- target Number
- The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.
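Following the override example in the description above, this sketch shows the dictionary form that raises the CPU utilization target to 80; the fragment nests under deploy_config.dedicated_resources, and the machine type used is the documented default:
dedicated_resources = {
    "machine_spec": {"machine_type": "n1-standard-2"},
    "min_replica_count": 1,
    "max_replica_count": 3,
    "autoscaling_metric_specs": [{
        # Override the default target of 60 for CPU utilization.
        "metric_name": "aiplatform.googleapis.com/prediction/online/cpu/utilization",
        "target": 80,
    }],
}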
AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec, AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
- Accelerator
Count int - The number of accelerators to attach to the machine.
- Accelerator
Type string - Possible values: ACCELERATOR_TYPE_UNSPECIFIED NVIDIA_TESLA_K80 NVIDIA_TESLA_P100 NVIDIA_TESLA_V100 NVIDIA_TESLA_P4 NVIDIA_TESLA_T4 NVIDIA_TESLA_A100 NVIDIA_A100_80GB NVIDIA_L4 NVIDIA_H100_80GB NVIDIA_H100_MEGA_80GB NVIDIA_H200_141GB NVIDIA_B200 TPU_V2 TPU_V3 TPU_V4_POD TPU_V5_LITEPOD
- Machine
Type string - The type of the machine.
See the list of machine types supported for
prediction
See the list of machine types supported for custom
training.
For DeployedModel this field is optional, and the default
value is
n1-standard-2
. For BatchPredictionJob or as part of WorkerPoolSpec this field is required. - Multihost
Gpu intNode Count - The number of nodes per replica for multihost GPU deployments.
- Reservation
Affinity AiEndpoint With Model Garden Deployment Deploy Config Dedicated Resources Machine Spec Reservation Affinity - A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
- Tpu
Topology string - The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
- Accelerator
Count int - The number of accelerators to attach to the machine.
- Accelerator
Type string - Possible values: ACCELERATOR_TYPE_UNSPECIFIED NVIDIA_TESLA_K80 NVIDIA_TESLA_P100 NVIDIA_TESLA_V100 NVIDIA_TESLA_P4 NVIDIA_TESLA_T4 NVIDIA_TESLA_A100 NVIDIA_A100_80GB NVIDIA_L4 NVIDIA_H100_80GB NVIDIA_H100_MEGA_80GB NVIDIA_H200_141GB NVIDIA_B200 TPU_V2 TPU_V3 TPU_V4_POD TPU_V5_LITEPOD
- Machine
Type string - The type of the machine.
See the list of machine types supported for
prediction
See the list of machine types supported for custom
training.
For DeployedModel this field is optional, and the default
value is
n1-standard-2
. For BatchPredictionJob or as part of WorkerPoolSpec this field is required. - Multihost
Gpu intNode Count - The number of nodes per replica for multihost GPU deployments.
- Reservation
Affinity AiEndpoint With Model Garden Deployment Deploy Config Dedicated Resources Machine Spec Reservation Affinity - A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
- Tpu
Topology string - The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
- accelerator
Count Integer - The number of accelerators to attach to the machine.
- accelerator
Type String - Possible values: ACCELERATOR_TYPE_UNSPECIFIED NVIDIA_TESLA_K80 NVIDIA_TESLA_P100 NVIDIA_TESLA_V100 NVIDIA_TESLA_P4 NVIDIA_TESLA_T4 NVIDIA_TESLA_A100 NVIDIA_A100_80GB NVIDIA_L4 NVIDIA_H100_80GB NVIDIA_H100_MEGA_80GB NVIDIA_H200_141GB NVIDIA_B200 TPU_V2 TPU_V3 TPU_V4_POD TPU_V5_LITEPOD
- machine
Type String - The type of the machine.
See the list of machine types supported for
prediction
See the list of machine types supported for custom
training.
For DeployedModel this field is optional, and the default
value is
n1-standard-2
. For BatchPredictionJob or as part of WorkerPoolSpec this field is required. - multihost
Gpu IntegerNode Count - The number of nodes per replica for multihost GPU deployments.
- reservation
Affinity AiEndpoint With Model Garden Deployment Deploy Config Dedicated Resources Machine Spec Reservation Affinity - A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
- tpu
Topology String - The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
- acceleratorCount number - The number of accelerators to attach to the machine.
- acceleratorType string - Possible values: ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, NVIDIA_TESLA_A100, NVIDIA_A100_80GB, NVIDIA_L4, NVIDIA_H100_80GB, NVIDIA_H100_MEGA_80GB, NVIDIA_H200_141GB, NVIDIA_B200, TPU_V2, TPU_V3, TPU_V4_POD, TPU_V5_LITEPOD.
- machineType string - The type of the machine. See the list of machine types supported for prediction and the list of machine types supported for custom training. For DeployedModel this field is optional, and the default value is n1-standard-2. For BatchPredictionJob or as part of WorkerPoolSpec this field is required.
- multihostGpuNodeCount number - The number of nodes per replica for multihost GPU deployments.
- reservationAffinity AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinity - A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
- tpuTopology string - The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
- accelerator_count int - The number of accelerators to attach to the machine.
- accelerator_type str - Possible values: ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, NVIDIA_TESLA_A100, NVIDIA_A100_80GB, NVIDIA_L4, NVIDIA_H100_80GB, NVIDIA_H100_MEGA_80GB, NVIDIA_H200_141GB, NVIDIA_B200, TPU_V2, TPU_V3, TPU_V4_POD, TPU_V5_LITEPOD.
- machine_type str - The type of the machine. See the list of machine types supported for prediction and the list of machine types supported for custom training. For DeployedModel this field is optional, and the default value is n1-standard-2. For BatchPredictionJob or as part of WorkerPoolSpec this field is required.
- multihost_gpu_node_count int - The number of nodes per replica for multihost GPU deployments.
- reservation_affinity AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinity - A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
- tpu_topology str - The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
- acceleratorCount Number - The number of accelerators to attach to the machine.
- acceleratorType String - Possible values: ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, NVIDIA_TESLA_A100, NVIDIA_A100_80GB, NVIDIA_L4, NVIDIA_H100_80GB, NVIDIA_H100_MEGA_80GB, NVIDIA_H200_141GB, NVIDIA_B200, TPU_V2, TPU_V3, TPU_V4_POD, TPU_V5_LITEPOD.
- machineType String - The type of the machine. See the list of machine types supported for prediction and the list of machine types supported for custom training. For DeployedModel this field is optional, and the default value is n1-standard-2. For BatchPredictionJob or as part of WorkerPoolSpec this field is required.
- multihostGpuNodeCount Number - The number of nodes per replica for multihost GPU deployments.
- reservationAffinity Property Map - A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
- tpuTopology String - The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
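Put together, the machine spec fields above slot into deployConfig.dedicatedResources. The following is a minimal TypeScript sketch, assuming the g2-standard-16 / NVIDIA_L4 pairing is available in us-central1 and that a single replica (minReplicaCount) is sufficient; the resource name and model are illustrative:

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Illustrative machine/accelerator pairing; it must be valid for your region.
const deployWithGpu = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-with-gpu", {
    publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
    },
    deployConfig: {
        dedicatedResources: {
            machineSpec: {
                machineType: "g2-standard-16",
                acceleratorType: "NVIDIA_L4",
                acceleratorCount: 1,
            },
            // Assumed field of dedicatedResources; one replica for illustration.
            minReplicaCount: 1,
        },
    },
});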
AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinity, AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinityArgs
- ReservationAffinityType string - Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION.
- Key string - Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
- Values List<string> - Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
- ReservationAffinityType string - Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION.
- Key string - Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
- Values []string - Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
- reservationAffinityType String - Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION.
- key String - Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
- values List<String> - Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
- reservationAffinityType string - Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION.
- key string - Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
- values string[] - Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
- reservation_affinity_type str - Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION.
- key str - Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
- values Sequence[str] - Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
- reservationAffinityType String - Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION.
- key String - Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
- values List<String> - Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
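To pin a deployment to a specific shared reservation, set the key to compute.googleapis.com/reservation-name as described above. A minimal TypeScript sketch; the project, zone, and reservation names are placeholders:

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const deployFromReservation = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-from-reservation", {
    publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
    },
    deployConfig: {
        dedicatedResources: {
            machineSpec: {
                machineType: "a2-highgpu-1g",
                acceleratorType: "NVIDIA_TESLA_A100",
                acceleratorCount: 1,
                reservationAffinity: {
                    reservationAffinityType: "SPECIFIC_RESERVATION",
                    key: "compute.googleapis.com/reservation-name",
                    // Full resource name of a placeholder reservation.
                    values: ["projects/my-project/zones/us-central1-a/reservations/my-reservation"],
                },
            },
            minReplicaCount: 1,
        },
    },
});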
AiEndpointWithModelGardenDeploymentEndpointConfig, AiEndpointWithModelGardenDeploymentEndpointConfigArgs
- DedicatedEndpointEnabled bool - If true, the endpoint will be exposed through a dedicated DNS (Endpoint.dedicated_endpoint_dns). Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you won't be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. The limitations will be removed soon.
- EndpointDisplayName string - The user-specified display name of the endpoint. If not set, a default name will be used.
- DedicatedEndpointEnabled bool - If true, the endpoint will be exposed through a dedicated DNS (Endpoint.dedicated_endpoint_dns). Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you won't be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. The limitations will be removed soon.
- EndpointDisplayName string - The user-specified display name of the endpoint. If not set, a default name will be used.
- dedicatedEndpointEnabled Boolean - If true, the endpoint will be exposed through a dedicated DNS (Endpoint.dedicated_endpoint_dns). Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you won't be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. The limitations will be removed soon.
- endpointDisplayName String - The user-specified display name of the endpoint. If not set, a default name will be used.
- dedicatedEndpointEnabled boolean - If true, the endpoint will be exposed through a dedicated DNS (Endpoint.dedicated_endpoint_dns). Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you won't be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. The limitations will be removed soon.
- endpointDisplayName string - The user-specified display name of the endpoint. If not set, a default name will be used.
- dedicated_endpoint_enabled bool - If true, the endpoint will be exposed through a dedicated DNS (Endpoint.dedicated_endpoint_dns). Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you won't be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. The limitations will be removed soon.
- endpoint_display_name str - The user-specified display name of the endpoint. If not set, a default name will be used.
- dedicatedEndpointEnabled Boolean - If true, the endpoint will be exposed through a dedicated DNS (Endpoint.dedicated_endpoint_dns). Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you won't be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. The limitations will be removed soon.
- endpointDisplayName String - The user-specified display name of the endpoint. If not set, a default name will be used.
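A minimal TypeScript sketch of the endpoint config; the display name is illustrative, and enabling the dedicated endpoint means requests must go to the dedicated DNS rather than the shared one:

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const deployDedicated = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-dedicated", {
    publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
    },
    endpointConfig: {
        // Illustrative display name.
        endpointDisplayName: "my-model-garden-endpoint",
        // Exposes the endpoint via its dedicated DNS.
        dedicatedEndpointEnabled: true,
    },
});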
AiEndpointWithModelGardenDeploymentModelConfig, AiEndpointWithModelGardenDeploymentModelConfigArgs
- AcceptEula bool - Whether the user accepts the End User License Agreement (EULA) for the model.
- ContainerSpec AiEndpointWithModelGardenDeploymentModelConfigContainerSpec - Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
- HuggingFaceAccessToken string - The Hugging Face read access token used to access the model artifacts of gated models.
- HuggingFaceCacheEnabled bool - If true, the model will deploy with a cached version instead of directly downloading the model artifacts from Hugging Face. This is suitable for VPC-SC users with limited internet access.
- ModelDisplayName string - The user-specified display name of the uploaded model. If not set, a default name will be used.
- AcceptEula bool - Whether the user accepts the End User License Agreement (EULA) for the model.
- ContainerSpec AiEndpointWithModelGardenDeploymentModelConfigContainerSpec - Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
- HuggingFaceAccessToken string - The Hugging Face read access token used to access the model artifacts of gated models.
- HuggingFaceCacheEnabled bool - If true, the model will deploy with a cached version instead of directly downloading the model artifacts from Hugging Face. This is suitable for VPC-SC users with limited internet access.
- ModelDisplayName string - The user-specified display name of the uploaded model. If not set, a default name will be used.
- acceptEula Boolean - Whether the user accepts the End User License Agreement (EULA) for the model.
- containerSpec AiEndpointWithModelGardenDeploymentModelConfigContainerSpec - Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
- huggingFaceAccessToken String - The Hugging Face read access token used to access the model artifacts of gated models.
- huggingFaceCacheEnabled Boolean - If true, the model will deploy with a cached version instead of directly downloading the model artifacts from Hugging Face. This is suitable for VPC-SC users with limited internet access.
- modelDisplayName String - The user-specified display name of the uploaded model. If not set, a default name will be used.
- acceptEula boolean - Whether the user accepts the End User License Agreement (EULA) for the model.
- containerSpec AiEndpointWithModelGardenDeploymentModelConfigContainerSpec - Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
- huggingFaceAccessToken string - The Hugging Face read access token used to access the model artifacts of gated models.
- huggingFaceCacheEnabled boolean - If true, the model will deploy with a cached version instead of directly downloading the model artifacts from Hugging Face. This is suitable for VPC-SC users with limited internet access.
- modelDisplayName string - The user-specified display name of the uploaded model. If not set, a default name will be used.
- accept_eula bool - Whether the user accepts the End User License Agreement (EULA) for the model.
- container_spec AiEndpointWithModelGardenDeploymentModelConfigContainerSpec - Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
- hugging_face_access_token str - The Hugging Face read access token used to access the model artifacts of gated models.
- hugging_face_cache_enabled bool - If true, the model will deploy with a cached version instead of directly downloading the model artifacts from Hugging Face. This is suitable for VPC-SC users with limited internet access.
- model_display_name str - The user-specified display name of the uploaded model. If not set, a default name will be used.
- acceptEula Boolean - Whether the user accepts the End User License Agreement (EULA) for the model.
- containerSpec Property Map - Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
- huggingFaceAccessToken String - The Hugging Face read access token used to access the model artifacts of gated models.
- huggingFaceCacheEnabled Boolean - If true, the model will deploy with a cached version instead of directly downloading the model artifacts from Hugging Face. This is suitable for VPC-SC users with limited internet access.
- modelDisplayName String - The user-specified display name of the uploaded model. If not set, a default name will be used.
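For a gated Hugging Face model, the read access token can be supplied through modelConfig. A minimal TypeScript sketch, assuming the token is stored as a Pulumi secret; the model ID and display name are illustrative:

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const config = new pulumi.Config();
// Keep the token out of source control; here it is read from Pulumi config
// as a secret (set with: pulumi config set --secret huggingFaceToken <token>).
const hfToken = config.requireSecret("huggingFaceToken");

const deployGated = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gated", {
    // Illustrative gated model ID.
    huggingFaceModelId: "meta-llama/Llama-3.2-1B",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
        huggingFaceAccessToken: hfToken,
        // Deploy from a cached copy instead of downloading from Hugging Face.
        huggingFaceCacheEnabled: true,
        modelDisplayName: "llama-3-2-1b",
    },
});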
AiEndpointWithModelGardenDeploymentModelConfigContainerSpec, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecArgs
- ImageUri string - URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI to one of Vertex AI's pre-built container images for prediction in this field.
- Args List<string> - Specifies arguments for the command that runs when the container starts. This overrides the container's CMD. Specify this field as an array of executable and arguments, similar to a Docker CMD's "default parameters" form. If you don't specify this field but do specify the command field, then the command from the command field runs without any additional arguments. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. If you don't specify this field and don't specify the command field, then the container's ENTRYPOINT and CMD determine what runs based on their default behavior. See the Docker documentation about how CMD and ENTRYPOINT interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the args field of the Kubernetes Containers v1 core API.
- Commands List<string> - Specifies the command that runs when the container starts. This overrides the container's ENTRYPOINT. Specify this field as an array of executable and arguments, similar to a Docker ENTRYPOINT's "exec" form, not its "shell" form. If you do not specify this field, then the container's ENTRYPOINT runs, in conjunction with the args field or the container's CMD, if either exists. If this field is not specified and the container does not have an ENTRYPOINT, then refer to the Docker documentation about how CMD and ENTRYPOINT interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
- DeploymentTimeout string - Deployment timeout. The limit for the deployment timeout is 2 hours.
- Envs List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv> - List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following example sets the variable VAR_2 to have the value foo bar: [ { "name": "VAR_1", "value": "foo" }, { "name": "VAR_2", "value": "$(VAR_1) bar" } ]. If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.
- GrpcPorts List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort> - List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- HealthProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- HealthRoute string - HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:
  - ENDPOINT: the last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: the DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- LivenessProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- Ports List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort> - List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value: [ { "containerPort": 8080 } ]. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- PredictRoute string - HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:
  - ENDPOINT: the last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: the DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- SharedMemorySizeMb string - The amount of the VM memory to reserve as the shared memory for the model, in megabytes.
- StartupProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- ImageUri string - URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI to one of Vertex AI's pre-built container images for prediction in this field.
- Args []string - Specifies arguments for the command that runs when the container starts. This overrides the container's CMD. Specify this field as an array of executable and arguments, similar to a Docker CMD's "default parameters" form. If you don't specify this field but do specify the command field, then the command from the command field runs without any additional arguments. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. If you don't specify this field and don't specify the command field, then the container's ENTRYPOINT and CMD determine what runs based on their default behavior. See the Docker documentation about how CMD and ENTRYPOINT interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the args field of the Kubernetes Containers v1 core API.
- Commands []string - Specifies the command that runs when the container starts. This overrides the container's ENTRYPOINT. Specify this field as an array of executable and arguments, similar to a Docker ENTRYPOINT's "exec" form, not its "shell" form. If you do not specify this field, then the container's ENTRYPOINT runs, in conjunction with the args field or the container's CMD, if either exists. If this field is not specified and the container does not have an ENTRYPOINT, then refer to the Docker documentation about how CMD and ENTRYPOINT interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
- DeploymentTimeout string - Deployment timeout. The limit for the deployment timeout is 2 hours.
- Envs []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv - List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following example sets the variable VAR_2 to have the value foo bar: [ { "name": "VAR_1", "value": "foo" }, { "name": "VAR_2", "value": "$(VAR_1) bar" } ]. If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.
- GrpcPorts []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort - List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- HealthProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- HealthRoute string - HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:
  - ENDPOINT: the last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: the DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- LivenessProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- Ports []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort - List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value: [ { "containerPort": 8080 } ]. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- PredictRoute string - HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:
  - ENDPOINT: the last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: the DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- SharedMemorySizeMb string - The amount of the VM memory to reserve as the shared memory for the model, in megabytes.
- StartupProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- imageUri String - URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI to one of Vertex AI's pre-built container images for prediction in this field.
- args List<String> - Specifies arguments for the command that runs when the container starts. This overrides the container's CMD. Specify this field as an array of executable and arguments, similar to a Docker CMD's "default parameters" form. If you don't specify this field but do specify the command field, then the command from the command field runs without any additional arguments. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. If you don't specify this field and don't specify the command field, then the container's ENTRYPOINT and CMD determine what runs based on their default behavior. See the Docker documentation about how CMD and ENTRYPOINT interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the args field of the Kubernetes Containers v1 core API.
- commands List<String> - Specifies the command that runs when the container starts. This overrides the container's ENTRYPOINT. Specify this field as an array of executable and arguments, similar to a Docker ENTRYPOINT's "exec" form, not its "shell" form. If you do not specify this field, then the container's ENTRYPOINT runs, in conjunction with the args field or the container's CMD, if either exists. If this field is not specified and the container does not have an ENTRYPOINT, then refer to the Docker documentation about how CMD and ENTRYPOINT interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
- deploymentTimeout String - Deployment timeout. The limit for the deployment timeout is 2 hours.
- envs List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv> - List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following example sets the variable VAR_2 to have the value foo bar: [ { "name": "VAR_1", "value": "foo" }, { "name": "VAR_2", "value": "$(VAR_1) bar" } ]. If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.
- grpcPorts List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort> - List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- healthProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- healthRoute String - HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:
  - ENDPOINT: the last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: the DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- livenessProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- ports List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort> - List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value: [ { "containerPort": 8080 } ]. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- predictRoute String - HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:
  - ENDPOINT: the last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: the DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- sharedMemorySizeMb String - The amount of the VM memory to reserve as the shared memory for the model, in megabytes.
- startupProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- imageUri string - URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI to one of Vertex AI's pre-built container images for prediction in this field.
- args string[] - Specifies arguments for the command that runs when the container starts. This overrides the container's CMD. Specify this field as an array of executable and arguments, similar to a Docker CMD's "default parameters" form. If you don't specify this field but do specify the command field, then the command from the command field runs without any additional arguments. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. If you don't specify this field and don't specify the command field, then the container's ENTRYPOINT and CMD determine what runs based on their default behavior. See the Docker documentation about how CMD and ENTRYPOINT interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the args field of the Kubernetes Containers v1 core API.
- commands string[] - Specifies the command that runs when the container starts. This overrides the container's ENTRYPOINT. Specify this field as an array of executable and arguments, similar to a Docker ENTRYPOINT's "exec" form, not its "shell" form. If you do not specify this field, then the container's ENTRYPOINT runs, in conjunction with the args field or the container's CMD, if either exists. If this field is not specified and the container does not have an ENTRYPOINT, then refer to the Docker documentation about how CMD and ENTRYPOINT interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
- deploymentTimeout string - Deployment timeout. The limit for the deployment timeout is 2 hours.
- envs AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv[] - List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following example sets the variable VAR_2 to have the value foo bar: [ { "name": "VAR_1", "value": "foo" }, { "name": "VAR_2", "value": "$(VAR_1) bar" } ]. If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.
- grpcPorts AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort[] - List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- healthProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- healthRoute string - HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:
  - ENDPOINT: the last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: the DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- livenessProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- ports AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort[] - List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value: [ { "containerPort": 8080 } ]. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- predictRoute string - HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:
  - ENDPOINT: the last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: the DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- sharedMemorySizeMb string - The amount of the VM memory to reserve as the shared memory for the model, in megabytes.
- startupProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
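To make the env/args expansion rules above concrete, here is a minimal TypeScript sketch of a modelConfig.containerSpec; the image URI, routes, and variable names are placeholders, and AIP_STORAGE_URI is one of the environment variables Vertex AI sets for the container:

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const deployCustomContainer = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-custom", {
    publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
        containerSpec: {
            // Placeholder serving image in Artifact Registry.
            imageUri: "us-docker.pkg.dev/my-project/my-repo/my-serving-image:latest",
            // Later entries may reference earlier ones: VAR_2 expands to "foo bar".
            envs: [
                { name: "VAR_1", value: "foo" },
                { name: "VAR_2", value: "$(VAR_1) bar" },
                // Escaped with $$: the container sees the literal string "$(VAR_1)".
                { name: "VAR_3", value: "$$(VAR_1)" },
            ],
            // args override the image's CMD and may reference env vars via $(NAME).
            args: ["--model-dir", "$(AIP_STORAGE_URI)"],
            // Vertex AI sends traffic and health checks to the first port listed.
            ports: [{ containerPort: 8080 }],
            healthRoute: "/health",
            predictRoute: "/predict",
        },
    },
});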
- image_
uri str - URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI to one of Vertex AI's pre-built container images for prediction in this field.
- args Sequence[str]
- Specifies arguments for the command that runs when the container starts.
This overrides the container's
CMD
. Specify this field as an array of executable and arguments, similar to a DockerCMD
's "default parameters" form. If you don't specify this field but do specify the command field, then the command from thecommand
field runs without any additional arguments. See the Kubernetes documentation about how thecommand
andargs
fields interact with a container'sENTRYPOINT
andCMD
. If you don't specify this field and don't specify thecommand
field, then the container'sENTRYPOINT
andCMD
determine what runs based on their default behavior. See the Docker documentation about howCMD
andENTRYPOINT
interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax:$(VARIABLE_NAME) Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with$$
; for example:$$(VARIABLE_NAME) This field corresponds to theargs
field of the Kubernetes Containers v1 core API. - commands Sequence[str]
- Specifies the command that runs when the container starts. This overrides
the container's
ENTRYPOINT.
Specify this field as an array of executable and arguments, similar to a
Docker
ENTRYPOINT
's "exec" form, not its "shell" form. If you do not specify this field, then the container'sENTRYPOINT
runs, in conjunction with the args field or the container'sCMD
, if either exists. If this field is not specified and the container does not have anENTRYPOINT
, then refer to the Docker documentation about howCMD
andENTRYPOINT
interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
- deployment_timeout str - Deployment timeout. The limit for the deployment timeout is 2 hours.
- envs Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv] - List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following example sets the variable VAR_2 to have the value foo bar: [ { "name": "VAR_1", "value": "foo" }, { "name": "VAR_2", "value": "$(VAR_1) bar" } ] If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.
- grpc_ports Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort] - List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- health_probe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- health_route str - HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict The placeholders in this value are replaced as follows:
  - ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- liveness_probe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- ports Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort] - List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value: [ { "containerPort": 8080 } ] Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- predict_route str - HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict The placeholders in this value are replaced as follows:
  - ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- shared_memory_size_mb str - The amount of the VM memory to reserve as the shared memory for the model in megabytes.
- startup_probe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- imageUri String - URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI to one of Vertex AI's pre-built container images for prediction in this field.
- args List<String> - Specifies arguments for the command that runs when the container starts. This overrides the container's CMD. Specify this field as an array of executable and arguments, similar to a Docker CMD's "default parameters" form. If you don't specify this field but do specify the command field, then the command from the command field runs without any additional arguments. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. If you don't specify this field and don't specify the command field, then the container's ENTRYPOINT and CMD determine what runs based on their default behavior. See the Docker documentation about how CMD and ENTRYPOINT interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the args field of the Kubernetes Containers v1 core API.
- commands List<String> - Specifies the command that runs when the container starts. This overrides the container's ENTRYPOINT. Specify this field as an array of executable and arguments, similar to a Docker ENTRYPOINT's "exec" form, not its "shell" form. If you do not specify this field, then the container's ENTRYPOINT runs, in conjunction with the args field or the container's CMD, if either exists. If this field is not specified and the container does not have an ENTRYPOINT, then refer to the Docker documentation about how CMD and ENTRYPOINT interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
- deploymentTimeout String - Deployment timeout. The limit for the deployment timeout is 2 hours.
- envs List<Property Map> - List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following example sets the variable VAR_2 to have the value foo bar: [ { "name": "VAR_1", "value": "foo" }, { "name": "VAR_2", "value": "$(VAR_1) bar" } ] If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.
- grpcPorts List<Property Map> - List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- healthProbe Property Map - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- healthRoute String - HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict The placeholders in this value are replaced as follows:
  - ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- livenessProbe Property Map - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- ports List<Property Map> - List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value: [ { "containerPort": 8080 } ] Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- predictRoute String - HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict The placeholders in this value are replaced as follows:
  - ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- sharedMemorySizeMb String - The amount of the VM memory to reserve as the shared memory for the model in megabytes.
- startupProbe Property Map - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
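The container spec fields above mirror the Kubernetes Containers v1 core API. As an illustrative sketch only (the image URI, routes, port, and timeout value are hypothetical, and the nesting under modelConfig.containerSpec follows the type names in this section), a TypeScript program wiring several of them together might look like:

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
    huggingFaceModelId: "Qwen/Qwen3-0.6B",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
        containerSpec: {
            // Hypothetical serving image; replace with a real Artifact Registry URI.
            imageUri: "us-docker.pkg.dev/my-project/my-repo/my-server:latest",
            commands: ["/usr/bin/server"],        // overrides the image's ENTRYPOINT
            args: ["--port", "$(AIP_HTTP_PORT)"], // $(VAR) references are expanded by Vertex AI
            ports: [{ containerPort: 8080 }],     // the first port receives predictions and health checks
            predictRoute: "/foo",                 // POST prediction requests are forwarded here
            healthRoute: "/bar",                  // GET health checks are sent here
            deploymentTimeout: "3600s",           // assumed duration-string form; the limit is 2 hours
        },
    },
});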
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnvArgs
- Name string
- Name of the environment variable. Must be a valid C identifier.
- Value string
- Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
- Name string
- Name of the environment variable. Must be a valid C identifier.
- Value string
- Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
- name String
- Name of the environment variable. Must be a valid C identifier.
- value String
- Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
- name string
- Name of the environment variable. Must be a valid C identifier.
- value string
- Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
- name str
- Name of the environment variable. Must be a valid C identifier.
- value str
- Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
- name String
- Name of the environment variable. Must be a valid C identifier.
- value String
- Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
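Since later entries can reference earlier ones, order matters when composing values. A minimal TypeScript sketch of the expansion and escaping rules above (the image URI is hypothetical):

import * as gcp from "@pulumi/gcp";

const envDemo = new gcp.vertex.AiEndpointWithModelGardenDeployment("env-demo", {
    huggingFaceModelId: "Qwen/Qwen3-0.6B",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
        containerSpec: {
            imageUri: "us-docker.pkg.dev/my-project/my-repo/my-server:latest", // hypothetical
            envs: [
                { name: "VAR_1", value: "foo" },
                { name: "VAR_2", value: "$(VAR_1) bar" }, // expands to "foo bar"
                { name: "RAW", value: "$$(VAR_1)" },      // escaped; stays as the literal "$(VAR_1)"
            ],
        },
    },
});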
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPortArgs
- ContainerPort int - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- ContainerPort int - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- containerPort Integer - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- containerPort number - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- container_port int - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- containerPort Number - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeArgs
- Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- FailureThreshold int - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- InitialDelaySeconds int - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- PeriodSeconds int - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- SuccessThreshold int - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- TimeoutSeconds int - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- FailureThreshold int - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- InitialDelaySeconds int - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- PeriodSeconds int - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- SuccessThreshold int - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- TimeoutSeconds int - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold Integer - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- initialDelaySeconds Integer - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds Integer - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- successThreshold Integer - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds Integer - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold number - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- initialDelaySeconds number - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds number - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- successThreshold number - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds number - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- exec_ AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- failure_threshold int - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- http_get AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- initial_delay_seconds int - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- period_seconds int - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- success_threshold int - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- tcp_socket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeout_seconds int - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- exec Property Map - ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold Number - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- grpc Property Map - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet Property Map - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- initialDelaySeconds Number - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds Number - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- successThreshold Number - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- tcpSocket Property Map - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds Number - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
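As a sketch of how these fields combine (the values and /healthz path are illustrative, the timing respects the constraints stated above, and the type path assumes the usual @pulumi/gcp input-type naming):

import * as gcp from "@pulumi/gcp";

const healthProbe: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe = {
    httpGet: { path: "/healthz", port: 8080 }, // hypothetical health endpoint
    initialDelaySeconds: 30, // give the model server time to load
    periodSeconds: 10,       // probe every 10 seconds
    timeoutSeconds: 15,      // must be greater than or equal to periodSeconds
    failureThreshold: 3,     // three consecutive failures mark the container unhealthy
};
// Pass this object as healthProbe inside modelConfig.containerSpec.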
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExecArgs
- Commands List<string> - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- Commands []string - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- commands List<String> - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- commands string[] - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- commands Sequence[str] - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- commands List<String> - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
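Because the command is exec'd rather than run in a shell, pipes and conditionals only work if you invoke a shell explicitly. A minimal sketch (the health URL is hypothetical; the type path assumes the usual @pulumi/gcp input-type naming):

import * as gcp from "@pulumi/gcp";

const execAction: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec = {
    // Wrap shell syntax in an explicit shell invocation; a bare "curl ... || exit 1" would not work.
    commands: ["/bin/sh", "-c", "curl -sf http://localhost:8080/healthz || exit 1"],
};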
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpcArgs
- Port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- Service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- Port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- Service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port Integer
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service String
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port number
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service str
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port Number
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service String
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
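A sketch of a gRPC health check (the service name is a hypothetical value to place in the HealthCheckRequest; leaving service unset falls back to gRPC's default behavior; the type path assumes the usual @pulumi/gcp input-type naming):

import * as gcp from "@pulumi/gcp";

const grpcAction: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc = {
    port: 8080,                              // gRPC port of the serving container
    service: "my.package.PredictionService", // hypothetical service name for the HealthCheckRequest
};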
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetArgs
- Host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- HttpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader> - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- Path string
- Path to access on the HTTP server.
- Port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- Scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- Host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- HttpHeaders []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- Path string
- Path to access on the HTTP server.
- Port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- Scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host String
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader> - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path String
- Path to access on the HTTP server.
- port Integer
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme String
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader[] - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path string
- Path to access on the HTTP server.
- port number
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host str
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- http_headers Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader] - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path str
- Path to access on the HTTP server.
- port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme str
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host String
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders List<Property Map> - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path String
- Path to access on the HTTP server.
- port Number
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme String
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
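Per the note on the host field, the usual pattern is to leave it unset and send a "Host" header instead. A sketch (the header value is hypothetical, and the header's name/value shape is assumed to mirror the Kubernetes HTTPHeader type, since its fields are not listed in this section):

import * as gcp from "@pulumi/gcp";

const httpGetAction: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet = {
    path: "/healthz",
    port: 8080,
    scheme: "HTTP", // or "HTTPS"
    httpHeaders: [{ name: "Host", value: "model.internal.example" }], // hypothetical host value
};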
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeaderArgs
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocketArgs
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeArgs
- Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- FailureThreshold int - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- InitialDelaySeconds int - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- PeriodSeconds int - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- SuccessThreshold int - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- TimeoutSeconds int - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- FailureThreshold int - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- InitialDelaySeconds int - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- PeriodSeconds int - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- SuccessThreshold int - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- TimeoutSeconds int - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold Integer - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- initialDelaySeconds Integer - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds Integer - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- successThreshold Integer - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds Integer - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold number - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- initialDelaySeconds number - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds number - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- successThreshold number - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds number - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- exec_ AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- failure_threshold int - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- http_get AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- initial_delay_seconds int - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- period_seconds int - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- success_threshold int - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- tcp_socket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeout_seconds int - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- exec Property Map - ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold Number - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- grpc Property Map - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet Property Map - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- initialDelaySeconds Number - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds Number - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- successThreshold Number - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- tcpSocket Property Map - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds Number - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
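A sketch of a TCP-based liveness probe (the tcpSocket shape is assumed to mirror its Kubernetes counterpart, a port to connect to, since its fields are not listed in this section; the type path assumes the usual @pulumi/gcp input-type naming):

import * as gcp from "@pulumi/gcp";

const livenessProbe: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe = {
    tcpSocket: { port: 8080 }, // assumed field; the probe succeeds if the socket opens
    periodSeconds: 10,
    failureThreshold: 3,
};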
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExecArgs
- Commands List<string> - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- Commands []string - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- commands List<String> - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- commands string[] - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- commands Sequence[str] - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- commands List<String> - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpcArgs
- Port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- Service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- Port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- Service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port Integer
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service String
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port number
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service str
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port Number
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service String
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetArgs
- Host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- HttpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader> - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- Path string
- Path to access on the HTTP server.
- Port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- Scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- Host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- HttpHeaders []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- Path string
- Path to access on the HTTP server.
- Port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- Scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host String
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader> - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path String
- Path to access on the HTTP server.
- port Integer
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme String
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader[] - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path string
- Path to access on the HTTP server.
- port number
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host str
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- http_headers Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader] - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path str
- Path to access on the HTTP server.
- port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme str
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host String
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders List<Property Map> - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path String
- Path to access on the HTTP server.
- port Number
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme String
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeaderArgs
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocketArgs
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPortArgs
- ContainerPort int - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- ContainerPort int - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- containerPort Integer - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- containerPort number - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- container_port int - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- containerPort Number - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeArgs
- Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec
- ExecAction specifies a command to execute. Structure is documented below.
- FailureThreshold int
- Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
- Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc
- GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet
- HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
- InitialDelaySeconds int
- Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
- PeriodSeconds int
- How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
- SuccessThreshold int
- Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
- TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket
- TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- TimeoutSeconds int
- Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
- Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec
- ExecAction specifies a command to execute. Structure is documented below.
- FailureThreshold int
- Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
- Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc
- GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet
- HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
- InitialDelaySeconds int
- Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
- PeriodSeconds int
- How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
- SuccessThreshold int
- Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
- TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket
- TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- TimeoutSeconds int
- Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
- exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec
- ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold Integer
- Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc
- GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet
- HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
- initialDelaySeconds Integer
- Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds Integer
- How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
- successThreshold Integer
- Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
- tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket
- TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds Integer
- Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
- exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec
- ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold number
- Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc
- GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet
- HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
- initialDelaySeconds number
- Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds number
- How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
- successThreshold number
- Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
- tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket
- TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds number
- Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
- exec_ AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec
- ExecAction specifies a command to execute. Structure is documented below.
- failure_threshold int
- Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc
- GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- http_get AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet
- HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
- initial_delay_seconds int
- Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
- period_seconds int
- How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
- success_threshold int
- Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
- tcp_socket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket
- TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeout_seconds int
- Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
- exec Property Map
- ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold Number
- Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
- grpc Property Map
- GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet Property Map
- HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
- initialDelaySeconds Number
- Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds Number
- How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
- successThreshold Number
- Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
- tcpSocket Property Map
- TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds Number
- Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
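To see how the timing fields interact, here is a TypeScript sketch of a standalone startup-probe value; the gcp.types.input path is the SDK's usual generated layout, and the endpoint path and numbers are illustrative. Note that periodSeconds stays below timeoutSeconds, satisfying both constraints above.
import * as gcp from "@pulumi/gcp";

// Poll /healthz every 10s, waiting 30s before the first check.
// timeoutSeconds (15) is >= periodSeconds (10), as required above.
const startupProbe: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe = {
    httpGet: {
        path: "/healthz",
        port: 8080,
    },
    initialDelaySeconds: 30,
    periodSeconds: 10,
    timeoutSeconds: 15,
    failureThreshold: 5,
    successThreshold: 1,
};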
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExecArgs
- Commands List<string>
- Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero is unhealthy.
- Commands []string
- Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero is unhealthy.
- commands List<String>
- Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero is unhealthy.
- commands string[]
- Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero is unhealthy.
- commands Sequence[str]
- Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero is unhealthy.
- commands List<String>
- Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero is unhealthy.
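Because the command is exec'd rather than run in a shell, pipes and redirects only work if you invoke a shell explicitly. A small TypeScript sketch (the health-check command itself is hypothetical):
import * as gcp from "@pulumi/gcp";

// Wrap the check in `sh -c` so shell syntax works; a bare command
// would be exec'd directly with no shell interpretation.
const execAction: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec = {
    commands: ["/bin/sh", "-c", "curl -sf http://localhost:8080/healthz"],
};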
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpcArgs
- Port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- Service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- Port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- Service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port Integer
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service String
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port number
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service str
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port Number
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service String
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
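A gRPC probe needs only the port; the service name is optional and, per the gRPC health-checking protocol linked above, defaults to gRPC's standard behavior when omitted. Illustrative TypeScript sketch (the service name is hypothetical):
import * as gcp from "@pulumi/gcp";

// Check the container's gRPC health service on port 8080.
const grpcAction: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc = {
    port: 8080,
    service: "serving.ModelService", // hypothetical; omit to use gRPC's default behavior
};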
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetArgs
- Host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- HttpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader>
- Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- Path string
- Path to access on the HTTP server.
- Port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- Scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- Host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- HttpHeaders []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader
- Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- Path string
- Path to access on the HTTP server.
- Port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- Scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host String
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader>
- Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path String
- Path to access on the HTTP server.
- port Integer
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme String
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader[]
- Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path string
- Path to access on the HTTP server.
- port number
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host str
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- http_headers Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader]
- Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path str
- Path to access on the HTTP server.
- port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme str
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host String
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders List<Property Map>
- Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path String
- Path to access on the HTTP server.
- port Number
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme String
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
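Following the note above about setting "Host" via httpHeaders rather than the host field, here is an illustrative TypeScript sketch. The name/value shape of each header entry follows the Kubernetes HttpHeader convention and is an assumption, since this page does not list the HttpHeader type's fields.
import * as gcp from "@pulumi/gcp";

// An HTTPS health check that overrides the Host header.
const httpGetAction: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet = {
    path: "/healthz",
    port: 8443,
    scheme: "HTTPS",
    // Setting "Host" here is preferred over the `host` field above.
    httpHeaders: [{ name: "Host", value: "model.internal.example" }], // name/value fields assumed
};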
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeaderArgs
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocketArgs
Import
This resource does not support import.
To learn more about importing existing cloud resources, see Importing resources.
Package Details
- Repository
- Google Cloud (GCP) Classic pulumi/pulumi-gcp
- License
- Apache-2.0
- Notes
- This Pulumi package is based on the
google-beta
Terraform Provider.