gcp.vertex.AiEndpointWithModelGardenDeployment
Create an Endpoint and deploy a Model Garden model to it.
To get more information about AiEndpointWithModelGardenDeployment, see:
- API documentation
- How-to Guides
Example Usage
Vertex AI Deploy Basic
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
});
import pulumi
import pulumi_gcp as gcp
deploy = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy",
publisher_model_name="publishers/google/models/paligemma@paligemma-224-float32",
location="us-central1",
model_config={
"accept_eula": True,
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/vertex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy", &vertex.AiEndpointWithModelGardenDeploymentArgs{
PublisherModelName: pulumi.String("publishers/google/models/paligemma@paligemma-224-float32"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var deploy = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy", new()
{
PublisherModelName = "publishers/google/models/paligemma@paligemma-224-float32",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeployment;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeploymentArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var deploy = new AiEndpointWithModelGardenDeployment("deploy", AiEndpointWithModelGardenDeploymentArgs.builder()
.publisherModelName("publishers/google/models/paligemma@paligemma-224-float32")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.build());
}
}
resources:
deploy:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
properties:
publisherModelName: publishers/google/models/paligemma@paligemma-224-float32
location: us-central1
modelConfig:
acceptEula: true
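Every Pulumi resource also exposes a provider-assigned id output; a minimal TypeScript follow-up to the basic example above (the export name is illustrative):
// Export the deployment's ID so it appears in `pulumi stack output`.
export const deploymentId = deploy.id;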
Vertex AI Deploy Hugging Face Model
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
huggingFaceModelId: "Qwen/Qwen3-0.6B",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
});
import pulumi
import pulumi_gcp as gcp
deploy = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy",
hugging_face_model_id="Qwen/Qwen3-0.6B",
location="us-central1",
model_config={
"accept_eula": True,
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/vertex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy", &vertex.AiEndpointWithModelGardenDeploymentArgs{
HuggingFaceModelId: pulumi.String("Qwen/Qwen3-0.6B"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var deploy = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy", new()
{
HuggingFaceModelId = "Qwen/Qwen3-0.6B",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeployment;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeploymentArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var deploy = new AiEndpointWithModelGardenDeployment("deploy", AiEndpointWithModelGardenDeploymentArgs.builder()
.huggingFaceModelId("Qwen/Qwen3-0.6B")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.build());
}
}
resources:
deploy:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
properties:
huggingFaceModelId: Qwen/Qwen3-0.6B
location: us-central1
modelConfig:
acceptEula: true
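For gated Hugging Face models, the model config also accepts an access token (huggingFaceAccessToken, listed in the constructor reference below). A minimal TypeScript sketch, assuming the token is stored as a Pulumi config secret; the config key and resource name are illustrative:
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const cfg = new pulumi.Config();
// Read the token as a secret so it stays encrypted in stack state.
const hfToken = cfg.requireSecret("huggingFaceAccessToken");
const deployGated = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gated", {
    huggingFaceModelId: "Qwen/Qwen3-0.6B",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
        huggingFaceAccessToken: hfToken,
    },
});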
Vertex AI Deploy With Configs
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
deployConfig: {
dedicatedResources: {
machineSpec: {
machineType: "g2-standard-16",
acceleratorType: "NVIDIA_L4",
acceleratorCount: 1,
},
minReplicaCount: 1,
},
},
});
import pulumi
import pulumi_gcp as gcp
deploy = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy",
publisher_model_name="publishers/google/models/paligemma@paligemma-224-float32",
location="us-central1",
model_config={
"accept_eula": True,
},
deploy_config={
"dedicated_resources": {
"machine_spec": {
"machine_type": "g2-standard-16",
"accelerator_type": "NVIDIA_L4",
"accelerator_count": 1,
},
"min_replica_count": 1,
},
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/vertex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy", &vertex.AiEndpointWithModelGardenDeploymentArgs{
PublisherModelName: pulumi.String("publishers/google/models/paligemma@paligemma-224-float32"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
MachineType: pulumi.String("g2-standard-16"),
AcceleratorType: pulumi.String("NVIDIA_L4"),
AcceleratorCount: pulumi.Int(1),
},
MinReplicaCount: pulumi.Int(1),
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var deploy = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy", new()
{
PublisherModelName = "publishers/google/models/paligemma@paligemma-224-float32",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
{
DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
{
MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
{
MachineType = "g2-standard-16",
AcceleratorType = "NVIDIA_L4",
AcceleratorCount = 1,
},
MinReplicaCount = 1,
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeployment;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeploymentArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var deploy = new AiEndpointWithModelGardenDeployment("deploy", AiEndpointWithModelGardenDeploymentArgs.builder()
.publisherModelName("publishers/google/models/paligemma@paligemma-224-float32")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
.dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
.machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
.machineType("g2-standard-16")
.acceleratorType("NVIDIA_L4")
.acceleratorCount(1)
.build())
.minReplicaCount(1)
.build())
.build())
.build());
}
}
resources:
deploy:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
properties:
publisherModelName: publishers/google/models/paligemma@paligemma-224-float32
location: us-central1
modelConfig:
acceptEula: true
deployConfig:
dedicatedResources:
machineSpec:
machineType: g2-standard-16
acceleratorType: NVIDIA_L4
acceleratorCount: 1
minReplicaCount: 1
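deployConfig.dedicatedResources also supports autoscaling through maxReplicaCount and autoscalingMetricSpecs (both shown in the constructor reference below). A hedged TypeScript sketch; the metric name is an assumption based on Vertex AI's autoscaling metrics and should be verified against the API documentation:
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const deployAutoscaled = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-autoscaled", {
    publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
    },
    deployConfig: {
        dedicatedResources: {
            machineSpec: {
                machineType: "g2-standard-16",
                acceleratorType: "NVIDIA_L4",
                acceleratorCount: 1,
            },
            minReplicaCount: 1,
            maxReplicaCount: 3,
            autoscalingMetricSpecs: [{
                // Assumed metric name -- verify against the Vertex AI autoscaling docs.
                metricName: "aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle",
                target: 60,
            }],
        },
    },
});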
Vertex AI Deploy Multiple Models In Parallel
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const deploy_gemma_11_2b_it = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it", {
publisherModelName: "publishers/google/models/gemma@gemma-1.1-2b-it",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
deployConfig: {
dedicatedResources: {
machineSpec: {
machineType: "g2-standard-12",
acceleratorType: "us-central1",
acceleratorCount: 1,
},
minReplicaCount: 1,
},
},
});
const deploy_qwen3_06b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b", {
huggingFaceModelId: "Qwen/Qwen3-0.6B",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
deployConfig: {
dedicatedResources: {
machineSpec: {
machineType: "g2-standard-12",
acceleratorType: "NVIDIA_L4",
acceleratorCount: 1,
},
minReplicaCount: 1,
},
},
});
const deploy_llama_32_1b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b", {
publisherModelName: "publishers/meta/models/llama3-2@llama-3.2-1b",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
deployConfig: {
dedicatedResources: {
machineSpec: {
machineType: "g2-standard-12",
acceleratorType: "NVIDIA_L4",
acceleratorCount: 1,
},
minReplicaCount: 1,
},
},
});
import pulumi
import pulumi_gcp as gcp
deploy_gemma_11_2b_it = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it",
publisher_model_name="publishers/google/models/gemma@gemma-1.1-2b-it",
location="us-central1",
model_config={
"accept_eula": True,
},
deploy_config={
"dedicated_resources": {
"machine_spec": {
"machine_type": "g2-standard-12",
"accelerator_type": "us-central1",
"accelerator_count": 1,
},
"min_replica_count": 1,
},
})
deploy_qwen3_06b = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b",
hugging_face_model_id="Qwen/Qwen3-0.6B",
location="us-central1",
model_config={
"accept_eula": True,
},
deploy_config={
"dedicated_resources": {
"machine_spec": {
"machine_type": "g2-standard-12",
"accelerator_type": "NVIDIA_L4",
"accelerator_count": 1,
},
"min_replica_count": 1,
},
})
deploy_llama_32_1b = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b",
publisher_model_name="publishers/meta/models/llama3-2@llama-3.2-1b",
location="us-central1",
model_config={
"accept_eula": True,
},
deploy_config={
"dedicated_resources": {
"machine_spec": {
"machine_type": "g2-standard-12",
"accelerator_type": "NVIDIA_L4",
"accelerator_count": 1,
},
"min_replica_count": 1,
},
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/vertex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-gemma-1_1-2b-it", &vertex.AiEndpointWithModelGardenDeploymentArgs{
PublisherModelName: pulumi.String("publishers/google/models/gemma@gemma-1.1-2b-it"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
MachineType: pulumi.String("g2-standard-12"),
AcceleratorType: pulumi.String("us-central1"),
AcceleratorCount: pulumi.Int(1),
},
MinReplicaCount: pulumi.Int(1),
},
},
})
if err != nil {
return err
}
_, err = vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-qwen3-0_6b", &vertex.AiEndpointWithModelGardenDeploymentArgs{
HuggingFaceModelId: pulumi.String("Qwen/Qwen3-0.6B"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
MachineType: pulumi.String("g2-standard-12"),
AcceleratorType: pulumi.String("NVIDIA_L4"),
AcceleratorCount: pulumi.Int(1),
},
MinReplicaCount: pulumi.Int(1),
},
},
})
if err != nil {
return err
}
_, err = vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-llama-3_2-1b", &vertex.AiEndpointWithModelGardenDeploymentArgs{
PublisherModelName: pulumi.String("publishers/meta/models/llama3-2@llama-3.2-1b"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
MachineType: pulumi.String("g2-standard-12"),
AcceleratorType: pulumi.String("NVIDIA_L4"),
AcceleratorCount: pulumi.Int(1),
},
MinReplicaCount: pulumi.Int(1),
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var deploy_gemma_11_2b_it = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it", new()
{
PublisherModelName = "publishers/google/models/gemma@gemma-1.1-2b-it",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
{
DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
{
MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
{
MachineType = "g2-standard-12",
AcceleratorType = "us-central1",
AcceleratorCount = 1,
},
MinReplicaCount = 1,
},
},
});
var deploy_qwen3_06b = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b", new()
{
HuggingFaceModelId = "Qwen/Qwen3-0.6B",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
{
DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
{
MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
{
MachineType = "g2-standard-12",
AcceleratorType = "NVIDIA_L4",
AcceleratorCount = 1,
},
MinReplicaCount = 1,
},
},
});
var deploy_llama_32_1b = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b", new()
{
PublisherModelName = "publishers/meta/models/llama3-2@llama-3.2-1b",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
{
DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
{
MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
{
MachineType = "g2-standard-12",
AcceleratorType = "NVIDIA_L4",
AcceleratorCount = 1,
},
MinReplicaCount = 1,
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeployment;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeploymentArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var deploy_gemma_11_2b_it = new AiEndpointWithModelGardenDeployment("deploy-gemma-11-2b-it", AiEndpointWithModelGardenDeploymentArgs.builder()
.publisherModelName("publishers/google/models/gemma@gemma-1.1-2b-it")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
.dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
.machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
.machineType("g2-standard-12")
.acceleratorType("us-central1")
.acceleratorCount(1)
.build())
.minReplicaCount(1)
.build())
.build())
.build());
var deploy_qwen3_06b = new AiEndpointWithModelGardenDeployment("deploy-qwen3-06b", AiEndpointWithModelGardenDeploymentArgs.builder()
.huggingFaceModelId("Qwen/Qwen3-0.6B")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
.dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
.machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
.machineType("g2-standard-12")
.acceleratorType("NVIDIA_L4")
.acceleratorCount(1)
.build())
.minReplicaCount(1)
.build())
.build())
.build());
var deploy_llama_32_1b = new AiEndpointWithModelGardenDeployment("deploy-llama-32-1b", AiEndpointWithModelGardenDeploymentArgs.builder()
.publisherModelName("publishers/meta/models/llama3-2@llama-3.2-1b")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
.dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
.machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
.machineType("g2-standard-12")
.acceleratorType("NVIDIA_L4")
.acceleratorCount(1)
.build())
.minReplicaCount(1)
.build())
.build())
.build());
}
}
resources:
deploy-gemma-11-2b-it:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
name: deploy-gemma-1_1-2b-it
properties:
publisherModelName: publishers/google/models/gemma@gemma-1.1-2b-it
location: us-central1
modelConfig:
acceptEula: true
deployConfig:
dedicatedResources:
machineSpec:
machineType: g2-standard-12
acceleratorType: NVIDIA_L4
acceleratorCount: 1
minReplicaCount: 1
deploy-qwen3-06b:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
name: deploy-qwen3-0_6b
properties:
huggingFaceModelId: Qwen/Qwen3-0.6B
location: us-central1
modelConfig:
acceptEula: true
deployConfig:
dedicatedResources:
machineSpec:
machineType: g2-standard-12
acceleratorType: NVIDIA_L4
acceleratorCount: 1
minReplicaCount: 1
deploy-llama-32-1b:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
name: deploy-llama-3_2-1b
properties:
publisherModelName: publishers/meta/models/llama3-2@llama-3.2-1b
location: us-central1
modelConfig:
acceptEula: true
deployConfig:
dedicatedResources:
machineSpec:
machineType: g2-standard-12
acceleratorType: NVIDIA_L4
acceleratorCount: 1
minReplicaCount: 1
Vertex AI Deploy Multiple Models In Sequence
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const deploy_gemma_11_2b_it = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it", {
publisherModelName: "publishers/google/models/gemma@gemma-1.1-2b-it",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
deployConfig: {
dedicatedResources: {
machineSpec: {
machineType: "g2-standard-12",
acceleratorType: "NVIDIA_L4",
acceleratorCount: 1,
},
minReplicaCount: 1,
},
},
});
const deploy_qwen3_06b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b", {
huggingFaceModelId: "Qwen/Qwen3-0.6B",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
deployConfig: {
dedicatedResources: {
machineSpec: {
machineType: "g2-standard-12",
acceleratorType: "NVIDIA_L4",
acceleratorCount: 1,
},
minReplicaCount: 1,
},
},
}, {
dependsOn: [deploy_gemma_11_2b_it],
});
const deploy_llama_32_1b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b", {
publisherModelName: "publishers/meta/models/llama3-2@llama-3.2-1b",
location: "us-central1",
modelConfig: {
acceptEula: true,
},
deployConfig: {
dedicatedResources: {
machineSpec: {
machineType: "g2-standard-12",
acceleratorType: "NVIDIA_L4",
acceleratorCount: 1,
},
minReplicaCount: 1,
},
},
}, {
dependsOn: [deploy_qwen3_06b],
});
import pulumi
import pulumi_gcp as gcp
deploy_gemma_11_2b_it = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it",
publisher_model_name="publishers/google/models/gemma@gemma-1.1-2b-it",
location="us-central1",
model_config={
"accept_eula": True,
},
deploy_config={
"dedicated_resources": {
"machine_spec": {
"machine_type": "g2-standard-12",
"accelerator_type": "NVIDIA_L4",
"accelerator_count": 1,
},
"min_replica_count": 1,
},
})
deploy_qwen3_06b = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b",
hugging_face_model_id="Qwen/Qwen3-0.6B",
location="us-central1",
model_config={
"accept_eula": True,
},
deploy_config={
"dedicated_resources": {
"machine_spec": {
"machine_type": "g2-standard-12",
"accelerator_type": "NVIDIA_L4",
"accelerator_count": 1,
},
"min_replica_count": 1,
},
},
opts = pulumi.ResourceOptions(depends_on=[deploy_gemma_11_2b_it]))
deploy_llama_32_1b = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b",
publisher_model_name="publishers/meta/models/llama3-2@llama-3.2-1b",
location="us-central1",
model_config={
"accept_eula": True,
},
deploy_config={
"dedicated_resources": {
"machine_spec": {
"machine_type": "g2-standard-12",
"accelerator_type": "NVIDIA_L4",
"accelerator_count": 1,
},
"min_replica_count": 1,
},
},
opts = pulumi.ResourceOptions(depends_on=[deploy_qwen3_06b]))
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v8/go/gcp/vertex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
deploy_gemma_11_2b_it, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-gemma-1_1-2b-it", &vertex.AiEndpointWithModelGardenDeploymentArgs{
PublisherModelName: pulumi.String("publishers/google/models/gemma@gemma-1.1-2b-it"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
MachineType: pulumi.String("g2-standard-12"),
AcceleratorType: pulumi.String("NVIDIA_L4"),
AcceleratorCount: pulumi.Int(1),
},
MinReplicaCount: pulumi.Int(1),
},
},
})
if err != nil {
return err
}
deploy_qwen3_06b, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-qwen3-0_6b", &vertex.AiEndpointWithModelGardenDeploymentArgs{
HuggingFaceModelId: pulumi.String("Qwen/Qwen3-0.6B"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
MachineType: pulumi.String("g2-standard-12"),
AcceleratorType: pulumi.String("NVIDIA_L4"),
AcceleratorCount: pulumi.Int(1),
},
MinReplicaCount: pulumi.Int(1),
},
},
}, pulumi.DependsOn([]pulumi.Resource{
deploy_gemma_11_2b_it,
}))
if err != nil {
return err
}
_, err = vertex.NewAiEndpointWithModelGardenDeployment(ctx, "deploy-llama-3_2-1b", &vertex.AiEndpointWithModelGardenDeploymentArgs{
PublisherModelName: pulumi.String("publishers/meta/models/llama3-2@llama-3.2-1b"),
Location: pulumi.String("us-central1"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(true),
},
DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
MachineType: pulumi.String("g2-standard-12"),
AcceleratorType: pulumi.String("NVIDIA_L4"),
AcceleratorCount: pulumi.Int(1),
},
MinReplicaCount: pulumi.Int(1),
},
},
}, pulumi.DependsOn([]pulumi.Resource{
deploy_qwen3_06b,
}))
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var deploy_gemma_11_2b_it = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it", new()
{
PublisherModelName = "publishers/google/models/gemma@gemma-1.1-2b-it",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
{
DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
{
MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
{
MachineType = "g2-standard-12",
AcceleratorType = "NVIDIA_L4",
AcceleratorCount = 1,
},
MinReplicaCount = 1,
},
},
});
var deploy_qwen3_06b = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b", new()
{
HuggingFaceModelId = "Qwen/Qwen3-0.6B",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
{
DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
{
MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
{
MachineType = "g2-standard-12",
AcceleratorType = "NVIDIA_L4",
AcceleratorCount = 1,
},
MinReplicaCount = 1,
},
},
}, new CustomResourceOptions
{
DependsOn =
{
deploy_gemma_11_2b_it,
},
});
var deploy_llama_32_1b = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b", new()
{
PublisherModelName = "publishers/meta/models/llama3-2@llama-3.2-1b",
Location = "us-central1",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = true,
},
DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
{
DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
{
MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
{
MachineType = "g2-standard-12",
AcceleratorType = "NVIDIA_L4",
AcceleratorCount = 1,
},
MinReplicaCount = 1,
},
},
}, new CustomResourceOptions
{
DependsOn =
{
deploy_qwen3_06b,
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeployment;
import com.pulumi.gcp.vertex.AiEndpointWithModelGardenDeploymentArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs;
import com.pulumi.resources.CustomResourceOptions;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var deploy_gemma_11_2b_it = new AiEndpointWithModelGardenDeployment("deploy-gemma-11-2b-it", AiEndpointWithModelGardenDeploymentArgs.builder()
.publisherModelName("publishers/google/models/gemma@gemma-1.1-2b-it")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
.dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
.machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
.machineType("g2-standard-12")
.acceleratorType("NVIDIA_L4")
.acceleratorCount(1)
.build())
.minReplicaCount(1)
.build())
.build())
.build());
var deploy_qwen3_06b = new AiEndpointWithModelGardenDeployment("deploy-qwen3-06b", AiEndpointWithModelGardenDeploymentArgs.builder()
.huggingFaceModelId("Qwen/Qwen3-0.6B")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
.dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
.machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
.machineType("g2-standard-12")
.acceleratorType("NVIDIA_L4")
.acceleratorCount(1)
.build())
.minReplicaCount(1)
.build())
.build())
.build(), CustomResourceOptions.builder()
.dependsOn(deploy_gemma_11_2b_it)
.build());
var deploy_llama_32_1b = new AiEndpointWithModelGardenDeployment("deploy-llama-32-1b", AiEndpointWithModelGardenDeploymentArgs.builder()
.publisherModelName("publishers/meta/models/llama3-2@llama-3.2-1b")
.location("us-central1")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(true)
.build())
.deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
.dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
.machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
.machineType("g2-standard-12")
.acceleratorType("NVIDIA_L4")
.acceleratorCount(1)
.build())
.minReplicaCount(1)
.build())
.build())
.build(), CustomResourceOptions.builder()
.dependsOn(deploy_qwen3_06b)
.build());
}
}
resources:
deploy-gemma-11-2b-it:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
name: deploy-gemma-1_1-2b-it
properties:
publisherModelName: publishers/google/models/gemma@gemma-1.1-2b-it
location: us-central1
modelConfig:
acceptEula: true
deployConfig:
dedicatedResources:
machineSpec:
machineType: g2-standard-12
acceleratorType: NVIDIA_L4
acceleratorCount: 1
minReplicaCount: 1
deploy-qwen3-06b:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
name: deploy-qwen3-0_6b
properties:
huggingFaceModelId: Qwen/Qwen3-0.6B
location: us-central1
modelConfig:
acceptEula: true
deployConfig:
dedicatedResources:
machineSpec:
machineType: g2-standard-12
acceleratorType: NVIDIA_L4
acceleratorCount: 1
minReplicaCount: 1
options:
dependsOn:
- ${["deploy-gemma-11-2b-it"]}
deploy-llama-32-1b:
type: gcp:vertex:AiEndpointWithModelGardenDeployment
name: deploy-llama-3_2-1b
properties:
publisherModelName: publishers/meta/models/llama3-2@llama-3.2-1b
location: us-central1
modelConfig:
acceptEula: true
deployConfig:
dedicatedResources:
machineSpec:
machineType: g2-standard-12
acceleratorType: NVIDIA_L4
acceleratorCount: 1
minReplicaCount: 1
options:
dependsOn:
- ${["deploy-qwen3-06b"]}
Create AiEndpointWithModelGardenDeployment Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new AiEndpointWithModelGardenDeployment(name: string, args: AiEndpointWithModelGardenDeploymentArgs, opts?: CustomResourceOptions);
@overload
def AiEndpointWithModelGardenDeployment(resource_name: str,
args: AiEndpointWithModelGardenDeploymentArgs,
opts: Optional[ResourceOptions] = None)
@overload
def AiEndpointWithModelGardenDeployment(resource_name: str,
opts: Optional[ResourceOptions] = None,
location: Optional[str] = None,
deploy_config: Optional[AiEndpointWithModelGardenDeploymentDeployConfigArgs] = None,
endpoint_config: Optional[AiEndpointWithModelGardenDeploymentEndpointConfigArgs] = None,
hugging_face_model_id: Optional[str] = None,
model_config: Optional[AiEndpointWithModelGardenDeploymentModelConfigArgs] = None,
project: Optional[str] = None,
publisher_model_name: Optional[str] = None)
func NewAiEndpointWithModelGardenDeployment(ctx *Context, name string, args AiEndpointWithModelGardenDeploymentArgs, opts ...ResourceOption) (*AiEndpointWithModelGardenDeployment, error)
public AiEndpointWithModelGardenDeployment(string name, AiEndpointWithModelGardenDeploymentArgs args, CustomResourceOptions? opts = null)
public AiEndpointWithModelGardenDeployment(String name, AiEndpointWithModelGardenDeploymentArgs args)
public AiEndpointWithModelGardenDeployment(String name, AiEndpointWithModelGardenDeploymentArgs args, CustomResourceOptions options)
type: gcp:vertex:AiEndpointWithModelGardenDeployment
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource.
- args AiEndpointWithModelGardenDeploymentArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args AiEndpointWithModelGardenDeploymentArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args AiEndpointWithModelGardenDeploymentArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args AiEndpointWithModelGardenDeploymentArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args AiEndpointWithModelGardenDeploymentArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
Constructor example
The following reference example uses placeholder values for all input properties.
var aiEndpointWithModelGardenDeploymentResource = new Gcp.Vertex.AiEndpointWithModelGardenDeployment("aiEndpointWithModelGardenDeploymentResource", new()
{
Location = "string",
DeployConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigArgs
{
DedicatedResources = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
{
MachineSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
{
AcceleratorCount = 0,
AcceleratorType = "string",
MachineType = "string",
MultihostGpuNodeCount = 0,
ReservationAffinity = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinityArgs
{
ReservationAffinityType = "string",
Key = "string",
Values = new[]
{
"string",
},
},
TpuTopology = "string",
},
MinReplicaCount = 0,
AutoscalingMetricSpecs = new[]
{
new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpecArgs
{
MetricName = "string",
Target = 0,
},
},
MaxReplicaCount = 0,
RequiredReplicaCount = 0,
Spot = false,
},
FastTryoutEnabled = false,
SystemLabels =
{
{ "string", "string" },
},
},
EndpointConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentEndpointConfigArgs
{
DedicatedEndpointEnabled = false,
EndpointDisplayName = "string",
},
HuggingFaceModelId = "string",
ModelConfig = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigArgs
{
AcceptEula = false,
ContainerSpec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecArgs
{
ImageUri = "string",
HealthRoute = "string",
DeploymentTimeout = "string",
Envs = new[]
{
new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnvArgs
{
Name = "string",
Value = "string",
},
},
GrpcPorts = new[]
{
new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPortArgs
{
ContainerPort = 0,
},
},
HealthProbe = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeArgs
{
Exec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExecArgs
{
Commands = new[]
{
"string",
},
},
FailureThreshold = 0,
Grpc = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpcArgs
{
Port = 0,
Service = "string",
},
HttpGet = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetArgs
{
Host = "string",
HttpHeaders = new[]
{
new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeaderArgs
{
Name = "string",
Value = "string",
},
},
Path = "string",
Port = 0,
Scheme = "string",
},
InitialDelaySeconds = 0,
PeriodSeconds = 0,
SuccessThreshold = 0,
TcpSocket = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocketArgs
{
Host = "string",
Port = 0,
},
TimeoutSeconds = 0,
},
Args = new[]
{
"string",
},
Commands = new[]
{
"string",
},
LivenessProbe = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeArgs
{
Exec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExecArgs
{
Commands = new[]
{
"string",
},
},
FailureThreshold = 0,
Grpc = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpcArgs
{
Port = 0,
Service = "string",
},
HttpGet = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetArgs
{
Host = "string",
HttpHeaders = new[]
{
new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeaderArgs
{
Name = "string",
Value = "string",
},
},
Path = "string",
Port = 0,
Scheme = "string",
},
InitialDelaySeconds = 0,
PeriodSeconds = 0,
SuccessThreshold = 0,
TcpSocket = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocketArgs
{
Host = "string",
Port = 0,
},
TimeoutSeconds = 0,
},
Ports = new[]
{
new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPortArgs
{
ContainerPort = 0,
},
},
PredictRoute = "string",
SharedMemorySizeMb = "string",
StartupProbe = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeArgs
{
Exec = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExecArgs
{
Commands = new[]
{
"string",
},
},
FailureThreshold = 0,
Grpc = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpcArgs
{
Port = 0,
Service = "string",
},
HttpGet = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetArgs
{
Host = "string",
HttpHeaders = new[]
{
new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeaderArgs
{
Name = "string",
Value = "string",
},
},
Path = "string",
Port = 0,
Scheme = "string",
},
InitialDelaySeconds = 0,
PeriodSeconds = 0,
SuccessThreshold = 0,
TcpSocket = new Gcp.Vertex.Inputs.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocketArgs
{
Host = "string",
Port = 0,
},
TimeoutSeconds = 0,
},
},
HuggingFaceAccessToken = "string",
HuggingFaceCacheEnabled = false,
ModelDisplayName = "string",
},
Project = "string",
PublisherModelName = "string",
});
example, err := vertex.NewAiEndpointWithModelGardenDeployment(ctx, "aiEndpointWithModelGardenDeploymentResource", &vertex.AiEndpointWithModelGardenDeploymentArgs{
Location: pulumi.String("string"),
DeployConfig: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigArgs{
DedicatedResources: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs{
MachineSpec: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs{
AcceleratorCount: pulumi.Int(0),
AcceleratorType: pulumi.String("string"),
MachineType: pulumi.String("string"),
MultihostGpuNodeCount: pulumi.Int(0),
ReservationAffinity: &vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinityArgs{
ReservationAffinityType: pulumi.String("string"),
Key: pulumi.String("string"),
Values: pulumi.StringArray{
pulumi.String("string"),
},
},
TpuTopology: pulumi.String("string"),
},
MinReplicaCount: pulumi.Int(0),
AutoscalingMetricSpecs: vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpecArray{
&vertex.AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpecArgs{
MetricName: pulumi.String("string"),
Target: pulumi.Int(0),
},
},
MaxReplicaCount: pulumi.Int(0),
RequiredReplicaCount: pulumi.Int(0),
Spot: pulumi.Bool(false),
},
FastTryoutEnabled: pulumi.Bool(false),
SystemLabels: pulumi.StringMap{
"string": pulumi.String("string"),
},
},
EndpointConfig: &vertex.AiEndpointWithModelGardenDeploymentEndpointConfigArgs{
DedicatedEndpointEnabled: pulumi.Bool(false),
EndpointDisplayName: pulumi.String("string"),
},
HuggingFaceModelId: pulumi.String("string"),
ModelConfig: &vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs{
AcceptEula: pulumi.Bool(false),
ContainerSpec: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecArgs{
ImageUri: pulumi.String("string"),
HealthRoute: pulumi.String("string"),
DeploymentTimeout: pulumi.String("string"),
Envs: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnvArray{
&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnvArgs{
Name: pulumi.String("string"),
Value: pulumi.String("string"),
},
},
GrpcPorts: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPortArray{
&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPortArgs{
ContainerPort: pulumi.Int(0),
},
},
HealthProbe: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeArgs{
Exec: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExecArgs{
Commands: pulumi.StringArray{
pulumi.String("string"),
},
},
FailureThreshold: pulumi.Int(0),
Grpc: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpcArgs{
Port: pulumi.Int(0),
Service: pulumi.String("string"),
},
HttpGet: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetArgs{
Host: pulumi.String("string"),
HttpHeaders: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeaderArray{
&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeaderArgs{
Name: pulumi.String("string"),
Value: pulumi.String("string"),
},
},
Path: pulumi.String("string"),
Port: pulumi.Int(0),
Scheme: pulumi.String("string"),
},
InitialDelaySeconds: pulumi.Int(0),
PeriodSeconds: pulumi.Int(0),
SuccessThreshold: pulumi.Int(0),
TcpSocket: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocketArgs{
Host: pulumi.String("string"),
Port: pulumi.Int(0),
},
TimeoutSeconds: pulumi.Int(0),
},
Args: pulumi.StringArray{
pulumi.String("string"),
},
Commands: pulumi.StringArray{
pulumi.String("string"),
},
LivenessProbe: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeArgs{
Exec: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExecArgs{
Commands: pulumi.StringArray{
pulumi.String("string"),
},
},
FailureThreshold: pulumi.Int(0),
Grpc: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpcArgs{
Port: pulumi.Int(0),
Service: pulumi.String("string"),
},
HttpGet: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetArgs{
Host: pulumi.String("string"),
HttpHeaders: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeaderArray{
&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeaderArgs{
Name: pulumi.String("string"),
Value: pulumi.String("string"),
},
},
Path: pulumi.String("string"),
Port: pulumi.Int(0),
Scheme: pulumi.String("string"),
},
InitialDelaySeconds: pulumi.Int(0),
PeriodSeconds: pulumi.Int(0),
SuccessThreshold: pulumi.Int(0),
TcpSocket: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocketArgs{
Host: pulumi.String("string"),
Port: pulumi.Int(0),
},
TimeoutSeconds: pulumi.Int(0),
},
Ports: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPortArray{
&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPortArgs{
ContainerPort: pulumi.Int(0),
},
},
PredictRoute: pulumi.String("string"),
SharedMemorySizeMb: pulumi.String("string"),
StartupProbe: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeArgs{
Exec: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExecArgs{
Commands: pulumi.StringArray{
pulumi.String("string"),
},
},
FailureThreshold: pulumi.Int(0),
Grpc: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpcArgs{
Port: pulumi.Int(0),
Service: pulumi.String("string"),
},
HttpGet: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetArgs{
Host: pulumi.String("string"),
HttpHeaders: vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeaderArray{
&vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeaderArgs{
Name: pulumi.String("string"),
Value: pulumi.String("string"),
},
},
Path: pulumi.String("string"),
Port: pulumi.Int(0),
Scheme: pulumi.String("string"),
},
InitialDelaySeconds: pulumi.Int(0),
PeriodSeconds: pulumi.Int(0),
SuccessThreshold: pulumi.Int(0),
TcpSocket: &vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocketArgs{
Host: pulumi.String("string"),
Port: pulumi.Int(0),
},
TimeoutSeconds: pulumi.Int(0),
},
},
HuggingFaceAccessToken: pulumi.String("string"),
HuggingFaceCacheEnabled: pulumi.Bool(false),
ModelDisplayName: pulumi.String("string"),
},
Project: pulumi.String("string"),
PublisherModelName: pulumi.String("string"),
})
var aiEndpointWithModelGardenDeploymentResource = new AiEndpointWithModelGardenDeployment("aiEndpointWithModelGardenDeploymentResource", AiEndpointWithModelGardenDeploymentArgs.builder()
.location("string")
.deployConfig(AiEndpointWithModelGardenDeploymentDeployConfigArgs.builder()
.dedicatedResources(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs.builder()
.machineSpec(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs.builder()
.acceleratorCount(0)
.acceleratorType("string")
.machineType("string")
.multihostGpuNodeCount(0)
.reservationAffinity(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinityArgs.builder()
.reservationAffinityType("string")
.key("string")
.values("string")
.build())
.tpuTopology("string")
.build())
.minReplicaCount(0)
.autoscalingMetricSpecs(AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpecArgs.builder()
.metricName("string")
.target(0)
.build())
.maxReplicaCount(0)
.requiredReplicaCount(0)
.spot(false)
.build())
.fastTryoutEnabled(false)
.systemLabels(Map.of("string", "string"))
.build())
.endpointConfig(AiEndpointWithModelGardenDeploymentEndpointConfigArgs.builder()
.dedicatedEndpointEnabled(false)
.endpointDisplayName("string")
.build())
.huggingFaceModelId("string")
.modelConfig(AiEndpointWithModelGardenDeploymentModelConfigArgs.builder()
.acceptEula(false)
.containerSpec(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecArgs.builder()
.imageUri("string")
.healthRoute("string")
.deploymentTimeout("string")
.envs(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnvArgs.builder()
.name("string")
.value("string")
.build())
.grpcPorts(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPortArgs.builder()
.containerPort(0)
.build())
.healthProbe(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeArgs.builder()
.exec(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExecArgs.builder()
.commands("string")
.build())
.failureThreshold(0)
.grpc(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpcArgs.builder()
.port(0)
.service("string")
.build())
.httpGet(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetArgs.builder()
.host("string")
.httpHeaders(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeaderArgs.builder()
.name("string")
.value("string")
.build())
.path("string")
.port(0)
.scheme("string")
.build())
.initialDelaySeconds(0)
.periodSeconds(0)
.successThreshold(0)
.tcpSocket(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocketArgs.builder()
.host("string")
.port(0)
.build())
.timeoutSeconds(0)
.build())
.args("string")
.commands("string")
.livenessProbe(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeArgs.builder()
.exec(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExecArgs.builder()
.commands("string")
.build())
.failureThreshold(0)
.grpc(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpcArgs.builder()
.port(0)
.service("string")
.build())
.httpGet(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetArgs.builder()
.host("string")
.httpHeaders(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeaderArgs.builder()
.name("string")
.value("string")
.build())
.path("string")
.port(0)
.scheme("string")
.build())
.initialDelaySeconds(0)
.periodSeconds(0)
.successThreshold(0)
.tcpSocket(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocketArgs.builder()
.host("string")
.port(0)
.build())
.timeoutSeconds(0)
.build())
.ports(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPortArgs.builder()
.containerPort(0)
.build())
.predictRoute("string")
.sharedMemorySizeMb("string")
.startupProbe(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeArgs.builder()
.exec(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExecArgs.builder()
.commands("string")
.build())
.failureThreshold(0)
.grpc(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpcArgs.builder()
.port(0)
.service("string")
.build())
.httpGet(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetArgs.builder()
.host("string")
.httpHeaders(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeaderArgs.builder()
.name("string")
.value("string")
.build())
.path("string")
.port(0)
.scheme("string")
.build())
.initialDelaySeconds(0)
.periodSeconds(0)
.successThreshold(0)
.tcpSocket(AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocketArgs.builder()
.host("string")
.port(0)
.build())
.timeoutSeconds(0)
.build())
.build())
.huggingFaceAccessToken("string")
.huggingFaceCacheEnabled(false)
.modelDisplayName("string")
.build())
.project("string")
.publisherModelName("string")
.build());
ai_endpoint_with_model_garden_deployment_resource = gcp.vertex.AiEndpointWithModelGardenDeployment("aiEndpointWithModelGardenDeploymentResource",
location="string",
deploy_config={
"dedicated_resources": {
"machine_spec": {
"accelerator_count": 0,
"accelerator_type": "string",
"machine_type": "string",
"multihost_gpu_node_count": 0,
"reservation_affinity": {
"reservation_affinity_type": "string",
"key": "string",
"values": ["string"],
},
"tpu_topology": "string",
},
"min_replica_count": 0,
"autoscaling_metric_specs": [{
"metric_name": "string",
"target": 0,
}],
"max_replica_count": 0,
"required_replica_count": 0,
"spot": False,
},
"fast_tryout_enabled": False,
"system_labels": {
"string": "string",
},
},
endpoint_config={
"dedicated_endpoint_enabled": False,
"endpoint_display_name": "string",
},
hugging_face_model_id="string",
model_config={
"accept_eula": False,
"container_spec": {
"image_uri": "string",
"health_route": "string",
"deployment_timeout": "string",
"envs": [{
"name": "string",
"value": "string",
}],
"grpc_ports": [{
"container_port": 0,
}],
"health_probe": {
"exec_": {
"commands": ["string"],
},
"failure_threshold": 0,
"grpc": {
"port": 0,
"service": "string",
},
"http_get": {
"host": "string",
"http_headers": [{
"name": "string",
"value": "string",
}],
"path": "string",
"port": 0,
"scheme": "string",
},
"initial_delay_seconds": 0,
"period_seconds": 0,
"success_threshold": 0,
"tcp_socket": {
"host": "string",
"port": 0,
},
"timeout_seconds": 0,
},
"args": ["string"],
"commands": ["string"],
"liveness_probe": {
"exec_": {
"commands": ["string"],
},
"failure_threshold": 0,
"grpc": {
"port": 0,
"service": "string",
},
"http_get": {
"host": "string",
"http_headers": [{
"name": "string",
"value": "string",
}],
"path": "string",
"port": 0,
"scheme": "string",
},
"initial_delay_seconds": 0,
"period_seconds": 0,
"success_threshold": 0,
"tcp_socket": {
"host": "string",
"port": 0,
},
"timeout_seconds": 0,
},
"ports": [{
"container_port": 0,
}],
"predict_route": "string",
"shared_memory_size_mb": "string",
"startup_probe": {
"exec_": {
"commands": ["string"],
},
"failure_threshold": 0,
"grpc": {
"port": 0,
"service": "string",
},
"http_get": {
"host": "string",
"http_headers": [{
"name": "string",
"value": "string",
}],
"path": "string",
"port": 0,
"scheme": "string",
},
"initial_delay_seconds": 0,
"period_seconds": 0,
"success_threshold": 0,
"tcp_socket": {
"host": "string",
"port": 0,
},
"timeout_seconds": 0,
},
},
"hugging_face_access_token": "string",
"hugging_face_cache_enabled": False,
"model_display_name": "string",
},
project="string",
publisher_model_name="string")
const aiEndpointWithModelGardenDeploymentResource = new gcp.vertex.AiEndpointWithModelGardenDeployment("aiEndpointWithModelGardenDeploymentResource", {
location: "string",
deployConfig: {
dedicatedResources: {
machineSpec: {
acceleratorCount: 0,
acceleratorType: "string",
machineType: "string",
multihostGpuNodeCount: 0,
reservationAffinity: {
reservationAffinityType: "string",
key: "string",
values: ["string"],
},
tpuTopology: "string",
},
minReplicaCount: 0,
autoscalingMetricSpecs: [{
metricName: "string",
target: 0,
}],
maxReplicaCount: 0,
requiredReplicaCount: 0,
spot: false,
},
fastTryoutEnabled: false,
systemLabels: {
string: "string",
},
},
endpointConfig: {
dedicatedEndpointEnabled: false,
endpointDisplayName: "string",
},
huggingFaceModelId: "string",
modelConfig: {
acceptEula: false,
containerSpec: {
imageUri: "string",
healthRoute: "string",
deploymentTimeout: "string",
envs: [{
name: "string",
value: "string",
}],
grpcPorts: [{
containerPort: 0,
}],
healthProbe: {
exec: {
commands: ["string"],
},
failureThreshold: 0,
grpc: {
port: 0,
service: "string",
},
httpGet: {
host: "string",
httpHeaders: [{
name: "string",
value: "string",
}],
path: "string",
port: 0,
scheme: "string",
},
initialDelaySeconds: 0,
periodSeconds: 0,
successThreshold: 0,
tcpSocket: {
host: "string",
port: 0,
},
timeoutSeconds: 0,
},
args: ["string"],
commands: ["string"],
livenessProbe: {
exec: {
commands: ["string"],
},
failureThreshold: 0,
grpc: {
port: 0,
service: "string",
},
httpGet: {
host: "string",
httpHeaders: [{
name: "string",
value: "string",
}],
path: "string",
port: 0,
scheme: "string",
},
initialDelaySeconds: 0,
periodSeconds: 0,
successThreshold: 0,
tcpSocket: {
host: "string",
port: 0,
},
timeoutSeconds: 0,
},
ports: [{
containerPort: 0,
}],
predictRoute: "string",
sharedMemorySizeMb: "string",
startupProbe: {
exec: {
commands: ["string"],
},
failureThreshold: 0,
grpc: {
port: 0,
service: "string",
},
httpGet: {
host: "string",
httpHeaders: [{
name: "string",
value: "string",
}],
path: "string",
port: 0,
scheme: "string",
},
initialDelaySeconds: 0,
periodSeconds: 0,
successThreshold: 0,
tcpSocket: {
host: "string",
port: 0,
},
timeoutSeconds: 0,
},
},
huggingFaceAccessToken: "string",
huggingFaceCacheEnabled: false,
modelDisplayName: "string",
},
project: "string",
publisherModelName: "string",
});
type: gcp:vertex:AiEndpointWithModelGardenDeployment
properties:
deployConfig:
dedicatedResources:
autoscalingMetricSpecs:
- metricName: string
target: 0
machineSpec:
acceleratorCount: 0
acceleratorType: string
machineType: string
multihostGpuNodeCount: 0
reservationAffinity:
key: string
reservationAffinityType: string
values:
- string
tpuTopology: string
maxReplicaCount: 0
minReplicaCount: 0
requiredReplicaCount: 0
spot: false
fastTryoutEnabled: false
systemLabels:
string: string
endpointConfig:
dedicatedEndpointEnabled: false
endpointDisplayName: string
huggingFaceModelId: string
location: string
modelConfig:
acceptEula: false
containerSpec:
args:
- string
commands:
- string
deploymentTimeout: string
envs:
- name: string
value: string
grpcPorts:
- containerPort: 0
healthProbe:
exec:
commands:
- string
failureThreshold: 0
grpc:
port: 0
service: string
httpGet:
host: string
httpHeaders:
- name: string
value: string
path: string
port: 0
scheme: string
initialDelaySeconds: 0
periodSeconds: 0
successThreshold: 0
tcpSocket:
host: string
port: 0
timeoutSeconds: 0
healthRoute: string
imageUri: string
livenessProbe:
exec:
commands:
- string
failureThreshold: 0
grpc:
port: 0
service: string
httpGet:
host: string
httpHeaders:
- name: string
value: string
path: string
port: 0
scheme: string
initialDelaySeconds: 0
periodSeconds: 0
successThreshold: 0
tcpSocket:
host: string
port: 0
timeoutSeconds: 0
ports:
- containerPort: 0
predictRoute: string
sharedMemorySizeMb: string
startupProbe:
exec:
commands:
- string
failureThreshold: 0
grpc:
port: 0
service: string
httpGet:
host: string
httpHeaders:
- name: string
value: string
path: string
port: 0
scheme: string
initialDelaySeconds: 0
periodSeconds: 0
successThreshold: 0
tcpSocket:
host: string
port: 0
timeoutSeconds: 0
huggingFaceAccessToken: string
huggingFaceCacheEnabled: false
modelDisplayName: string
project: string
publisherModelName: string
AiEndpointWithModelGardenDeployment Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
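For example, both declarations below are equivalent; this is a minimal sketch that reuses the Hugging Face model ID format shown in these listings:
import pulumi_gcp as gcp

# Object inputs as a dictionary literal.
deploy_from_dict = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-from-dict",
    hugging_face_model_id="google/gemma-2-2b-it",
    location="us-central1",
    model_config={
        "accept_eula": True,
    })

# The same object input as an argument class.
deploy_from_args = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-from-args",
    hugging_face_model_id="google/gemma-2-2b-it",
    location="us-central1",
    model_config=gcp.vertex.AiEndpointWithModelGardenDeploymentModelConfigArgs(
        accept_eula=True,
    ))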
The AiEndpointWithModelGardenDeployment resource accepts the following input properties:
- Location string
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- DeployConfig AiEndpointWithModelGardenDeploymentDeployConfig
- The deploy config to use for the deployment. Structure is documented below.
- EndpointConfig AiEndpointWithModelGardenDeploymentEndpointConfig
- The endpoint config to use for the deployment. Structure is documented below.
- HuggingFaceModelId string
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- ModelConfig AiEndpointWithModelGardenDeploymentModelConfig
- The model config to use for the deployment. Structure is documented below.
- Project string
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- PublisherModelName string
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- Location string
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- DeployConfig AiEndpointWithModelGardenDeploymentDeployConfigArgs
- The deploy config to use for the deployment. Structure is documented below.
- EndpointConfig AiEndpointWithModelGardenDeploymentEndpointConfigArgs
- The endpoint config to use for the deployment. Structure is documented below.
- HuggingFaceModelId string
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- ModelConfig AiEndpointWithModelGardenDeploymentModelConfigArgs
- The model config to use for the deployment. Structure is documented below.
- Project string
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- PublisherModelName string
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- location String
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- deployConfig AiEndpointWithModelGardenDeploymentDeployConfig
- The deploy config to use for the deployment. Structure is documented below.
- endpointConfig AiEndpointWithModelGardenDeploymentEndpointConfig
- The endpoint config to use for the deployment. Structure is documented below.
- huggingFaceModelId String
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- modelConfig AiEndpointWithModelGardenDeploymentModelConfig
- The model config to use for the deployment. Structure is documented below.
- project String
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- publisherModelName String
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- location string
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- deployConfig AiEndpointWithModelGardenDeploymentDeployConfig
- The deploy config to use for the deployment. Structure is documented below.
- endpointConfig AiEndpointWithModelGardenDeploymentEndpointConfig
- The endpoint config to use for the deployment. Structure is documented below.
- huggingFaceModelId string
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- modelConfig AiEndpointWithModelGardenDeploymentModelConfig
- The model config to use for the deployment. Structure is documented below.
- project string
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- publisherModelName string
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- location str
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- deploy_config AiEndpointWithModelGardenDeploymentDeployConfigArgs
- The deploy config to use for the deployment. Structure is documented below.
- endpoint_config AiEndpointWithModelGardenDeploymentEndpointConfigArgs
- The endpoint config to use for the deployment. Structure is documented below.
- hugging_face_model_id str
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- model_config AiEndpointWithModelGardenDeploymentModelConfigArgs
- The model config to use for the deployment. Structure is documented below.
- project str
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- publisher_model_name str
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- location String
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- deployConfig Property Map
- The deploy config to use for the deployment. Structure is documented below.
- endpointConfig Property Map
- The endpoint config to use for the deployment. Structure is documented below.
- huggingFaceModelId String
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- modelConfig Property Map
- The model config to use for the deployment. Structure is documented below.
- project String
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- publisherModelName String
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
Outputs
All input properties are implicitly available as output properties. Additionally, the AiEndpointWithModelGardenDeployment resource produces the following output properties:
- DeployedModelDisplayName string
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- DeployedModelId string
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- Endpoint string
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- Id string
- The provider-assigned unique ID for this managed resource.
- DeployedModelDisplayName string
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- DeployedModelId string
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- Endpoint string
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- Id string
- The provider-assigned unique ID for this managed resource.
- deployedModelDisplayName String
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- deployedModelId String
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- endpoint String
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- id String
- The provider-assigned unique ID for this managed resource.
- deployedModelDisplayName string
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- deployedModelId string
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- endpoint string
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- id string
- The provider-assigned unique ID for this managed resource.
- deployed_model_display_name str
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- deployed_model_id str
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- endpoint str
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- id str
- The provider-assigned unique ID for this managed resource.
- deployedModelDisplayName String
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- deployedModelId String
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- endpoint String
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- id String
- The provider-assigned unique ID for this managed resource.
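As a sketch, the endpoint and deployed model ID can be exported as stack outputs once the deployment completes; the resource arguments here are illustrative:
import pulumi
import pulumi_gcp as gcp

deploy = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy",
    hugging_face_model_id="google/gemma-2-2b-it",
    location="us-central1",
    model_config={"accept_eula": True})

# Output-only properties resolve after the deploy operation finishes.
pulumi.export("endpoint", deploy.endpoint)
pulumi.export("deployedModelId", deploy.deployed_model_id)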
Look up Existing AiEndpointWithModelGardenDeployment Resource
Get an existing AiEndpointWithModelGardenDeployment resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: AiEndpointWithModelGardenDeploymentState, opts?: CustomResourceOptions): AiEndpointWithModelGardenDeployment
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
deploy_config: Optional[AiEndpointWithModelGardenDeploymentDeployConfigArgs] = None,
deployed_model_display_name: Optional[str] = None,
deployed_model_id: Optional[str] = None,
endpoint: Optional[str] = None,
endpoint_config: Optional[AiEndpointWithModelGardenDeploymentEndpointConfigArgs] = None,
hugging_face_model_id: Optional[str] = None,
location: Optional[str] = None,
model_config: Optional[AiEndpointWithModelGardenDeploymentModelConfigArgs] = None,
project: Optional[str] = None,
publisher_model_name: Optional[str] = None) -> AiEndpointWithModelGardenDeployment
func GetAiEndpointWithModelGardenDeployment(ctx *Context, name string, id IDInput, state *AiEndpointWithModelGardenDeploymentState, opts ...ResourceOption) (*AiEndpointWithModelGardenDeployment, error)
public static AiEndpointWithModelGardenDeployment Get(string name, Input<string> id, AiEndpointWithModelGardenDeploymentState? state, CustomResourceOptions? opts = null)
public static AiEndpointWithModelGardenDeployment get(String name, Output<String> id, AiEndpointWithModelGardenDeploymentState state, CustomResourceOptions options)
resources:
  _:
    type: gcp:vertex:AiEndpointWithModelGardenDeployment
    get:
      id: ${id}
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
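For instance, a minimal Python lookup might read as follows; the ID value is illustrative, so pass the ID your provider reports for the resource:
import pulumi_gcp as gcp

# Read the state of an existing deployment without adopting it into this program.
existing = gcp.vertex.AiEndpointWithModelGardenDeployment.get(
    "existing-deploy",
    id="projects/my-project/locations/us-central1/endpoints/1234567890")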
The following state arguments are supported:
- DeployConfig AiEndpointWithModelGardenDeploymentDeployConfig
- The deploy config to use for the deployment. Structure is documented below.
- DeployedModelDisplayName string
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- DeployedModelId string
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- Endpoint string
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- EndpointConfig AiEndpointWithModelGardenDeploymentEndpointConfig
- The endpoint config to use for the deployment. Structure is documented below.
- HuggingFaceModelId string
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- Location string
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- ModelConfig AiEndpointWithModelGardenDeploymentModelConfig
- The model config to use for the deployment. Structure is documented below.
- Project string
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- PublisherModelName string
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- DeployConfig AiEndpointWithModelGardenDeploymentDeployConfigArgs
- The deploy config to use for the deployment. Structure is documented below.
- DeployedModelDisplayName string
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- DeployedModelId string
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- Endpoint string
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- EndpointConfig AiEndpointWithModelGardenDeploymentEndpointConfigArgs
- The endpoint config to use for the deployment. Structure is documented below.
- HuggingFaceModelId string
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- Location string
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- ModelConfig AiEndpointWithModelGardenDeploymentModelConfigArgs
- The model config to use for the deployment. Structure is documented below.
- Project string
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- PublisherModelName string
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- deployConfig AiEndpointWithModelGardenDeploymentDeployConfig
- The deploy config to use for the deployment. Structure is documented below.
- deployedModelDisplayName String
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- deployedModelId String
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- endpoint String
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- endpointConfig AiEndpointWithModelGardenDeploymentEndpointConfig
- The endpoint config to use for the deployment. Structure is documented below.
- huggingFaceModelId String
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- location String
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- modelConfig AiEndpointWithModelGardenDeploymentModelConfig
- The model config to use for the deployment. Structure is documented below.
- project String
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- publisherModelName String
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- deployConfig AiEndpointWithModelGardenDeploymentDeployConfig
- The deploy config to use for the deployment. Structure is documented below.
- deployedModelDisplayName string
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- deployedModelId string
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- endpoint string
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- endpointConfig AiEndpointWithModelGardenDeploymentEndpointConfig
- The endpoint config to use for the deployment. Structure is documented below.
- huggingFaceModelId string
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- location string
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- modelConfig AiEndpointWithModelGardenDeploymentModelConfig
- The model config to use for the deployment. Structure is documented below.
- project string
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- publisherModelName string
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- deploy_config AiEndpointWithModelGardenDeploymentDeployConfigArgs
- The deploy config to use for the deployment. Structure is documented below.
- deployed_model_display_name str
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- deployed_model_id str
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- endpoint str
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- endpoint_config AiEndpointWithModelGardenDeploymentEndpointConfigArgs
- The endpoint config to use for the deployment. Structure is documented below.
- hugging_face_model_id str
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- location str
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- model_config AiEndpointWithModelGardenDeploymentModelConfigArgs
- The model config to use for the deployment. Structure is documented below.
- project str
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- publisher_model_name str
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
- deployConfig Property Map
- The deploy config to use for the deployment. Structure is documented below.
- deployedModelDisplayName String
- Output only. The display name assigned to the model deployed to the endpoint. This is not required to delete the resource but is used for debug logging.
- deployedModelId String
- Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint. It is required to undeploy the model from the endpoint during resource deletion as described in https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
- endpoint String
- Resource ID segment making up resource endpoint. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- endpointConfig Property Map
- The endpoint config to use for the deployment. Structure is documented below.
- huggingFaceModelId String
- The Hugging Face model to deploy. Format: Hugging Face model ID like google/gemma-2-2b-it.
- location String
- Resource ID segment making up resource location. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
- modelConfig Property Map
- The model config to use for the deployment. Structure is documented below.
- project String
- The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
- publisherModelName String
- The Model Garden model to deploy. Format: publishers/{publisher}/models/{publisher_model}@{version_id}, or publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001.
Supporting Types
AiEndpointWithModelGardenDeploymentDeployConfig, AiEndpointWithModelGardenDeploymentDeployConfigArgs
- DedicatedResources AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources
- A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
- FastTryoutEnabled bool
- If true, enable the QMT fast tryout feature for this model if possible.
- SystemLabels Dictionary<string, string>
- System labels for Model Garden deployments. These labels are managed by Google and for tracking purposes only.
- DedicatedResources AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources
- A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
- FastTryoutEnabled bool
- If true, enable the QMT fast tryout feature for this model if possible.
- SystemLabels map[string]string
- System labels for Model Garden deployments. These labels are managed by Google and for tracking purposes only.
- dedicatedResources AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources
- A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
- fastTryoutEnabled Boolean
- If true, enable the QMT fast tryout feature for this model if possible.
- systemLabels Map<String,String>
- System labels for Model Garden deployments. These labels are managed by Google and for tracking purposes only.
- dedicatedResources AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources
- A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
- fastTryoutEnabled boolean
- If true, enable the QMT fast tryout feature for this model if possible.
- systemLabels {[key: string]: string}
- System labels for Model Garden deployments. These labels are managed by Google and for tracking purposes only.
- dedicated_resources AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources
- A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
- fast_tryout_enabled bool
- If true, enable the QMT fast tryout feature for this model if possible.
- system_labels Mapping[str, str]
- System labels for Model Garden deployments. These labels are managed by Google and for tracking purposes only.
- dedicatedResources Property Map
- A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration. Structure is documented below.
- fastTryoutEnabled Boolean
- If true, enable the QMT fast tryout feature for this model if possible.
- systemLabels Map<String>
- System labels for Model Garden deployments. These labels are managed by Google and for tracking purposes only.
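A minimal Python sketch of a deploy config that requests the fast tryout feature; whether fast tryout is actually used depends on the model, so this is best-effort by design:
import pulumi_gcp as gcp

deploy = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-fast-tryout",
    hugging_face_model_id="google/gemma-2-2b-it",
    location="us-central1",
    model_config={"accept_eula": True},
    deploy_config={
        # Enabled only if the model supports QMT fast tryout.
        "fast_tryout_enabled": True,
    })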
AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResources, AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesArgs
- MachineSpec AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec
- Specification of a single machine. Structure is documented below.
- MinReplicaCount int
- The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
- AutoscalingMetricSpecs List<AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec>
- The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator's duty cycle, and so on; the target defaults to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both the CPU utilization and the accelerator's duty cycle metrics, scaling up when either metric exceeds its target value and scaling down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override the target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
- MaxReplicaCount int
- The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
- RequiredReplicaCount int
- Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, required_replica_count defaults to min_replica_count.
- Spot bool
- If true, schedule the deployment workload on spot VMs.
- MachineSpec AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec
- Specification of a single machine. Structure is documented below.
- MinReplicaCount int
- The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
- AutoscalingMetricSpecs []AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec
- The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator's duty cycle, and so on; the target defaults to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both the CPU utilization and the accelerator's duty cycle metrics, scaling up when either metric exceeds its target value and scaling down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override the target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
- MaxReplicaCount int
- The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
- RequiredReplicaCount int
- Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, required_replica_count defaults to min_replica_count.
- Spot bool
- If true, schedule the deployment workload on spot VMs.
- machineSpec AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec
- Specification of a single machine. Structure is documented below.
- minReplicaCount Integer
- The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
- autoscalingMetricSpecs List<AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec>
- The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator's duty cycle, and so on; the target defaults to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both the CPU utilization and the accelerator's duty cycle metrics, scaling up when either metric exceeds its target value and scaling down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override the target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
- maxReplicaCount Integer
- The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
- requiredReplicaCount Integer
- Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, required_replica_count defaults to min_replica_count.
- spot Boolean
- If true, schedule the deployment workload on spot VMs.
- machineSpec AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec
- Specification of a single machine. Structure is documented below.
- minReplicaCount number
- The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
- autoscalingMetricSpecs AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec[]
- The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator's duty cycle, and so on; the target defaults to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both the CPU utilization and the accelerator's duty cycle metrics, scaling up when either metric exceeds its target value and scaling down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override the target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
- maxReplicaCount number
- The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
- requiredReplicaCount number
- Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, required_replica_count defaults to min_replica_count.
- spot boolean
- If true, schedule the deployment workload on spot VMs.
- machine_spec AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec
- Specification of a single machine. Structure is documented below.
- min_replica_count int
- The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
- autoscaling_metric_specs Sequence[AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec]
- The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator's duty cycle, and so on; the target defaults to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both the CPU utilization and the accelerator's duty cycle metrics, scaling up when either metric exceeds its target value and scaling down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override the target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
- max_replica_count int
- The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
- required_replica_count int
- Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, required_replica_count defaults to min_replica_count.
- spot bool
- If true, schedule the deployment workload on spot VMs.
- machineSpec Property Map
- Specification of a single machine. Structure is documented below.
- minReplicaCount Number
- The minimum number of machine replicas that will always be deployed. This value must be greater than or equal to 1. If traffic increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
- autoscalingMetricSpecs List<Property Map>
- The metric specifications that override a resource utilization metric's target value (CPU utilization, accelerator's duty cycle, and so on; the target defaults to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both the CPU utilization and the accelerator's duty cycle metrics, scaling up when either metric exceeds its target value and scaling down when both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override the target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to aiplatform.googleapis.com/prediction/online/cpu/utilization and autoscaling_metric_specs.target to 80. Structure is documented below.
- maxReplicaCount Number
- The maximum number of replicas that may be deployed when traffic increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what the replicas at maximum can handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count is used as the default. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
- requiredReplicaCount Number
- Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, required_replica_count defaults to min_replica_count.
- spot Boolean
- If true, schedule the deployment workload on spot VMs.
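A minimal Python sketch of dedicated resources follows; the machine type and accelerator pairing are illustrative, so check the machine types supported for prediction before copying them:
import pulumi_gcp as gcp

deploy = gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-dedicated",
    hugging_face_model_id="google/gemma-2-2b-it",
    location="us-central1",
    model_config={"accept_eula": True},
    deploy_config={
        "dedicated_resources": {
            "machine_spec": {
                "machine_type": "g2-standard-12",  # illustrative GPU machine type
                "accelerator_type": "NVIDIA_L4",   # one of the documented enum values
                "accelerator_count": 1,
            },
            "min_replica_count": 1,
            "max_replica_count": 2,
            "spot": True,  # schedule replicas on spot VMs
        },
    })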
AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpec, AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesAutoscalingMetricSpecArgs
- MetricName string
- The resource metric name. Supported metrics:
- For Online Prediction: aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle, aiplatform.googleapis.com/prediction/online/cpu/utilization
- Target int
- The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.
- MetricName string
- The resource metric name. Supported metrics:
- For Online Prediction: aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle, aiplatform.googleapis.com/prediction/online/cpu/utilization
- Target int
- The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.
- metricName String
- The resource metric name. Supported metrics:
- For Online Prediction: aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle, aiplatform.googleapis.com/prediction/online/cpu/utilization
- target Integer
- The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.
- metricName string
- The resource metric name. Supported metrics:
- For Online Prediction: aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle, aiplatform.googleapis.com/prediction/online/cpu/utilization
- target number
- The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.
- metric_name str
- The resource metric name. Supported metrics:
- For Online Prediction: aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle, aiplatform.googleapis.com/prediction/online/cpu/utilization
- target int
- The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.
- metricName String
- The resource metric name. Supported metrics:
- For Online Prediction: aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle, aiplatform.googleapis.com/prediction/online/cpu/utilization
- target Number
- The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.
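Following the override example in the description above, this sketch shows the dictionary form that raises the CPU utilization target to 80; the fragment nests under deploy_config.dedicated_resources, and the machine type used is the documented default:
dedicated_resources = {
    "machine_spec": {"machine_type": "n1-standard-2"},
    "min_replica_count": 1,
    "max_replica_count": 3,
    "autoscaling_metric_specs": [{
        # Override the default target of 60 for CPU utilization.
        "metric_name": "aiplatform.googleapis.com/prediction/online/cpu/utilization",
        "target": 80,
    }],
}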
AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpec, AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecArgs
- Accelerator
Count int - The number of accelerators to attach to the machine.
- Accelerator
Type string - Possible values: ACCELERATOR_TYPE_UNSPECIFIED NVIDIA_TESLA_K80 NVIDIA_TESLA_P100 NVIDIA_TESLA_V100 NVIDIA_TESLA_P4 NVIDIA_TESLA_T4 NVIDIA_TESLA_A100 NVIDIA_A100_80GB NVIDIA_L4 NVIDIA_H100_80GB NVIDIA_H100_MEGA_80GB NVIDIA_H200_141GB NVIDIA_B200 TPU_V2 TPU_V3 TPU_V4_POD TPU_V5_LITEPOD
- Machine
Type string - The type of the machine.
See the list of machine types supported for
prediction
See the list of machine types supported for custom
training.
For DeployedModel this field is optional, and the default
value is
n1-standard-2
. For BatchPredictionJob or as part of WorkerPoolSpec this field is required. - Multihost
Gpu intNode Count - The number of nodes per replica for multihost GPU deployments.
- Reservation
Affinity AiEndpoint With Model Garden Deployment Deploy Config Dedicated Resources Machine Spec Reservation Affinity - A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
- Tpu
Topology string - The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
- Accelerator
Count int - The number of accelerators to attach to the machine.
- Accelerator
Type string - Possible values: ACCELERATOR_TYPE_UNSPECIFIED NVIDIA_TESLA_K80 NVIDIA_TESLA_P100 NVIDIA_TESLA_V100 NVIDIA_TESLA_P4 NVIDIA_TESLA_T4 NVIDIA_TESLA_A100 NVIDIA_A100_80GB NVIDIA_L4 NVIDIA_H100_80GB NVIDIA_H100_MEGA_80GB NVIDIA_H200_141GB NVIDIA_B200 TPU_V2 TPU_V3 TPU_V4_POD TPU_V5_LITEPOD
- Machine
Type string - The type of the machine.
See the list of machine types supported for
prediction
See the list of machine types supported for custom
training.
For DeployedModel this field is optional, and the default
value is
n1-standard-2
. For BatchPredictionJob or as part of WorkerPoolSpec this field is required. - Multihost
Gpu intNode Count - The number of nodes per replica for multihost GPU deployments.
- Reservation
Affinity AiEndpoint With Model Garden Deployment Deploy Config Dedicated Resources Machine Spec Reservation Affinity - A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
- Tpu
Topology string - The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
- accelerator
Count Integer - The number of accelerators to attach to the machine.
- accelerator
Type String - Possible values: ACCELERATOR_TYPE_UNSPECIFIED NVIDIA_TESLA_K80 NVIDIA_TESLA_P100 NVIDIA_TESLA_V100 NVIDIA_TESLA_P4 NVIDIA_TESLA_T4 NVIDIA_TESLA_A100 NVIDIA_A100_80GB NVIDIA_L4 NVIDIA_H100_80GB NVIDIA_H100_MEGA_80GB NVIDIA_H200_141GB NVIDIA_B200 TPU_V2 TPU_V3 TPU_V4_POD TPU_V5_LITEPOD
- machine
Type String - The type of the machine.
See the list of machine types supported for
prediction
See the list of machine types supported for custom
training.
For DeployedModel this field is optional, and the default
value is
n1-standard-2
. For BatchPredictionJob or as part of WorkerPoolSpec this field is required. - multihost
Gpu IntegerNode Count - The number of nodes per replica for multihost GPU deployments.
- reservation
Affinity AiEndpoint With Model Garden Deployment Deploy Config Dedicated Resources Machine Spec Reservation Affinity - A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
- tpu
Topology String - The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
- acceleratorCount number - The number of accelerators to attach to the machine.
- acceleratorType string - Possible values: ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, NVIDIA_TESLA_A100, NVIDIA_A100_80GB, NVIDIA_L4, NVIDIA_H100_80GB, NVIDIA_H100_MEGA_80GB, NVIDIA_H200_141GB, NVIDIA_B200, TPU_V2, TPU_V3, TPU_V4_POD, TPU_V5_LITEPOD.
- machineType string - The type of the machine. See the list of machine types supported for prediction and the list of machine types supported for custom training. For DeployedModel this field is optional, and the default value is n1-standard-2. For BatchPredictionJob or as part of WorkerPoolSpec this field is required.
- multihostGpuNodeCount number - The number of nodes per replica for multihost GPU deployments.
- reservationAffinity AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinity - A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
- tpuTopology string - The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
- accelerator_count int - The number of accelerators to attach to the machine.
- accelerator_type str - Possible values: ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, NVIDIA_TESLA_A100, NVIDIA_A100_80GB, NVIDIA_L4, NVIDIA_H100_80GB, NVIDIA_H100_MEGA_80GB, NVIDIA_H200_141GB, NVIDIA_B200, TPU_V2, TPU_V3, TPU_V4_POD, TPU_V5_LITEPOD.
- machine_type str - The type of the machine. See the list of machine types supported for prediction and the list of machine types supported for custom training. For DeployedModel this field is optional, and the default value is n1-standard-2. For BatchPredictionJob or as part of WorkerPoolSpec this field is required.
- multihost_gpu_node_count int - The number of nodes per replica for multihost GPU deployments.
- reservation_affinity AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinity - A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
- tpu_topology str - The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
- acceleratorCount Number - The number of accelerators to attach to the machine.
- acceleratorType String - Possible values: ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, NVIDIA_TESLA_A100, NVIDIA_A100_80GB, NVIDIA_L4, NVIDIA_H100_80GB, NVIDIA_H100_MEGA_80GB, NVIDIA_H200_141GB, NVIDIA_B200, TPU_V2, TPU_V3, TPU_V4_POD, TPU_V5_LITEPOD.
- machineType String - The type of the machine. See the list of machine types supported for prediction and the list of machine types supported for custom training. For DeployedModel this field is optional, and the default value is n1-standard-2. For BatchPredictionJob or as part of WorkerPoolSpec this field is required.
- multihostGpuNodeCount Number - The number of nodes per replica for multihost GPU deployments.
- reservationAffinity Property Map - A ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity. Structure is documented below.
- tpuTopology String - The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
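Put together, the machine spec fields above slot into deployConfig.dedicatedResources. The following is a minimal TypeScript sketch, assuming the g2-standard-16 / NVIDIA_L4 pairing is available in us-central1 and that a single replica (minReplicaCount) is sufficient; the resource name and model are illustrative:

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Illustrative machine/accelerator pairing; it must be valid for your region.
const deployWithGpu = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-with-gpu", {
    publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
    },
    deployConfig: {
        dedicatedResources: {
            machineSpec: {
                machineType: "g2-standard-16",
                acceleratorType: "NVIDIA_L4",
                acceleratorCount: 1,
            },
            // Assumed field of dedicatedResources; one replica for illustration.
            minReplicaCount: 1,
        },
    },
});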
AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinity, AiEndpointWithModelGardenDeploymentDeployConfigDedicatedResourcesMachineSpecReservationAffinityArgs
- ReservationAffinityType string - Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION.
- Key string - Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
- Values List<string> - Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
- ReservationAffinityType string - Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION.
- Key string - Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
- Values []string - Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
- reservationAffinityType String - Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION.
- key String - Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
- values List<String> - Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
- reservationAffinityType string - Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION.
- key string - Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
- values string[] - Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
- reservation_affinity_type str - Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION.
- key str - Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
- values Sequence[str] - Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
- reservationAffinityType String - Specifies the reservation affinity type. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION.
- key String - Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.
- values List<String> - Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation or reservation block.
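To pin a deployment to a specific shared reservation, set the key to compute.googleapis.com/reservation-name as described above. A minimal TypeScript sketch; the project, zone, and reservation names are placeholders:

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const deployFromReservation = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-from-reservation", {
    publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
    },
    deployConfig: {
        dedicatedResources: {
            machineSpec: {
                machineType: "a2-highgpu-1g",
                acceleratorType: "NVIDIA_TESLA_A100",
                acceleratorCount: 1,
                reservationAffinity: {
                    reservationAffinityType: "SPECIFIC_RESERVATION",
                    key: "compute.googleapis.com/reservation-name",
                    // Full resource name of a placeholder reservation.
                    values: ["projects/my-project/zones/us-central1-a/reservations/my-reservation"],
                },
            },
            minReplicaCount: 1,
        },
    },
});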
AiEndpointWithModelGardenDeploymentEndpointConfig, AiEndpointWithModelGardenDeploymentEndpointConfigArgs
- DedicatedEndpointEnabled bool - If true, the endpoint will be exposed through a dedicated DNS (Endpoint.dedicated_endpoint_dns). Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you won't be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. The limitations will be removed soon.
- EndpointDisplayName string - The user-specified display name of the endpoint. If not set, a default name will be used.
- DedicatedEndpointEnabled bool - If true, the endpoint will be exposed through a dedicated DNS (Endpoint.dedicated_endpoint_dns). Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you won't be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. The limitations will be removed soon.
- EndpointDisplayName string - The user-specified display name of the endpoint. If not set, a default name will be used.
- dedicatedEndpointEnabled Boolean - If true, the endpoint will be exposed through a dedicated DNS (Endpoint.dedicated_endpoint_dns). Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you won't be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. The limitations will be removed soon.
- endpointDisplayName String - The user-specified display name of the endpoint. If not set, a default name will be used.
- dedicatedEndpointEnabled boolean - If true, the endpoint will be exposed through a dedicated DNS (Endpoint.dedicated_endpoint_dns). Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you won't be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. The limitations will be removed soon.
- endpointDisplayName string - The user-specified display name of the endpoint. If not set, a default name will be used.
- dedicated_endpoint_enabled bool - If true, the endpoint will be exposed through a dedicated DNS (Endpoint.dedicated_endpoint_dns). Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you won't be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. The limitations will be removed soon.
- endpoint_display_name str - The user-specified display name of the endpoint. If not set, a default name will be used.
- dedicatedEndpointEnabled Boolean - If true, the endpoint will be exposed through a dedicated DNS (Endpoint.dedicated_endpoint_dns). Your requests to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability. Note: once you enable the dedicated endpoint, you won't be able to send requests to the shared DNS {region}-aiplatform.googleapis.com. The limitations will be removed soon.
- endpointDisplayName String - The user-specified display name of the endpoint. If not set, a default name will be used.
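A minimal TypeScript sketch of the endpoint config; the display name is illustrative, and enabling the dedicated endpoint means requests must go to the dedicated DNS rather than the shared one:

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const deployDedicated = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-dedicated", {
    publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
    },
    endpointConfig: {
        // Illustrative display name.
        endpointDisplayName: "my-model-garden-endpoint",
        // Exposes the endpoint via its dedicated DNS.
        dedicatedEndpointEnabled: true,
    },
});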
AiEndpointWithModelGardenDeploymentModelConfig, AiEndpointWithModelGardenDeploymentModelConfigArgs
- AcceptEula bool - Whether the user accepts the End User License Agreement (EULA) for the model.
- ContainerSpec AiEndpointWithModelGardenDeploymentModelConfigContainerSpec - Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
- HuggingFaceAccessToken string - The Hugging Face read access token used to access the model artifacts of gated models.
- HuggingFaceCacheEnabled bool - If true, the model will deploy with a cached version instead of directly downloading the model artifacts from Hugging Face. This is suitable for VPC-SC users with limited internet access.
- ModelDisplayName string - The user-specified display name of the uploaded model. If not set, a default name will be used.
- AcceptEula bool - Whether the user accepts the End User License Agreement (EULA) for the model.
- ContainerSpec AiEndpointWithModelGardenDeploymentModelConfigContainerSpec - Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
- HuggingFaceAccessToken string - The Hugging Face read access token used to access the model artifacts of gated models.
- HuggingFaceCacheEnabled bool - If true, the model will deploy with a cached version instead of directly downloading the model artifacts from Hugging Face. This is suitable for VPC-SC users with limited internet access.
- ModelDisplayName string - The user-specified display name of the uploaded model. If not set, a default name will be used.
- acceptEula Boolean - Whether the user accepts the End User License Agreement (EULA) for the model.
- containerSpec AiEndpointWithModelGardenDeploymentModelConfigContainerSpec - Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
- huggingFaceAccessToken String - The Hugging Face read access token used to access the model artifacts of gated models.
- huggingFaceCacheEnabled Boolean - If true, the model will deploy with a cached version instead of directly downloading the model artifacts from Hugging Face. This is suitable for VPC-SC users with limited internet access.
- modelDisplayName String - The user-specified display name of the uploaded model. If not set, a default name will be used.
- acceptEula boolean - Whether the user accepts the End User License Agreement (EULA) for the model.
- containerSpec AiEndpointWithModelGardenDeploymentModelConfigContainerSpec - Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
- huggingFaceAccessToken string - The Hugging Face read access token used to access the model artifacts of gated models.
- huggingFaceCacheEnabled boolean - If true, the model will deploy with a cached version instead of directly downloading the model artifacts from Hugging Face. This is suitable for VPC-SC users with limited internet access.
- modelDisplayName string - The user-specified display name of the uploaded model. If not set, a default name will be used.
- accept_eula bool - Whether the user accepts the End User License Agreement (EULA) for the model.
- container_spec AiEndpointWithModelGardenDeploymentModelConfigContainerSpec - Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
- hugging_face_access_token str - The Hugging Face read access token used to access the model artifacts of gated models.
- hugging_face_cache_enabled bool - If true, the model will deploy with a cached version instead of directly downloading the model artifacts from Hugging Face. This is suitable for VPC-SC users with limited internet access.
- model_display_name str - The user-specified display name of the uploaded model. If not set, a default name will be used.
- acceptEula Boolean - Whether the user accepts the End User License Agreement (EULA) for the model.
- containerSpec Property Map - Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. Structure is documented below.
- huggingFaceAccessToken String - The Hugging Face read access token used to access the model artifacts of gated models.
- huggingFaceCacheEnabled Boolean - If true, the model will deploy with a cached version instead of directly downloading the model artifacts from Hugging Face. This is suitable for VPC-SC users with limited internet access.
- modelDisplayName String - The user-specified display name of the uploaded model. If not set, a default name will be used.
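For a gated Hugging Face model, the read access token can be supplied through modelConfig. A minimal TypeScript sketch, assuming the token is stored as a Pulumi secret; the model ID and display name are illustrative:

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const config = new pulumi.Config();
// Keep the token out of source control; here it is read from Pulumi config
// as a secret (set with: pulumi config set --secret huggingFaceToken <token>).
const hfToken = config.requireSecret("huggingFaceToken");

const deployGated = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gated", {
    // Illustrative gated model ID.
    huggingFaceModelId: "meta-llama/Llama-3.2-1B",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
        huggingFaceAccessToken: hfToken,
        // Deploy from a cached copy instead of downloading from Hugging Face.
        huggingFaceCacheEnabled: true,
        modelDisplayName: "llama-3-2-1b",
    },
});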
AiEndpointWithModelGardenDeploymentModelConfigContainerSpec, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecArgs
- ImageUri string - URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI to one of Vertex AI's pre-built container images for prediction in this field.
- Args List<string> - Specifies arguments for the command that runs when the container starts. This overrides the container's CMD. Specify this field as an array of executable and arguments, similar to a Docker CMD's "default parameters" form. If you don't specify this field but do specify the command field, then the command from the command field runs without any additional arguments. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. If you don't specify this field and don't specify the command field, then the container's ENTRYPOINT and CMD determine what runs based on their default behavior. See the Docker documentation about how CMD and ENTRYPOINT interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the args field of the Kubernetes Containers v1 core API.
- Commands List<string> - Specifies the command that runs when the container starts. This overrides the container's ENTRYPOINT. Specify this field as an array of executable and arguments, similar to a Docker ENTRYPOINT's "exec" form, not its "shell" form. If you do not specify this field, then the container's ENTRYPOINT runs, in conjunction with the args field or the container's CMD, if either exists. If this field is not specified and the container does not have an ENTRYPOINT, then refer to the Docker documentation about how CMD and ENTRYPOINT interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
- DeploymentTimeout string - Deployment timeout. The limit for the deployment timeout is 2 hours.
- Envs List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv> - List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following example sets the variable VAR_2 to have the value foo bar: [ { "name": "VAR_1", "value": "foo" }, { "name": "VAR_2", "value": "$(VAR_1) bar" } ]. If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.
- GrpcPorts List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort> - List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- HealthProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- HealthRoute string - HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:
  - ENDPOINT: the last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: the DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- LivenessProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- Ports List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort> - List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value: [ { "containerPort": 8080 } ]. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- PredictRoute string - HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:
  - ENDPOINT: the last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: the DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- SharedMemorySizeMb string - The amount of the VM memory to reserve as the shared memory for the model, in megabytes.
- StartupProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- ImageUri string - URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI to one of Vertex AI's pre-built container images for prediction in this field.
- Args []string - Specifies arguments for the command that runs when the container starts. This overrides the container's CMD. Specify this field as an array of executable and arguments, similar to a Docker CMD's "default parameters" form. If you don't specify this field but do specify the command field, then the command from the command field runs without any additional arguments. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. If you don't specify this field and don't specify the command field, then the container's ENTRYPOINT and CMD determine what runs based on their default behavior. See the Docker documentation about how CMD and ENTRYPOINT interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the args field of the Kubernetes Containers v1 core API.
- Commands []string - Specifies the command that runs when the container starts. This overrides the container's ENTRYPOINT. Specify this field as an array of executable and arguments, similar to a Docker ENTRYPOINT's "exec" form, not its "shell" form. If you do not specify this field, then the container's ENTRYPOINT runs, in conjunction with the args field or the container's CMD, if either exists. If this field is not specified and the container does not have an ENTRYPOINT, then refer to the Docker documentation about how CMD and ENTRYPOINT interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
- DeploymentTimeout string - Deployment timeout. The limit for the deployment timeout is 2 hours.
- Envs []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv - List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following example sets the variable VAR_2 to have the value foo bar: [ { "name": "VAR_1", "value": "foo" }, { "name": "VAR_2", "value": "$(VAR_1) bar" } ]. If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.
- GrpcPorts []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort - List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- HealthProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- HealthRoute string - HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:
  - ENDPOINT: the last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: the DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- LivenessProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- Ports []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort - List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value: [ { "containerPort": 8080 } ]. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- PredictRoute string - HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:
  - ENDPOINT: the last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: the DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- SharedMemorySizeMb string - The amount of the VM memory to reserve as the shared memory for the model, in megabytes.
- StartupProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- imageUri String - URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI to one of Vertex AI's pre-built container images for prediction in this field.
- args List<String> - Specifies arguments for the command that runs when the container starts. This overrides the container's CMD. Specify this field as an array of executable and arguments, similar to a Docker CMD's "default parameters" form. If you don't specify this field but do specify the command field, then the command from the command field runs without any additional arguments. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. If you don't specify this field and don't specify the command field, then the container's ENTRYPOINT and CMD determine what runs based on their default behavior. See the Docker documentation about how CMD and ENTRYPOINT interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the args field of the Kubernetes Containers v1 core API.
- commands List<String> - Specifies the command that runs when the container starts. This overrides the container's ENTRYPOINT. Specify this field as an array of executable and arguments, similar to a Docker ENTRYPOINT's "exec" form, not its "shell" form. If you do not specify this field, then the container's ENTRYPOINT runs, in conjunction with the args field or the container's CMD, if either exists. If this field is not specified and the container does not have an ENTRYPOINT, then refer to the Docker documentation about how CMD and ENTRYPOINT interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
- deploymentTimeout String - Deployment timeout. The limit for the deployment timeout is 2 hours.
- envs List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv> - List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following example sets the variable VAR_2 to have the value foo bar: [ { "name": "VAR_1", "value": "foo" }, { "name": "VAR_2", "value": "$(VAR_1) bar" } ]. If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.
- grpcPorts List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort> - List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- healthProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- healthRoute String - HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:
  - ENDPOINT: the last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: the DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- livenessProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- ports List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort> - List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value: [ { "containerPort": 8080 } ]. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- predictRoute String - HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:
  - ENDPOINT: the last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: the DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- sharedMemorySizeMb String - The amount of the VM memory to reserve as the shared memory for the model, in megabytes.
- startupProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- imageUri string - URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI to one of Vertex AI's pre-built container images for prediction in this field.
- args string[] - Specifies arguments for the command that runs when the container starts. This overrides the container's CMD. Specify this field as an array of executable and arguments, similar to a Docker CMD's "default parameters" form. If you don't specify this field but do specify the command field, then the command from the command field runs without any additional arguments. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. If you don't specify this field and don't specify the command field, then the container's ENTRYPOINT and CMD determine what runs based on their default behavior. See the Docker documentation about how CMD and ENTRYPOINT interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the args field of the Kubernetes Containers v1 core API.
- commands string[] - Specifies the command that runs when the container starts. This overrides the container's ENTRYPOINT. Specify this field as an array of executable and arguments, similar to a Docker ENTRYPOINT's "exec" form, not its "shell" form. If you do not specify this field, then the container's ENTRYPOINT runs, in conjunction with the args field or the container's CMD, if either exists. If this field is not specified and the container does not have an ENTRYPOINT, then refer to the Docker documentation about how CMD and ENTRYPOINT interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
- deploymentTimeout string - Deployment timeout. The limit for the deployment timeout is 2 hours.
- envs AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv[] - List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following example sets the variable VAR_2 to have the value foo bar: [ { "name": "VAR_1", "value": "foo" }, { "name": "VAR_2", "value": "$(VAR_1) bar" } ]. If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.
- grpcPorts AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort[] - List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- healthProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- healthRoute string - HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:
  - ENDPOINT: the last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: the DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- livenessProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- ports AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort[] - List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value: [ { "containerPort": 8080 } ]. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- predictRoute string - HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict. The placeholders in this value are replaced as follows:
  - ENDPOINT: the last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: the DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- sharedMemorySizeMb string - The amount of the VM memory to reserve as the shared memory for the model, in megabytes.
- startupProbe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
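To make the env/args expansion rules above concrete, here is a minimal TypeScript sketch of a modelConfig.containerSpec; the image URI, routes, and variable names are placeholders, and AIP_STORAGE_URI is one of the environment variables Vertex AI sets for the container:

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const deployCustomContainer = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-custom", {
    publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
        containerSpec: {
            // Placeholder serving image in Artifact Registry.
            imageUri: "us-docker.pkg.dev/my-project/my-repo/my-serving-image:latest",
            // Later entries may reference earlier ones: VAR_2 expands to "foo bar".
            envs: [
                { name: "VAR_1", value: "foo" },
                { name: "VAR_2", value: "$(VAR_1) bar" },
                // Escaped with $$: the container sees the literal string "$(VAR_1)".
                { name: "VAR_3", value: "$$(VAR_1)" },
            ],
            // args override the image's CMD and may reference env vars via $(NAME).
            args: ["--model-dir", "$(AIP_STORAGE_URI)"],
            // Vertex AI sends traffic and health checks to the first port listed.
            ports: [{ containerPort: 8080 }],
            healthRoute: "/health",
            predictRoute: "/predict",
        },
    },
});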
- image_
uri str - URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI to one of Vertex AI's pre-built container images for prediction in this field.
- args Sequence[str]
- Specifies arguments for the command that runs when the container starts.
This overrides the container's
CMD
. Specify this field as an array of executable and arguments, similar to a DockerCMD
's "default parameters" form. If you don't specify this field but do specify the command field, then the command from thecommand
field runs without any additional arguments. See the Kubernetes documentation about how thecommand
andargs
fields interact with a container'sENTRYPOINT
andCMD
. If you don't specify this field and don't specify thecommand
field, then the container'sENTRYPOINT
andCMD
determine what runs based on their default behavior. See the Docker documentation about howCMD
andENTRYPOINT
interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax:$(VARIABLE_NAME) Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with$$
; for example:$$(VARIABLE_NAME) This field corresponds to theargs
field of the Kubernetes Containers v1 core API. - commands Sequence[str]
- Specifies the command that runs when the container starts. This overrides
the container's
ENTRYPOINT.
Specify this field as an array of executable and arguments, similar to a
Docker
ENTRYPOINT
's "exec" form, not its "shell" form. If you do not specify this field, then the container'sENTRYPOINT
runs, in conjunction with the args field or the container'sCMD
, if either exists. If this field is not specified and the container does not have anENTRYPOINT
, then refer to the Docker documentation about howCMD
andENTRYPOINT
interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
- deployment_timeout str - Deployment timeout. The limit for the deployment timeout is 2 hours.
- envs Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv] - List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following example sets the variable VAR_2 to have the value foo bar: [ { "name": "VAR_1", "value": "foo" }, { "name": "VAR_2", "value": "$(VAR_1) bar" } ] If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.
- grpc_ports Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort] - List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- health_probe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- health_route str - HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict The placeholders in this value are replaced as follows:
  - ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- liveness_probe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- ports Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort] - List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value: [ { "containerPort": 8080 } ] Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- predict_route str - HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict The placeholders in this value are replaced as follows:
  - ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- shared_memory_size_mb str - The amount of the VM memory to reserve as the shared memory for the model in megabytes.
- startup_probe AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- imageUri String - URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the container publishing requirements, including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see Custom container requirements. You can use the URI to one of Vertex AI's pre-built container images for prediction in this field.
- args List<String> - Specifies arguments for the command that runs when the container starts. This overrides the container's CMD. Specify this field as an array of executable and arguments, similar to a Docker CMD's "default parameters" form. If you don't specify this field but do specify the command field, then the command from the command field runs without any additional arguments. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. If you don't specify this field and don't specify the command field, then the container's ENTRYPOINT and CMD determine what runs based on their default behavior. See the Docker documentation about how CMD and ENTRYPOINT interact. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the args field of the Kubernetes Containers v1 core API.
- commands List<String> - Specifies the command that runs when the container starts. This overrides the container's ENTRYPOINT. Specify this field as an array of executable and arguments, similar to a Docker ENTRYPOINT's "exec" form, not its "shell" form. If you do not specify this field, then the container's ENTRYPOINT runs, in conjunction with the args field or the container's CMD, if either exists. If this field is not specified and the container does not have an ENTRYPOINT, then refer to the Docker documentation about how CMD and ENTRYPOINT interact. If you specify this field, then you can also specify the args field to provide additional arguments for this command. However, if you specify this field, then the container's CMD is ignored. See the Kubernetes documentation about how the command and args fields interact with a container's ENTRYPOINT and CMD. In this field, you can reference environment variables set by Vertex AI and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME). Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with $$; for example: $$(VARIABLE_NAME). This field corresponds to the command field of the Kubernetes Containers v1 core API.
- deploymentTimeout String - Deployment timeout. The limit for the deployment timeout is 2 hours.
- envs List<Property Map> - List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following example sets the variable VAR_2 to have the value foo bar: [ { "name": "VAR_1", "value": "foo" }, { "name": "VAR_2", "value": "$(VAR_1) bar" } ] If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the env field of the Kubernetes Containers v1 core API. Structure is documented below.
- grpcPorts List<Property Map> - List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- healthProbe Property Map - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- healthRoute String - HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about health checks. For example, if you set this field to /bar, then Vertex AI intermittently sends a GET request to the /bar path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict The placeholders in this value are replaced as follows:
  - ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- livenessProbe Property Map - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
- ports List<Property Map> - List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, it defaults to the following value: [ { "containerPort": 8080 } ] Vertex AI does not use ports other than the first one listed. This field corresponds to the ports field of the Kubernetes Containers v1 core API. Structure is documented below.
- predictRoute String - HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. For example, if you set this field to /foo, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the /foo path on the port of your container specified by the first value of this ModelContainerSpec's ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict The placeholders in this value are replaced as follows:
  - ENDPOINT: The last segment (following endpoints/) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the AIP_ENDPOINT_ID environment variable.)
  - DEPLOYED_MODEL: DeployedModel.id of the DeployedModel. (Vertex AI makes this value available to your container code as the AIP_DEPLOYED_MODEL_ID environment variable.)
- sharedMemorySizeMb String - The amount of the VM memory to reserve as the shared memory for the model in megabytes.
- startupProbe Property Map - Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. Structure is documented below.
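The container spec fields above mirror the Kubernetes Containers v1 core API. As an illustrative sketch only (the image URI, routes, port, and timeout value are hypothetical, and the nesting under modelConfig.containerSpec follows the type names in this section), a TypeScript program wiring several of them together might look like:

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
    huggingFaceModelId: "Qwen/Qwen3-0.6B",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
        containerSpec: {
            // Hypothetical serving image; replace with a real Artifact Registry URI.
            imageUri: "us-docker.pkg.dev/my-project/my-repo/my-server:latest",
            commands: ["/usr/bin/server"],        // overrides the image's ENTRYPOINT
            args: ["--port", "$(AIP_HTTP_PORT)"], // $(VAR) references are expanded by Vertex AI
            ports: [{ containerPort: 8080 }],     // the first port receives predictions and health checks
            predictRoute: "/foo",                 // POST prediction requests are forwarded here
            healthRoute: "/bar",                  // GET health checks are sent here
            deploymentTimeout: "3600s",           // assumed duration-string form; the limit is 2 hours
        },
    },
});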
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnv, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecEnvArgs
- Name string
- Name of the environment variable. Must be a valid C identifier.
- Value string
- Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
- Name string
- Name of the environment variable. Must be a valid C identifier.
- Value string
- Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
- name String
- Name of the environment variable. Must be a valid C identifier.
- value String
- Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
- name string
- Name of the environment variable. Must be a valid C identifier.
- value string
- Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
- name str
- Name of the environment variable. Must be a valid C identifier.
- value str
- Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
- name String
- Name of the environment variable. Must be a valid C identifier.
- value String
- Variables that reference a $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not.
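Since later entries can reference earlier ones, order matters when composing values. A minimal TypeScript sketch of the expansion and escaping rules above (the image URI is hypothetical):

import * as gcp from "@pulumi/gcp";

const envDemo = new gcp.vertex.AiEndpointWithModelGardenDeployment("env-demo", {
    huggingFaceModelId: "Qwen/Qwen3-0.6B",
    location: "us-central1",
    modelConfig: {
        acceptEula: true,
        containerSpec: {
            imageUri: "us-docker.pkg.dev/my-project/my-repo/my-server:latest", // hypothetical
            envs: [
                { name: "VAR_1", value: "foo" },
                { name: "VAR_2", value: "$(VAR_1) bar" }, // expands to "foo bar"
                { name: "RAW", value: "$$(VAR_1)" },      // escaped; stays as the literal "$(VAR_1)"
            ],
        },
    },
});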
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPort, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecGrpcPortArgs
- ContainerPort int - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- ContainerPort int - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- containerPort Integer - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- containerPort number - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- container_port int - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- containerPort Number - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeArgs
- Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- FailureThreshold int - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- InitialDelaySeconds int - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- PeriodSeconds int - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- SuccessThreshold int - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- TimeoutSeconds int - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- FailureThreshold int - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- InitialDelaySeconds int - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- PeriodSeconds int - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- SuccessThreshold int - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- TimeoutSeconds int - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold Integer - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- initialDelaySeconds Integer - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds Integer - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- successThreshold Integer - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds Integer - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold number - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- initialDelaySeconds number - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds number - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- successThreshold number - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds number - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- exec_ AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- failure_threshold int - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- http_get AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- initial_delay_seconds int - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- period_seconds int - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- success_threshold int - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- tcp_socket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeout_seconds int - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- exec Property Map - ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold Number - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- grpc Property Map - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet Property Map - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- initialDelaySeconds Number - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds Number - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- successThreshold Number - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- tcpSocket Property Map - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds Number - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
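As a sketch of how these fields combine (the values and /healthz path are illustrative, the timing respects the constraints stated above, and the type path assumes the usual @pulumi/gcp input-type naming):

import * as gcp from "@pulumi/gcp";

const healthProbe: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbe = {
    httpGet: { path: "/healthz", port: 8080 }, // hypothetical health endpoint
    initialDelaySeconds: 30, // give the model server time to load
    periodSeconds: 10,       // probe every 10 seconds
    timeoutSeconds: 15,      // must be greater than or equal to periodSeconds
    failureThreshold: 3,     // three consecutive failures mark the container unhealthy
};
// Pass this object as healthProbe inside modelConfig.containerSpec.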
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExecArgs
- Commands List<string> - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- Commands []string - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- commands List<String> - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- commands string[] - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- commands Sequence[str] - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- commands List<String> - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
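Because the command is exec'd rather than run in a shell, pipes and conditionals only work if you invoke a shell explicitly. A minimal sketch (the health URL is hypothetical; the type path assumes the usual @pulumi/gcp input-type naming):

import * as gcp from "@pulumi/gcp";

const execAction: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeExec = {
    // Wrap shell syntax in an explicit shell invocation; a bare "curl ... || exit 1" would not work.
    commands: ["/bin/sh", "-c", "curl -sf http://localhost:8080/healthz || exit 1"],
};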
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpcArgs
- Port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- Service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- Port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- Service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port Integer
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service String
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port number
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service str
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port Number
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service String
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
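A sketch of a gRPC health check (the service name is a hypothetical value to place in the HealthCheckRequest; leaving service unset falls back to gRPC's default behavior; the type path assumes the usual @pulumi/gcp input-type naming):

import * as gcp from "@pulumi/gcp";

const grpcAction: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeGrpc = {
    port: 8080,                              // gRPC port of the serving container
    service: "my.package.PredictionService", // hypothetical service name for the HealthCheckRequest
};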
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetArgs
- Host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- HttpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader> - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- Path string
- Path to access on the HTTP server.
- Port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- Scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- Host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- HttpHeaders []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- Path string
- Path to access on the HTTP server.
- Port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- Scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host String
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader> - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path String
- Path to access on the HTTP server.
- port Integer
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme String
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader[] - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path string
- Path to access on the HTTP server.
- port number
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host str
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- http_headers Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader] - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path str
- Path to access on the HTTP server.
- port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme str
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host String
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders List<Property Map> - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path String
- Path to access on the HTTP server.
- port Number
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme String
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
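Per the note on the host field, the usual pattern is to leave it unset and send a "Host" header instead. A sketch (the header value is hypothetical, and the header's name/value shape is assumed to mirror the Kubernetes HTTPHeader type, since its fields are not listed in this section):

import * as gcp from "@pulumi/gcp";

const httpGetAction: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGet = {
    path: "/healthz",
    port: 8080,
    scheme: "HTTP", // or "HTTPS"
    httpHeaders: [{ name: "Host", value: "model.internal.example" }], // hypothetical host value
};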
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeader, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeHttpGetHttpHeaderArgs
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocket, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecHealthProbeTcpSocketArgs
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeArgs
- Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- FailureThreshold int - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- InitialDelaySeconds int - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- PeriodSeconds int - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- SuccessThreshold int - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- TimeoutSeconds int - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- FailureThreshold int - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- InitialDelaySeconds int - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- PeriodSeconds int - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- SuccessThreshold int - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- TimeoutSeconds int - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold Integer - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- initialDelaySeconds Integer - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds Integer - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- successThreshold Integer - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds Integer - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold number - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- initialDelaySeconds number - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds number - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- successThreshold number - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds number - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- exec_ AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec - ExecAction specifies a command to execute. Structure is documented below.
- failure_threshold int - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- http_get AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- initial_delay_seconds int - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- period_seconds int - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- success_threshold int - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- tcp_socket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeout_seconds int - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
- exec Property Map - ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold Number - Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to the Kubernetes probe argument 'failureThreshold'.
- grpc Property Map - GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet Property Map - HttpGetAction describes an action based on HTTP GET requests. Structure is documented below.
- initialDelaySeconds Number - Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to the Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds Number - How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to the Kubernetes probe argument 'periodSeconds'.
- successThreshold Number - Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to the Kubernetes probe argument 'successThreshold'.
- tcpSocket Property Map - TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds Number - Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to the Kubernetes probe argument 'timeoutSeconds'.
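A sketch of a TCP-based liveness probe (the tcpSocket shape is assumed to mirror its Kubernetes counterpart, a port to connect to, since its fields are not listed in this section; the type path assumes the usual @pulumi/gcp input-type naming):

import * as gcp from "@pulumi/gcp";

const livenessProbe: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbe = {
    tcpSocket: { port: 8080 }, // assumed field; the probe succeeds if the socket opens
    periodSeconds: 10,
    failureThreshold: 3,
};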
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExec, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeExecArgs
- Commands List<string> - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- Commands []string - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- commands List<String> - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- commands string[] - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- commands Sequence[str] - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
- commands List<String> - Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero as unhealthy.
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpc, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeGrpcArgs
- Port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- Service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- Port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- Service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port Integer
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service String
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port number
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service str
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port Number
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service String
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGet, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetArgs
- Host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- HttpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader> - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- Path string
- Path to access on the HTTP server.
- Port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- Scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- Host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- HttpHeaders []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- Path string
- Path to access on the HTTP server.
- Port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- Scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host String
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader> - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path String
- Path to access on the HTTP server.
- port Integer
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme String
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader[] - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path string
- Path to access on the HTTP server.
- port number
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host str
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- http_headers Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader] - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path str
- Path to access on the HTTP server.
- port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme str
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host String
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders List<Property Map> - Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path String
- Path to access on the HTTP server.
- port Number
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme String
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeader, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeHttpGetHttpHeaderArgs
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocket, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecLivenessProbeTcpSocketArgs
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPort, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecPortArgs
- ContainerPort int - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- ContainerPort int - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- containerPort Integer - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- containerPort number - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- container_port int - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
- containerPort Number - The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeArgs
- Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec
- ExecAction specifies a command to execute. Structure is documented below.
- FailureThreshold int
- Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
- Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc
- GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet
- HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
- InitialDelaySeconds int
- Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
- PeriodSeconds int
- How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
- SuccessThreshold int
- Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
- TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket
- TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- TimeoutSeconds int
- Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
- Exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec
- ExecAction specifies a command to execute. Structure is documented below.
- FailureThreshold int
- Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
- Grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc
- GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- HttpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet
- HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
- InitialDelaySeconds int
- Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
- PeriodSeconds int
- How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
- SuccessThreshold int
- Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
- TcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket
- TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- TimeoutSeconds int
- Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
- exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec
- ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold Integer
- Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc
- GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet
- HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
- initialDelaySeconds Integer
- Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds Integer
- How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
- successThreshold Integer
- Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
- tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket
- TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds Integer
- Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
- exec AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec
- ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold number
- Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc
- GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet
- HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
- initialDelaySeconds number
- Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds number
- How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
- successThreshold number
- Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
- tcpSocket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket
- TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds number
- Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
- exec_ AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec
- ExecAction specifies a command to execute. Structure is documented below.
- failure_threshold int
- Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
- grpc AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc
- GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- http_get AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet
- HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
- initial_delay_seconds int
- Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
- period_seconds int
- How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
- success_threshold int
- Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
- tcp_socket AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket
- TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeout_seconds int
- Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
- exec Property Map
- ExecAction specifies a command to execute. Structure is documented below.
- failureThreshold Number
- Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.
- grpc Property Map
- GrpcAction checks the health of a container using a gRPC service. Structure is documented below.
- httpGet Property Map
- HttpGetAction describes an action based on HTTP Get requests. Structure is documented below.
- initialDelaySeconds Number
- Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.
- periodSeconds Number
- How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.
- successThreshold Number
- Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.
- tcpSocket Property Map
- TcpSocketAction probes the health of a container by opening a TCP socket connection. Structure is documented below.
- timeoutSeconds Number
- Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.
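To see how the timing fields interact, here is a TypeScript sketch of a standalone startup-probe value; the gcp.types.input path is the SDK's usual generated layout, and the endpoint path and numbers are illustrative. Note that periodSeconds stays below timeoutSeconds, satisfying both constraints above.
import * as gcp from "@pulumi/gcp";

// Poll /healthz every 10s, waiting 30s before the first check.
// timeoutSeconds (15) is >= periodSeconds (10), as required above.
const startupProbe: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbe = {
    httpGet: {
        path: "/healthz",
        port: 8080,
    },
    initialDelaySeconds: 30,
    periodSeconds: 10,
    timeoutSeconds: 15,
    failureThreshold: 5,
    successThreshold: 1,
};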
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExecArgs
- Commands List<string>
- Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero is unhealthy.
- Commands []string
- Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero is unhealthy.
- commands List<String>
- Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero is unhealthy.
- commands string[]
- Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero is unhealthy.
- commands Sequence[str]
- Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero is unhealthy.
- commands List<String>
- Command is the command line to execute inside the container; the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd; it is not run inside a shell, so traditional shell instructions ('|', etc.) won't work. To use a shell, you need to explicitly call out to that shell. An exit status of 0 is treated as live/healthy and non-zero is unhealthy.
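Because the command is exec'd rather than run in a shell, pipes and redirects only work if you invoke a shell explicitly. A small TypeScript sketch (the health-check command itself is hypothetical):
import * as gcp from "@pulumi/gcp";

// Wrap the check in `sh -c` so shell syntax works; a bare command
// would be exec'd directly with no shell interpretation.
const execAction: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeExec = {
    commands: ["/bin/sh", "-c", "curl -sf http://localhost:8080/healthz"],
};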
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpcArgs
- Port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- Service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- Port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- Service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port Integer
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service String
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port number
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service string
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port int
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service str
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
- port Number
- Port number of the gRPC service. Number must be in the range 1 to 65535.
- service String
- Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.
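A gRPC probe needs only the port; the service name is optional and, per the gRPC health-checking protocol linked above, defaults to gRPC's standard behavior when omitted. Illustrative TypeScript sketch (the service name is hypothetical):
import * as gcp from "@pulumi/gcp";

// Check the container's gRPC health service on port 8080.
const grpcAction: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeGrpc = {
    port: 8080,
    service: "serving.ModelService", // hypothetical; omit to use gRPC's default behavior
};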
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetArgs
- Host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- HttpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader>
- Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- Path string
- Path to access on the HTTP server.
- Port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- Scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- Host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- HttpHeaders []AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader
- Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- Path string
- Path to access on the HTTP server.
- Port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- Scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host String
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders List<AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader>
- Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path String
- Path to access on the HTTP server.
- port Integer
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme String
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host string
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader[]
- Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path string
- Path to access on the HTTP server.
- port number
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme string
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host str
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- http_headers Sequence[AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader]
- Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path str
- Path to access on the HTTP server.
- port int
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme str
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
- host String
- Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.
- httpHeaders List<Property Map>
- Custom headers to set in the request. HTTP allows repeated headers. Structure is documented below.
- path String
- Path to access on the HTTP server.
- port Number
- Number of the port to access on the container. Number must be in the range 1 to 65535.
- scheme String
- Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".
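Following the note above about setting "Host" via httpHeaders rather than the host field, here is an illustrative TypeScript sketch. The name/value shape of each header entry follows the Kubernetes HttpHeader convention and is an assumption, since this page does not list the HttpHeader type's fields.
import * as gcp from "@pulumi/gcp";

// An HTTPS health check that overrides the Host header.
const httpGetAction: gcp.types.input.vertex.AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGet = {
    path: "/healthz",
    port: 8443,
    scheme: "HTTPS",
    // Setting "Host" here is preferred over the `host` field above.
    httpHeaders: [{ name: "Host", value: "model.internal.example" }], // name/value fields assumed
};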
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeader, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeHttpGetHttpHeaderArgs
AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocket, AiEndpointWithModelGardenDeploymentModelConfigContainerSpecStartupProbeTcpSocketArgs
Import
This resource does not support import.
To learn more about importing existing cloud resources, see Importing resources.
Package Details
- Repository
- Google Cloud (GCP) Classic pulumi/pulumi-gcp
- License
- Apache-2.0
- Notes
- This Pulumi package is based on the
google-beta
Terraform Provider.