The gcp:vertex/aiEndpoint:AiEndpoint resource, part of the Pulumi GCP provider, defines a Vertex AI endpoint that serves deployed models for online prediction requests. This guide focuses on three capabilities: VPC networking options (peering and Private Service Connect), prediction logging to BigQuery, and dedicated DNS for traffic isolation.
Endpoints serve as the runtime infrastructure for deployed models. They require models to be deployed before accepting traffic, and can reference VPC networks for private connectivity, KMS keys for encryption, and BigQuery datasets for prediction logging. The examples are intentionally small; combine them with your own model deployment and traffic management configuration.
Peer endpoint to VPC for private network access
ML inference workloads often need to reach private resources such as databases or internal services. This example peers the endpoint with your VPC network and logs prediction requests to BigQuery for monitoring.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const vertexNetwork = new gcp.compute.Network("vertex_network", {name: "network-name"});
const vertexRange = new gcp.compute.GlobalAddress("vertex_range", {
name: "address-name",
purpose: "VPC_PEERING",
addressType: "INTERNAL",
prefixLength: 24,
network: vertexNetwork.id,
});
const vertexVpcConnection = new gcp.servicenetworking.Connection("vertex_vpc_connection", {
network: vertexNetwork.id,
service: "servicenetworking.googleapis.com",
reservedPeeringRanges: [vertexRange.name],
});
const bqDataset = new gcp.bigquery.Dataset("bq_dataset", {
datasetId: "some_dataset",
friendlyName: "logging dataset",
description: "This is a dataset that requests are logged to",
location: "US",
deleteContentsOnDestroy: true,
});
const project = gcp.organizations.getProject({});
const endpoint = new gcp.vertex.AiEndpoint("endpoint", {
name: "endpoint-name",
displayName: "sample-endpoint",
description: "A sample vertex endpoint",
location: "us-central1",
region: "us-central1",
labels: {
"label-one": "value-one",
},
network: pulumi.all([project, vertexNetwork.name]).apply(([project, name]) => `projects/${project.number}/global/networks/${name}`),
encryptionSpec: {
kmsKeyName: "kms-name",
},
predictRequestResponseLoggingConfig: {
bigqueryDestination: {
outputUri: pulumi.all([project, bqDataset.datasetId]).apply(([project, datasetId]) => `bq://${project.projectId}.${datasetId}.request_response_logging`),
},
enabled: true,
samplingRate: 0.1,
},
trafficSplit: JSON.stringify({
"12345": 100,
}),
}, {
dependsOn: [vertexVpcConnection],
});
const cryptoKey = new gcp.kms.CryptoKeyIAMMember("crypto_key", {
cryptoKeyId: "kms-name",
role: "roles/cloudkms.cryptoKeyEncrypterDecrypter",
member: project.then(project => `serviceAccount:service-${project.number}@gcp-sa-aiplatform.iam.gserviceaccount.com`),
});
import pulumi
import json
import pulumi_gcp as gcp
vertex_network = gcp.compute.Network("vertex_network", name="network-name")
vertex_range = gcp.compute.GlobalAddress("vertex_range",
name="address-name",
purpose="VPC_PEERING",
address_type="INTERNAL",
prefix_length=24,
network=vertex_network.id)
vertex_vpc_connection = gcp.servicenetworking.Connection("vertex_vpc_connection",
network=vertex_network.id,
service="servicenetworking.googleapis.com",
reserved_peering_ranges=[vertex_range.name])
bq_dataset = gcp.bigquery.Dataset("bq_dataset",
dataset_id="some_dataset",
friendly_name="logging dataset",
description="This is a dataset that requests are logged to",
location="US",
delete_contents_on_destroy=True)
project = gcp.organizations.get_project()
endpoint = gcp.vertex.AiEndpoint("endpoint",
name="endpoint-name",
display_name="sample-endpoint",
description="A sample vertex endpoint",
location="us-central1",
region="us-central1",
labels={
"label-one": "value-one",
},
network=vertex_network.name.apply(lambda name: f"projects/{project.number}/global/networks/{name}"),
encryption_spec={
"kms_key_name": "kms-name",
},
predict_request_response_logging_config={
"bigquery_destination": {
"output_uri": bq_dataset.dataset_id.apply(lambda dataset_id: f"bq://{project.project_id}.{dataset_id}.request_response_logging"),
},
"enabled": True,
"sampling_rate": 0.1,
},
traffic_split=json.dumps({
"12345": 100,
}),
opts = pulumi.ResourceOptions(depends_on=[vertex_vpc_connection]))
crypto_key = gcp.kms.CryptoKeyIAMMember("crypto_key",
crypto_key_id="kms-name",
role="roles/cloudkms.cryptoKeyEncrypterDecrypter",
member=f"serviceAccount:service-{project.number}@gcp-sa-aiplatform.iam.gserviceaccount.com")
package main
import (
"encoding/json"
"fmt"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/compute"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/kms"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/organizations"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/servicenetworking"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/vertex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
vertexNetwork, err := compute.NewNetwork(ctx, "vertex_network", &compute.NetworkArgs{
Name: pulumi.String("network-name"),
})
if err != nil {
return err
}
vertexRange, err := compute.NewGlobalAddress(ctx, "vertex_range", &compute.GlobalAddressArgs{
Name: pulumi.String("address-name"),
Purpose: pulumi.String("VPC_PEERING"),
AddressType: pulumi.String("INTERNAL"),
PrefixLength: pulumi.Int(24),
Network: vertexNetwork.ID(),
})
if err != nil {
return err
}
vertexVpcConnection, err := servicenetworking.NewConnection(ctx, "vertex_vpc_connection", &servicenetworking.ConnectionArgs{
Network: vertexNetwork.ID(),
Service: pulumi.String("servicenetworking.googleapis.com"),
ReservedPeeringRanges: pulumi.StringArray{
vertexRange.Name,
},
})
if err != nil {
return err
}
bqDataset, err := bigquery.NewDataset(ctx, "bq_dataset", &bigquery.DatasetArgs{
DatasetId: pulumi.String("some_dataset"),
FriendlyName: pulumi.String("logging dataset"),
Description: pulumi.String("This is a dataset that requests are logged to"),
Location: pulumi.String("US"),
DeleteContentsOnDestroy: pulumi.Bool(true),
})
if err != nil {
return err
}
project, err := organizations.LookupProject(ctx, &organizations.LookupProjectArgs{}, nil)
if err != nil {
return err
}
tmpJSON0, err := json.Marshal(map[string]interface{}{
"12345": 100,
})
if err != nil {
return err
}
json0 := string(tmpJSON0)
_, err = vertex.NewAiEndpoint(ctx, "endpoint", &vertex.AiEndpointArgs{
Name: pulumi.String("endpoint-name"),
DisplayName: pulumi.String("sample-endpoint"),
Description: pulumi.String("A sample vertex endpoint"),
Location: pulumi.String("us-central1"),
Region: pulumi.String("us-central1"),
Labels: pulumi.StringMap{
"label-one": pulumi.String("value-one"),
},
Network: vertexNetwork.Name.ApplyT(func(name string) (string, error) {
return fmt.Sprintf("projects/%v/global/networks/%v", project.Number, name), nil
}).(pulumi.StringOutput),
EncryptionSpec: &vertex.AiEndpointEncryptionSpecArgs{
KmsKeyName: pulumi.String("kms-name"),
},
PredictRequestResponseLoggingConfig: &vertex.AiEndpointPredictRequestResponseLoggingConfigArgs{
BigqueryDestination: &vertex.AiEndpointPredictRequestResponseLoggingConfigBigqueryDestinationArgs{
OutputUri: bqDataset.DatasetId.ApplyT(func(datasetId string) (string, error) {
return fmt.Sprintf("bq://%v.%v.request_response_logging", project.ProjectId, datasetId), nil
}).(pulumi.StringOutput),
},
Enabled: pulumi.Bool(true),
SamplingRate: pulumi.Float64(0.1),
},
TrafficSplit: pulumi.String(json0),
}, pulumi.DependsOn([]pulumi.Resource{
vertexVpcConnection,
}))
if err != nil {
return err
}
_, err = kms.NewCryptoKeyIAMMember(ctx, "crypto_key", &kms.CryptoKeyIAMMemberArgs{
CryptoKeyId: pulumi.String("kms-name"),
Role: pulumi.String("roles/cloudkms.cryptoKeyEncrypterDecrypter"),
Member: pulumi.Sprintf("serviceAccount:service-%v@gcp-sa-aiplatform.iam.gserviceaccount.com", project.Number),
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var vertexNetwork = new Gcp.Compute.Network("vertex_network", new()
{
Name = "network-name",
});
var vertexRange = new Gcp.Compute.GlobalAddress("vertex_range", new()
{
Name = "address-name",
Purpose = "VPC_PEERING",
AddressType = "INTERNAL",
PrefixLength = 24,
Network = vertexNetwork.Id,
});
var vertexVpcConnection = new Gcp.ServiceNetworking.Connection("vertex_vpc_connection", new()
{
Network = vertexNetwork.Id,
Service = "servicenetworking.googleapis.com",
ReservedPeeringRanges = new[]
{
vertexRange.Name,
},
});
var bqDataset = new Gcp.BigQuery.Dataset("bq_dataset", new()
{
DatasetId = "some_dataset",
FriendlyName = "logging dataset",
Description = "This is a dataset that requests are logged to",
Location = "US",
DeleteContentsOnDestroy = true,
});
var project = Gcp.Organizations.GetProject.Invoke();
var endpoint = new Gcp.Vertex.AiEndpoint("endpoint", new()
{
Name = "endpoint-name",
DisplayName = "sample-endpoint",
Description = "A sample vertex endpoint",
Location = "us-central1",
Region = "us-central1",
Labels =
{
{ "label-one", "value-one" },
},
Network = Output.Tuple(project, vertexNetwork.Name).Apply(values =>
{
var project = values.Item1;
var name = values.Item2;
return $"projects/{project.Apply(getProjectResult => getProjectResult.Number)}/global/networks/{name}";
}),
EncryptionSpec = new Gcp.Vertex.Inputs.AiEndpointEncryptionSpecArgs
{
KmsKeyName = "kms-name",
},
PredictRequestResponseLoggingConfig = new Gcp.Vertex.Inputs.AiEndpointPredictRequestResponseLoggingConfigArgs
{
BigqueryDestination = new Gcp.Vertex.Inputs.AiEndpointPredictRequestResponseLoggingConfigBigqueryDestinationArgs
{
OutputUri = Output.Tuple(project, bqDataset.DatasetId).Apply(values =>
{
var project = values.Item1;
var datasetId = values.Item2;
return $"bq://{project.Apply(getProjectResult => getProjectResult.ProjectId)}.{datasetId}.request_response_logging";
}),
},
Enabled = true,
SamplingRate = 0.1,
},
TrafficSplit = JsonSerializer.Serialize(new Dictionary<string, object?>
{
["12345"] = 100,
}),
}, new CustomResourceOptions
{
DependsOn =
{
vertexVpcConnection,
},
});
var cryptoKey = new Gcp.Kms.CryptoKeyIAMMember("crypto_key", new()
{
CryptoKeyId = "kms-name",
Role = "roles/cloudkms.cryptoKeyEncrypterDecrypter",
Member = $"serviceAccount:service-{project.Apply(getProjectResult => getProjectResult.Number)}@gcp-sa-aiplatform.iam.gserviceaccount.com",
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.compute.Network;
import com.pulumi.gcp.compute.NetworkArgs;
import com.pulumi.gcp.compute.GlobalAddress;
import com.pulumi.gcp.compute.GlobalAddressArgs;
import com.pulumi.gcp.servicenetworking.Connection;
import com.pulumi.gcp.servicenetworking.ConnectionArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.organizations.OrganizationsFunctions;
import com.pulumi.gcp.organizations.inputs.GetProjectArgs;
import com.pulumi.gcp.vertex.AiEndpoint;
import com.pulumi.gcp.vertex.AiEndpointArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointEncryptionSpecArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointPredictRequestResponseLoggingConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointPredictRequestResponseLoggingConfigBigqueryDestinationArgs;
import com.pulumi.gcp.kms.CryptoKeyIAMMember;
import com.pulumi.gcp.kms.CryptoKeyIAMMemberArgs;
import static com.pulumi.codegen.internal.Serialization.*;
import com.pulumi.resources.CustomResourceOptions;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var vertexNetwork = new Network("vertexNetwork", NetworkArgs.builder()
.name("network-name")
.build());
var vertexRange = new GlobalAddress("vertexRange", GlobalAddressArgs.builder()
.name("address-name")
.purpose("VPC_PEERING")
.addressType("INTERNAL")
.prefixLength(24)
.network(vertexNetwork.id())
.build());
var vertexVpcConnection = new Connection("vertexVpcConnection", ConnectionArgs.builder()
.network(vertexNetwork.id())
.service("servicenetworking.googleapis.com")
.reservedPeeringRanges(vertexRange.name())
.build());
var bqDataset = new Dataset("bqDataset", DatasetArgs.builder()
.datasetId("some_dataset")
.friendlyName("logging dataset")
.description("This is a dataset that requests are logged to")
.location("US")
.deleteContentsOnDestroy(true)
.build());
final var project = OrganizationsFunctions.getProject(GetProjectArgs.builder()
.build());
var endpoint = new AiEndpoint("endpoint", AiEndpointArgs.builder()
.name("endpoint-name")
.displayName("sample-endpoint")
.description("A sample vertex endpoint")
.location("us-central1")
.region("us-central1")
.labels(Map.of("label-one", "value-one"))
.network(vertexNetwork.name().applyValue(_name -> String.format("projects/%s/global/networks/%s", project.number(),_name)))
.encryptionSpec(AiEndpointEncryptionSpecArgs.builder()
.kmsKeyName("kms-name")
.build())
.predictRequestResponseLoggingConfig(AiEndpointPredictRequestResponseLoggingConfigArgs.builder()
.bigqueryDestination(AiEndpointPredictRequestResponseLoggingConfigBigqueryDestinationArgs.builder()
.outputUri(bqDataset.datasetId().applyValue(_datasetId -> String.format("bq://%s.%s.request_response_logging", project.projectId(),_datasetId)))
.build())
.enabled(true)
.samplingRate(0.1)
.build())
.trafficSplit(serializeJson(
jsonObject(
jsonProperty("12345", 100)
)))
.build(), CustomResourceOptions.builder()
.dependsOn(vertexVpcConnection)
.build());
var cryptoKey = new CryptoKeyIAMMember("cryptoKey", CryptoKeyIAMMemberArgs.builder()
.cryptoKeyId("kms-name")
.role("roles/cloudkms.cryptoKeyEncrypterDecrypter")
.member(String.format("serviceAccount:service-%s@gcp-sa-aiplatform.iam.gserviceaccount.com", project.number()))
.build());
}
}
resources:
endpoint:
type: gcp:vertex:AiEndpoint
properties:
name: endpoint-name
displayName: sample-endpoint
description: A sample vertex endpoint
location: us-central1
region: us-central1
labels:
label-one: value-one
network: projects/${project.number}/global/networks/${vertexNetwork.name}
encryptionSpec:
kmsKeyName: kms-name
predictRequestResponseLoggingConfig:
bigqueryDestination:
outputUri: bq://${project.projectId}.${bqDataset.datasetId}.request_response_logging
enabled: true
samplingRate: 0.1
trafficSplit:
fn::toJSON:
'12345': 100
options:
dependsOn:
- ${vertexVpcConnection}
vertexVpcConnection:
type: gcp:servicenetworking:Connection
name: vertex_vpc_connection
properties:
network: ${vertexNetwork.id}
service: servicenetworking.googleapis.com
reservedPeeringRanges:
- ${vertexRange.name}
vertexRange:
type: gcp:compute:GlobalAddress
name: vertex_range
properties:
name: address-name
purpose: VPC_PEERING
addressType: INTERNAL
prefixLength: 24
network: ${vertexNetwork.id}
vertexNetwork:
type: gcp:compute:Network
name: vertex_network
properties:
name: network-name
cryptoKey:
type: gcp:kms:CryptoKeyIAMMember
name: crypto_key
properties:
cryptoKeyId: kms-name
role: roles/cloudkms.cryptoKeyEncrypterDecrypter
member: serviceAccount:service-${project.number}@gcp-sa-aiplatform.iam.gserviceaccount.com
bqDataset:
type: gcp:bigquery:Dataset
name: bq_dataset
properties:
datasetId: some_dataset
friendlyName: logging dataset
description: This is a dataset that requests are logged to
location: US
deleteContentsOnDestroy: true
variables:
project:
fn::invoke:
function: gcp:organizations:getProject
arguments: {}
The network property connects the endpoint to your VPC using the format projects/{project_number}/global/networks/{network_name}. The predictRequestResponseLoggingConfig sends prediction requests and responses to BigQuery at the specified samplingRate (0.1 means 10% of requests). The encryptionSpec secures the endpoint with a customer-managed KMS key. The trafficSplit property controls which deployed models receive traffic; here "12345": 100 routes all traffic to model ID 12345. Note that VPC peering must be established via servicenetworking.Connection before the endpoint can use the network.
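If other stacks or monitoring tools need to find the endpoint and its logging table, you can surface both as stack outputs. A minimal TypeScript sketch, assuming the endpoint, project, and bqDataset resources from the TypeScript example above:
// Endpoint resource ID, useful for referencing the endpoint from other stacks or tooling.
export const endpointId = endpoint.id;
// Fully qualified BigQuery table that receives the sampled request/response logs.
export const loggingTable = pulumi
    .all([project, bqDataset.datasetId])
    .apply(([p, datasetId]) => `${p.projectId}.${datasetId}.request_response_logging`);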
Expose endpoint through Private Service Connect
Private Service Connect provides an alternative to VPC peering by exposing endpoints through dedicated service attachments, allowing cross-project access without network peering.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const _default = new gcp.compute.Network("default", {name: "psc-network-_42503"});
const project = gcp.organizations.getProject({});
const endpoint = new gcp.vertex.AiEndpoint("endpoint", {
name: "endpoint-name_9991",
displayName: "sample-endpoint",
description: "A sample vertex endpoint",
location: "us-central1",
region: "us-central1",
labels: {
"label-one": "value-one",
},
privateServiceConnectConfig: {
enablePrivateServiceConnect: true,
projectAllowlists: [project.then(project => project.projectId)],
pscAutomationConfigs: [{
projectId: project.then(project => project.projectId),
network: _default.id,
}],
},
});
import pulumi
import pulumi_gcp as gcp
default = gcp.compute.Network("default", name="psc-network-_42503")
project = gcp.organizations.get_project()
endpoint = gcp.vertex.AiEndpoint("endpoint",
name="endpoint-name_9991",
display_name="sample-endpoint",
description="A sample vertex endpoint",
location="us-central1",
region="us-central1",
labels={
"label-one": "value-one",
},
private_service_connect_config={
"enable_private_service_connect": True,
"project_allowlists": [project.project_id],
"psc_automation_configs": [{
"project_id": project.project_id,
"network": default.id,
}],
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/compute"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/organizations"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/vertex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_default, err := compute.NewNetwork(ctx, "default", &compute.NetworkArgs{
Name: pulumi.String("psc-network-_42503"),
})
if err != nil {
return err
}
project, err := organizations.LookupProject(ctx, &organizations.LookupProjectArgs{}, nil)
if err != nil {
return err
}
_, err = vertex.NewAiEndpoint(ctx, "endpoint", &vertex.AiEndpointArgs{
Name: pulumi.String("endpoint-name_9991"),
DisplayName: pulumi.String("sample-endpoint"),
Description: pulumi.String("A sample vertex endpoint"),
Location: pulumi.String("us-central1"),
Region: pulumi.String("us-central1"),
Labels: pulumi.StringMap{
"label-one": pulumi.String("value-one"),
},
PrivateServiceConnectConfig: &vertex.AiEndpointPrivateServiceConnectConfigArgs{
EnablePrivateServiceConnect: pulumi.Bool(true),
ProjectAllowlists: pulumi.StringArray{
pulumi.String(project.ProjectId),
},
PscAutomationConfigs: vertex.AiEndpointPrivateServiceConnectConfigPscAutomationConfigArray{
&vertex.AiEndpointPrivateServiceConnectConfigPscAutomationConfigArgs{
ProjectId: pulumi.String(project.ProjectId),
Network: _default.ID(),
},
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var @default = new Gcp.Compute.Network("default", new()
{
Name = "psc-network-_42503",
});
var project = Gcp.Organizations.GetProject.Invoke();
var endpoint = new Gcp.Vertex.AiEndpoint("endpoint", new()
{
Name = "endpoint-name_9991",
DisplayName = "sample-endpoint",
Description = "A sample vertex endpoint",
Location = "us-central1",
Region = "us-central1",
Labels =
{
{ "label-one", "value-one" },
},
PrivateServiceConnectConfig = new Gcp.Vertex.Inputs.AiEndpointPrivateServiceConnectConfigArgs
{
EnablePrivateServiceConnect = true,
ProjectAllowlists = new[]
{
project.Apply(getProjectResult => getProjectResult.ProjectId),
},
PscAutomationConfigs = new[]
{
new Gcp.Vertex.Inputs.AiEndpointPrivateServiceConnectConfigPscAutomationConfigArgs
{
ProjectId = project.Apply(getProjectResult => getProjectResult.ProjectId),
Network = @default.Id,
},
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.compute.Network;
import com.pulumi.gcp.compute.NetworkArgs;
import com.pulumi.gcp.organizations.OrganizationsFunctions;
import com.pulumi.gcp.organizations.inputs.GetProjectArgs;
import com.pulumi.gcp.vertex.AiEndpoint;
import com.pulumi.gcp.vertex.AiEndpointArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointPrivateServiceConnectConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointPrivateServiceConnectConfigPscAutomationConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var default_ = new Network("default", NetworkArgs.builder()
.name("psc-network-_42503")
.build());
final var project = OrganizationsFunctions.getProject(GetProjectArgs.builder()
.build());
var endpoint = new AiEndpoint("endpoint", AiEndpointArgs.builder()
.name("endpoint-name_9991")
.displayName("sample-endpoint")
.description("A sample vertex endpoint")
.location("us-central1")
.region("us-central1")
.labels(Map.of("label-one", "value-one"))
.privateServiceConnectConfig(AiEndpointPrivateServiceConnectConfigArgs.builder()
.enablePrivateServiceConnect(true)
.projectAllowlists(project.projectId())
.pscAutomationConfigs(AiEndpointPrivateServiceConnectConfigPscAutomationConfigArgs.builder()
.projectId(project.projectId())
.network(default_.id())
.build())
.build())
.build());
}
}
resources:
default:
type: gcp:compute:Network
properties:
name: psc-network-_42503
endpoint:
type: gcp:vertex:AiEndpoint
properties:
name: endpoint-name_9991
displayName: sample-endpoint
description: A sample vertex endpoint
location: us-central1
region: us-central1
labels:
label-one: value-one
privateServiceConnectConfig:
enablePrivateServiceConnect: true
projectAllowlists:
- ${project.projectId}
pscAutomationConfigs:
- projectId: ${project.projectId}
network: ${default.id}
variables:
project:
fn::invoke:
function: gcp:organizations:getProject
arguments: {}
The privateServiceConnectConfig enables Private Service Connect and defines which projects can access the endpoint. The projectAllowlists property specifies allowed project IDs, while pscAutomationConfigs defines the network where PSC attachments are created. This approach isolates endpoint traffic without requiring VPC peering, making it useful for cross-project or multi-tenant scenarios. Only one of network or privateServiceConnectConfig can be set on an endpoint.
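Cross-project access works by listing each consumer project in projectAllowlists. A hedged TypeScript sketch of an endpoint that allowlists one additional project; the resource name, endpoint name, and "consumer-project-id" are illustrative placeholders, not values from the example above:
import * as gcp from "@pulumi/gcp";

const project = gcp.organizations.getProject({});
const sharedEndpoint = new gcp.vertex.AiEndpoint("shared-endpoint", {
    name: "endpoint-name-shared",        // placeholder endpoint name
    displayName: "sample-endpoint-shared",
    location: "us-central1",
    privateServiceConnectConfig: {
        enablePrivateServiceConnect: true,
        // The host project plus a second, hypothetical consumer project
        // that is allowed to create PSC connections to this endpoint.
        projectAllowlists: [
            project.then(p => p.projectId),
            "consumer-project-id",       // placeholder consumer project ID
        ],
    },
});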
Enable dedicated DNS for isolated traffic
Production workloads requiring performance isolation can use dedicated endpoints, which provide a separate DNS hostname and isolated traffic path.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const endpoint = new gcp.vertex.AiEndpoint("endpoint", {
name: "endpoint-name_12125",
displayName: "sample-endpoint",
description: "A sample vertex endpoint",
location: "us-central1",
region: "us-central1",
labels: {
"label-one": "value-one",
},
dedicatedEndpointEnabled: true,
});
const project = gcp.organizations.getProject({});
import pulumi
import pulumi_gcp as gcp
endpoint = gcp.vertex.AiEndpoint("endpoint",
name="endpoint-name_12125",
display_name="sample-endpoint",
description="A sample vertex endpoint",
location="us-central1",
region="us-central1",
labels={
"label-one": "value-one",
},
dedicated_endpoint_enabled=True)
project = gcp.organizations.get_project()
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/organizations"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/vertex"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := vertex.NewAiEndpoint(ctx, "endpoint", &vertex.AiEndpointArgs{
Name: pulumi.String("endpoint-name_12125"),
DisplayName: pulumi.String("sample-endpoint"),
Description: pulumi.String("A sample vertex endpoint"),
Location: pulumi.String("us-central1"),
Region: pulumi.String("us-central1"),
Labels: pulumi.StringMap{
"label-one": pulumi.String("value-one"),
},
DedicatedEndpointEnabled: pulumi.Bool(true),
})
if err != nil {
return err
}
_, err = organizations.LookupProject(ctx, &organizations.LookupProjectArgs{}, nil)
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var endpoint = new Gcp.Vertex.AiEndpoint("endpoint", new()
{
Name = "endpoint-name_12125",
DisplayName = "sample-endpoint",
Description = "A sample vertex endpoint",
Location = "us-central1",
Region = "us-central1",
Labels =
{
{ "label-one", "value-one" },
},
DedicatedEndpointEnabled = true,
});
var project = Gcp.Organizations.GetProject.Invoke();
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.vertex.AiEndpoint;
import com.pulumi.gcp.vertex.AiEndpointArgs;
import com.pulumi.gcp.organizations.OrganizationsFunctions;
import com.pulumi.gcp.organizations.inputs.GetProjectArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var endpoint = new AiEndpoint("endpoint", AiEndpointArgs.builder()
.name("endpoint-name_12125")
.displayName("sample-endpoint")
.description("A sample vertex endpoint")
.location("us-central1")
.region("us-central1")
.labels(Map.of("label-one", "value-one"))
.dedicatedEndpointEnabled(true)
.build());
final var project = OrganizationsFunctions.getProject(GetProjectArgs.builder()
.build());
}
}
resources:
endpoint:
type: gcp:vertex:AiEndpoint
properties:
name: endpoint-name_12125
displayName: sample-endpoint
description: A sample vertex endpoint
location: us-central1
region: us-central1
labels:
label-one: value-one
dedicatedEndpointEnabled: true
variables:
project:
fn::invoke:
function: gcp:organizations:getProject
arguments: {}
Setting dedicatedEndpointEnabled to true provisions a dedicated DNS hostname (available in the dedicatedEndpointDns output property) in the format https://{endpointId}.{region}-{projectNumber}.prediction.vertexai.goog. Requests sent to this hostname are isolated from shared infrastructure traffic. Once enabled, the endpoint no longer accepts requests at the shared regional DNS. This is the simplest networking configuration, requiring no VPC setup.
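Because clients must target the dedicated hostname, a common follow-up is exporting it as a stack output. A minimal TypeScript sketch, assuming the endpoint resource from the example above:
// Dedicated hostname, e.g. https://{endpointId}.{region}-{projectNumber}.prediction.vertexai.goog
export const dedicatedDns = endpoint.dedicatedEndpointDns;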
Beyond these examples
These snippets focus on specific endpoint-level features: VPC peering and Private Service Connect networking, prediction logging to BigQuery, and dedicated DNS endpoints. They’re intentionally minimal rather than full ML serving deployments.
The examples may reference pre-existing infrastructure such as VPC networks and service networking connections, KMS encryption keys with IAM bindings, and BigQuery datasets (or API access to create them). They focus on configuring the endpoint rather than deploying models or managing traffic.
To keep things focused, common endpoint patterns are omitted, including:
- Model deployment and traffic splitting (trafficSplit configuration)
- Monitoring job configuration (modelDeploymentMonitoringJob)
- Label management and organization
- Cross-region or multi-region deployment
These omissions are intentional: the goal is to illustrate how each endpoint feature is wired, not provide drop-in ML serving modules. See the Vertex AI Endpoint resource reference for all available configuration options.
Frequently Asked Questions
Networking & Connectivity
The network and privateServiceConnectConfig properties are mutually exclusive. Choose one connectivity method: use network for VPC peering or privateServiceConnectConfig for Private Service Connect.
To peer an endpoint with a VPC, set the network property to the full VPC network path (format: projects/{project}/global/networks/{network}). You’ll need to create a servicenetworking.Connection resource first and reference it with dependsOn to ensure proper setup order.
To expose an endpoint through Private Service Connect, set privateServiceConnectConfig with enablePrivateServiceConnect set to true, specify projectAllowlists, and configure pscAutomationConfigs with the project ID and network.
Once dedicatedEndpointEnabled is set, you can’t send requests to the shared DNS anymore. This limitation will be removed in a future release, so evaluate carefully before enabling.
Traffic Management
trafficSplit to "{}", apply the change, then remove the field from your configuration. You can’t simply omit the field initially.Security & Monitoring
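For example, step one of the reset could look like this in the first TypeScript example (a sketch of editing the existing endpoint definition, other arguments omitted for brevity; step two is deleting the trafficSplit line and applying again):
const endpoint = new gcp.vertex.AiEndpoint("endpoint", {
    name: "endpoint-name",
    displayName: "sample-endpoint",
    location: "us-central1",
    // step 1: apply an explicitly empty split
    trafficSplit: JSON.stringify({}),
    // step 2: delete the trafficSplit line above and apply again
});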
Security & Monitoring
To encrypt an endpoint with a customer-managed key, set encryptionSpec.kmsKeyName to your KMS key name. You’ll also need to grant the Vertex AI service account (service-{project-number}@gcp-sa-aiplatform.iam.gserviceaccount.com) the roles/cloudkms.cryptoKeyEncrypterDecrypter role on the key.
To log prediction requests and responses, configure predictRequestResponseLoggingConfig with a bigqueryDestination.outputUri (format: bq://{project}.{dataset}.{table}), set enabled to true, and specify a samplingRate (e.g., 0.1 for 10%).
Resource Configuration
The immutable properties are location, name, project, region, encryptionSpec, and network. Changing any of these requires recreating the endpoint.