Deploy GCP Vertex AI Endpoints

The gcp:vertex/aiEndpoint:AiEndpoint resource, part of the Pulumi GCP provider, defines a Vertex AI endpoint that serves deployed ML models and handles prediction requests. This guide focuses on three capabilities: VPC peering for private connectivity, Private Service Connect for isolated access, and dedicated endpoints for performance isolation.

Endpoints reference VPC networks, KMS keys, and BigQuery datasets that must exist separately. Models are deployed to endpoints after creation using separate API calls. The examples are intentionally small. Combine them with your own model deployment and traffic management configuration.

Peer endpoint to VPC for private model access

ML deployments that need to access private resources or restrict network exposure often peer Vertex AI endpoints to a VPC network with private service access configured.

// Peer a Vertex AI endpoint to a VPC through private services access.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// VPC network the endpoint is peered with.
const vertexNetwork = new gcp.compute.Network("vertex_network", {name: "network-name"});
// Reserved internal /24 range consumed by the service networking peering.
const vertexRange = new gcp.compute.GlobalAddress("vertex_range", {
    name: "address-name",
    purpose: "VPC_PEERING",
    addressType: "INTERNAL",
    prefixLength: 24,
    network: vertexNetwork.id,
});
// Private services access connection; the endpoint depends on this peering.
const vertexVpcConnection = new gcp.servicenetworking.Connection("vertex_vpc_connection", {
    network: vertexNetwork.id,
    service: "servicenetworking.googleapis.com",
    reservedPeeringRanges: [vertexRange.name],
});
// BigQuery dataset that receives the sampled prediction request/response logs.
const bqDataset = new gcp.bigquery.Dataset("bq_dataset", {
    datasetId: "some_dataset",
    friendlyName: "logging dataset",
    description: "This is a dataset that requests are logged to",
    location: "US",
    deleteContentsOnDestroy: true,
});
// Current project; supplies the project number for the network path and the
// service agent email below.
const project = gcp.organizations.getProject({});
const endpoint = new gcp.vertex.AiEndpoint("endpoint", {
    name: "endpoint-name",
    displayName: "sample-endpoint",
    description: "A sample vertex endpoint",
    location: "us-central1",
    region: "us-central1",
    labels: {
        "label-one": "value-one",
    },
    // Full network path: projects/{project number}/global/networks/{name}.
    network: pulumi.all([project, vertexNetwork.name]).apply(([project, name]) => `projects/${project.number}/global/networks/${name}`),
    // Customer-managed encryption key. NOTE(review): "kms-name" looks like a
    // placeholder — a full key resource name is presumably expected; confirm.
    encryptionSpec: {
        kmsKeyName: "kms-name",
    },
    // Log 10% (samplingRate 0.1) of prediction traffic to BigQuery.
    predictRequestResponseLoggingConfig: {
        bigqueryDestination: {
            outputUri: pulumi.all([project, bqDataset.datasetId]).apply(([project, datasetId]) => `bq://${project.projectId}.${datasetId}.request_response_logging`),
        },
        enabled: true,
        samplingRate: 0.1,
    },
    // Route 100% of traffic to deployed model ID "12345".
    trafficSplit: JSON.stringify({
        "12345": 100,
    }),
}, {
    // Ensure the VPC peering exists before the endpoint is created.
    dependsOn: [vertexVpcConnection],
});
// Grant the Vertex AI service agent permission to use the CMEK key.
const cryptoKey = new gcp.kms.CryptoKeyIAMMember("crypto_key", {
    cryptoKeyId: "kms-name",
    role: "roles/cloudkms.cryptoKeyEncrypterDecrypter",
    member: project.then(project => `serviceAccount:service-${project.number}@gcp-sa-aiplatform.iam.gserviceaccount.com`),
});
# Peer a Vertex AI endpoint to a VPC through private services access.
import pulumi
import json
import pulumi_gcp as gcp

# VPC network the endpoint is peered with.
vertex_network = gcp.compute.Network("vertex_network", name="network-name")
# Reserved internal /24 range consumed by the service networking peering.
vertex_range = gcp.compute.GlobalAddress("vertex_range",
    name="address-name",
    purpose="VPC_PEERING",
    address_type="INTERNAL",
    prefix_length=24,
    network=vertex_network.id)
# Private services access connection; the endpoint depends on this peering.
vertex_vpc_connection = gcp.servicenetworking.Connection("vertex_vpc_connection",
    network=vertex_network.id,
    service="servicenetworking.googleapis.com",
    reserved_peering_ranges=[vertex_range.name])
# BigQuery dataset that receives the sampled prediction request/response logs.
bq_dataset = gcp.bigquery.Dataset("bq_dataset",
    dataset_id="some_dataset",
    friendly_name="logging dataset",
    description="This is a dataset that requests are logged to",
    location="US",
    delete_contents_on_destroy=True)
# Current project; supplies the project number for the network path and the
# service agent email below.
project = gcp.organizations.get_project()
endpoint = gcp.vertex.AiEndpoint("endpoint",
    name="endpoint-name",
    display_name="sample-endpoint",
    description="A sample vertex endpoint",
    location="us-central1",
    region="us-central1",
    labels={
        "label-one": "value-one",
    },
    # Full network path: projects/{project number}/global/networks/{name}.
    network=vertex_network.name.apply(lambda name: f"projects/{project.number}/global/networks/{name}"),
    # Customer-managed encryption key ("kms-name" is a placeholder).
    encryption_spec={
        "kms_key_name": "kms-name",
    },
    # Log 10% (sampling_rate 0.1) of prediction traffic to BigQuery.
    predict_request_response_logging_config={
        "bigquery_destination": {
            "output_uri": bq_dataset.dataset_id.apply(lambda dataset_id: f"bq://{project.project_id}.{dataset_id}.request_response_logging"),
        },
        "enabled": True,
        "sampling_rate": 0.1,
    },
    # Route 100% of traffic to deployed model ID "12345".
    traffic_split=json.dumps({
        "12345": 100,
    }),
    # PEP 8: no spaces around '=' for keyword arguments (was `opts = ...`).
    # depends_on ensures the VPC peering exists before the endpoint is created.
    opts=pulumi.ResourceOptions(depends_on=[vertex_vpc_connection]))
# Grant the Vertex AI service agent permission to use the CMEK key.
crypto_key = gcp.kms.CryptoKeyIAMMember("crypto_key",
    crypto_key_id="kms-name",
    role="roles/cloudkms.cryptoKeyEncrypterDecrypter",
    member=f"serviceAccount:service-{project.number}@gcp-sa-aiplatform.iam.gserviceaccount.com")
// Peer a Vertex AI endpoint to a VPC through private services access.
package main

import (
	"encoding/json"
	"fmt"

	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/compute"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/kms"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/organizations"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/servicenetworking"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/vertex"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// VPC network the endpoint is peered with.
		vertexNetwork, err := compute.NewNetwork(ctx, "vertex_network", &compute.NetworkArgs{
			Name: pulumi.String("network-name"),
		})
		if err != nil {
			return err
		}
		// Reserved internal /24 range consumed by the service networking peering.
		vertexRange, err := compute.NewGlobalAddress(ctx, "vertex_range", &compute.GlobalAddressArgs{
			Name:         pulumi.String("address-name"),
			Purpose:      pulumi.String("VPC_PEERING"),
			AddressType:  pulumi.String("INTERNAL"),
			PrefixLength: pulumi.Int(24),
			Network:      vertexNetwork.ID(),
		})
		if err != nil {
			return err
		}
		// Private services access connection; the endpoint depends on this peering.
		vertexVpcConnection, err := servicenetworking.NewConnection(ctx, "vertex_vpc_connection", &servicenetworking.ConnectionArgs{
			Network: vertexNetwork.ID(),
			Service: pulumi.String("servicenetworking.googleapis.com"),
			ReservedPeeringRanges: pulumi.StringArray{
				vertexRange.Name,
			},
		})
		if err != nil {
			return err
		}
		// BigQuery dataset that receives the sampled request/response logs.
		bqDataset, err := bigquery.NewDataset(ctx, "bq_dataset", &bigquery.DatasetArgs{
			DatasetId:               pulumi.String("some_dataset"),
			FriendlyName:            pulumi.String("logging dataset"),
			Description:             pulumi.String("This is a dataset that requests are logged to"),
			Location:                pulumi.String("US"),
			DeleteContentsOnDestroy: pulumi.Bool(true),
		})
		if err != nil {
			return err
		}
		// Current project; supplies the project number and ID used below.
		project, err := organizations.LookupProject(ctx, &organizations.LookupProjectArgs{}, nil)
		if err != nil {
			return err
		}
		// Serialize the traffic split: route 100% to deployed model ID "12345".
		tmpJSON0, err := json.Marshal(map[string]interface{}{
			"12345": 100,
		})
		if err != nil {
			return err
		}
		json0 := string(tmpJSON0)
		_, err = vertex.NewAiEndpoint(ctx, "endpoint", &vertex.AiEndpointArgs{
			Name:        pulumi.String("endpoint-name"),
			DisplayName: pulumi.String("sample-endpoint"),
			Description: pulumi.String("A sample vertex endpoint"),
			Location:    pulumi.String("us-central1"),
			Region:      pulumi.String("us-central1"),
			Labels: pulumi.StringMap{
				"label-one": pulumi.String("value-one"),
			},
			// Full network path: projects/{project number}/global/networks/{name}.
			Network: vertexNetwork.Name.ApplyT(func(name string) (string, error) {
				return fmt.Sprintf("projects/%v/global/networks/%v", project.Number, name), nil
			}).(pulumi.StringOutput),
			// Customer-managed encryption key ("kms-name" is a placeholder).
			EncryptionSpec: &vertex.AiEndpointEncryptionSpecArgs{
				KmsKeyName: pulumi.String("kms-name"),
			},
			// Log 10% (SamplingRate 0.1) of prediction traffic to BigQuery.
			PredictRequestResponseLoggingConfig: &vertex.AiEndpointPredictRequestResponseLoggingConfigArgs{
				BigqueryDestination: &vertex.AiEndpointPredictRequestResponseLoggingConfigBigqueryDestinationArgs{
					OutputUri: bqDataset.DatasetId.ApplyT(func(datasetId string) (string, error) {
						return fmt.Sprintf("bq://%v.%v.request_response_logging", project.ProjectId, datasetId), nil
					}).(pulumi.StringOutput),
				},
				Enabled:      pulumi.Bool(true),
				SamplingRate: pulumi.Float64(0.1),
			},
			TrafficSplit: pulumi.String(json0),
		}, pulumi.DependsOn([]pulumi.Resource{
			// Ensure the VPC peering exists before the endpoint is created.
			vertexVpcConnection,
		}))
		if err != nil {
			return err
		}
		// Grant the Vertex AI service agent permission to use the CMEK key.
		_, err = kms.NewCryptoKeyIAMMember(ctx, "crypto_key", &kms.CryptoKeyIAMMemberArgs{
			CryptoKeyId: pulumi.String("kms-name"),
			Role:        pulumi.String("roles/cloudkms.cryptoKeyEncrypterDecrypter"),
			Member:      pulumi.Sprintf("serviceAccount:service-%v@gcp-sa-aiplatform.iam.gserviceaccount.com", project.Number),
		})
		if err != nil {
			return err
		}
		return nil
	})
}
// Peer a Vertex AI endpoint to a VPC through private services access.
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    // VPC network the endpoint is peered with.
    var vertexNetwork = new Gcp.Compute.Network("vertex_network", new()
    {
        Name = "network-name",
    });

    // Reserved internal /24 range consumed by the service networking peering.
    var vertexRange = new Gcp.Compute.GlobalAddress("vertex_range", new()
    {
        Name = "address-name",
        Purpose = "VPC_PEERING",
        AddressType = "INTERNAL",
        PrefixLength = 24,
        Network = vertexNetwork.Id,
    });

    // Private services access connection; the endpoint depends on this peering.
    var vertexVpcConnection = new Gcp.ServiceNetworking.Connection("vertex_vpc_connection", new()
    {
        Network = vertexNetwork.Id,
        Service = "servicenetworking.googleapis.com",
        ReservedPeeringRanges = new[]
        {
            vertexRange.Name,
        },
    });

    // BigQuery dataset that receives the sampled request/response logs.
    var bqDataset = new Gcp.BigQuery.Dataset("bq_dataset", new()
    {
        DatasetId = "some_dataset",
        FriendlyName = "logging dataset",
        Description = "This is a dataset that requests are logged to",
        Location = "US",
        DeleteContentsOnDestroy = true,
    });

    // Current project; supplies the project number and ID used below.
    var project = Gcp.Organizations.GetProject.Invoke();

    var endpoint = new Gcp.Vertex.AiEndpoint("endpoint", new()
    {
        Name = "endpoint-name",
        DisplayName = "sample-endpoint",
        Description = "A sample vertex endpoint",
        Location = "us-central1",
        Region = "us-central1",
        Labels = 
        {
            { "label-one", "value-one" },
        },
        // Output.Tuple already unwraps the invoke result inside Apply, so the
        // lambda sees plain values — the original nested .Apply calls on the
        // unwrapped GetProjectResult would not compile.
        Network = Output.Tuple(project, vertexNetwork.Name).Apply(values =>
        {
            var projectResult = values.Item1;
            var name = values.Item2;
            return $"projects/{projectResult.Number}/global/networks/{name}";
        }),
        // Customer-managed encryption key ("kms-name" is a placeholder).
        EncryptionSpec = new Gcp.Vertex.Inputs.AiEndpointEncryptionSpecArgs
        {
            KmsKeyName = "kms-name",
        },
        // Log 10% (SamplingRate 0.1) of prediction traffic to BigQuery.
        PredictRequestResponseLoggingConfig = new Gcp.Vertex.Inputs.AiEndpointPredictRequestResponseLoggingConfigArgs
        {
            BigqueryDestination = new Gcp.Vertex.Inputs.AiEndpointPredictRequestResponseLoggingConfigBigqueryDestinationArgs
            {
                OutputUri = Output.Tuple(project, bqDataset.DatasetId).Apply(values =>
                {
                    var projectResult = values.Item1;
                    var datasetId = values.Item2;
                    return $"bq://{projectResult.ProjectId}.{datasetId}.request_response_logging";
                }),
            },
            Enabled = true,
            SamplingRate = 0.1,
        },
        // Route 100% of traffic to deployed model ID "12345".
        TrafficSplit = JsonSerializer.Serialize(new Dictionary<string, object?>
        {
            ["12345"] = 100,
        }),
    }, new CustomResourceOptions
    {
        // Ensure the VPC peering exists before the endpoint is created.
        DependsOn =
        {
            vertexVpcConnection,
        },
    });

    // Grant the Vertex AI service agent permission to use the CMEK key.
    // Build the member string inside Apply: interpolating an Output<T>
    // directly into a string would embed a diagnostic placeholder, not the value.
    var cryptoKey = new Gcp.Kms.CryptoKeyIAMMember("crypto_key", new()
    {
        CryptoKeyId = "kms-name",
        Role = "roles/cloudkms.cryptoKeyEncrypterDecrypter",
        Member = project.Apply(getProjectResult => $"serviceAccount:service-{getProjectResult.Number}@gcp-sa-aiplatform.iam.gserviceaccount.com"),
    });

});
// Peer a Vertex AI endpoint to a VPC through private services access.
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.compute.Network;
import com.pulumi.gcp.compute.NetworkArgs;
import com.pulumi.gcp.compute.GlobalAddress;
import com.pulumi.gcp.compute.GlobalAddressArgs;
import com.pulumi.gcp.servicenetworking.Connection;
import com.pulumi.gcp.servicenetworking.ConnectionArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.organizations.OrganizationsFunctions;
import com.pulumi.gcp.organizations.inputs.GetProjectArgs;
import com.pulumi.gcp.vertex.AiEndpoint;
import com.pulumi.gcp.vertex.AiEndpointArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointEncryptionSpecArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointPredictRequestResponseLoggingConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointPredictRequestResponseLoggingConfigBigqueryDestinationArgs;
import com.pulumi.gcp.kms.CryptoKeyIAMMember;
import com.pulumi.gcp.kms.CryptoKeyIAMMemberArgs;
import static com.pulumi.codegen.internal.Serialization.*;
import com.pulumi.resources.CustomResourceOptions;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // VPC network the endpoint is peered with.
        var vertexNetwork = new Network("vertexNetwork", NetworkArgs.builder()
            .name("network-name")
            .build());

        // Reserved internal /24 range consumed by the service networking peering.
        var vertexRange = new GlobalAddress("vertexRange", GlobalAddressArgs.builder()
            .name("address-name")
            .purpose("VPC_PEERING")
            .addressType("INTERNAL")
            .prefixLength(24)
            .network(vertexNetwork.id())
            .build());

        // Private services access connection; the endpoint depends on this peering.
        var vertexVpcConnection = new Connection("vertexVpcConnection", ConnectionArgs.builder()
            .network(vertexNetwork.id())
            .service("servicenetworking.googleapis.com")
            .reservedPeeringRanges(vertexRange.name())
            .build());

        // BigQuery dataset that receives the sampled request/response logs.
        var bqDataset = new Dataset("bqDataset", DatasetArgs.builder()
            .datasetId("some_dataset")
            .friendlyName("logging dataset")
            .description("This is a dataset that requests are logged to")
            .location("US")
            .deleteContentsOnDestroy(true)
            .build());

        // Current project; supplies the project number and ID used below.
        final var project = OrganizationsFunctions.getProject(GetProjectArgs.builder()
            .build());

        // NOTE(review): getProject returns Output<GetProjectResult> in the Java
        // SDK; the direct project.number()/project.projectId() calls below look
        // like they would not compile — confirm against the SDK (typical codegen
        // wraps them in applyValue).
        var endpoint = new AiEndpoint("endpoint", AiEndpointArgs.builder()
            .name("endpoint-name")
            .displayName("sample-endpoint")
            .description("A sample vertex endpoint")
            .location("us-central1")
            .region("us-central1")
            .labels(Map.of("label-one", "value-one"))
            .network(vertexNetwork.name().applyValue(_name -> String.format("projects/%s/global/networks/%s", project.number(),_name)))
            .encryptionSpec(AiEndpointEncryptionSpecArgs.builder()
                .kmsKeyName("kms-name")
                .build())
            .predictRequestResponseLoggingConfig(AiEndpointPredictRequestResponseLoggingConfigArgs.builder()
                .bigqueryDestination(AiEndpointPredictRequestResponseLoggingConfigBigqueryDestinationArgs.builder()
                    .outputUri(bqDataset.datasetId().applyValue(_datasetId -> String.format("bq://%s.%s.request_response_logging", project.projectId(),_datasetId)))
                    .build())
                .enabled(true)
                .samplingRate(0.1)
                .build())
            // Route 100% of traffic to deployed model ID "12345".
            .trafficSplit(serializeJson(
                jsonObject(
                    jsonProperty("12345", 100)
                )))
            .build(), CustomResourceOptions.builder()
                // Ensure the VPC peering exists before the endpoint is created.
                .dependsOn(vertexVpcConnection)
                .build());

        // Grant the Vertex AI service agent permission to use the CMEK key.
        var cryptoKey = new CryptoKeyIAMMember("cryptoKey", CryptoKeyIAMMemberArgs.builder()
            .cryptoKeyId("kms-name")
            .role("roles/cloudkms.cryptoKeyEncrypterDecrypter")
            .member(String.format("serviceAccount:service-%s@gcp-sa-aiplatform.iam.gserviceaccount.com", project.number()))
            .build());

    }
}
# Peer a Vertex AI endpoint to a VPC through private services access.
resources:
  endpoint:
    type: gcp:vertex:AiEndpoint
    properties:
      name: endpoint-name
      displayName: sample-endpoint
      description: A sample vertex endpoint
      location: us-central1
      region: us-central1
      labels:
        label-one: value-one
      # Full network path built from the project number and network name.
      network: projects/${project.number}/global/networks/${vertexNetwork.name}
      # Customer-managed encryption key ("kms-name" is a placeholder).
      encryptionSpec:
        kmsKeyName: kms-name
      # Log 10% (samplingRate 0.1) of prediction traffic to BigQuery.
      predictRequestResponseLoggingConfig:
        bigqueryDestination:
          outputUri: bq://${project.projectId}.${bqDataset.datasetId}.request_response_logging
        enabled: true
        samplingRate: 0.1
      # Route 100% of traffic to deployed model ID "12345".
      trafficSplit:
        fn::toJSON:
          '12345': 100
    options:
      # Ensure the VPC peering exists before the endpoint is created.
      dependsOn:
        - ${vertexVpcConnection}
  # Private services access connection used by the endpoint's network peering.
  vertexVpcConnection:
    type: gcp:servicenetworking:Connection
    name: vertex_vpc_connection
    properties:
      network: ${vertexNetwork.id}
      service: servicenetworking.googleapis.com
      reservedPeeringRanges:
        - ${vertexRange.name}
  # Reserved internal /24 range consumed by the peering.
  vertexRange:
    type: gcp:compute:GlobalAddress
    name: vertex_range
    properties:
      name: address-name
      purpose: VPC_PEERING
      addressType: INTERNAL
      prefixLength: 24
      network: ${vertexNetwork.id}
  # VPC network the endpoint is peered with.
  vertexNetwork:
    type: gcp:compute:Network
    name: vertex_network
    properties:
      name: network-name
  # Grant the Vertex AI service agent permission to use the CMEK key.
  cryptoKey:
    type: gcp:kms:CryptoKeyIAMMember
    name: crypto_key
    properties:
      cryptoKeyId: kms-name
      role: roles/cloudkms.cryptoKeyEncrypterDecrypter
      member: serviceAccount:service-${project.number}@gcp-sa-aiplatform.iam.gserviceaccount.com
  # BigQuery dataset that receives the sampled request/response logs.
  bqDataset:
    type: gcp:bigquery:Dataset
    name: bq_dataset
    properties:
      datasetId: some_dataset
      friendlyName: logging dataset
      description: This is a dataset that requests are logged to
      location: US
      deleteContentsOnDestroy: true
variables:
  # Current project; supplies the project number and ID used above.
  project:
    fn::invoke:
      function: gcp:organizations:getProject
      arguments: {}

The endpoint connects to your VPC through the network property, which requires VPC peering already configured via servicenetworking.Connection. The predictRequestResponseLoggingConfig sends prediction data to BigQuery for analysis, with samplingRate controlling what percentage of requests are logged. The encryptionSpec secures the endpoint and its data with a customer-managed KMS key. The trafficSplit property maps deployed model IDs to traffic percentages; here “12345” receives 100% of traffic.

Enable Private Service Connect for isolated access

Teams that need stronger network isolation can use Private Service Connect to expose endpoints through dedicated connections without VPC peering.

// Expose a Vertex AI endpoint through Private Service Connect (no VPC peering).
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Consumer network that receives the automated PSC attachment.
const pscNetwork = new gcp.compute.Network("default", {name: "psc-network-_7495"});

// Current project; allow-listed for PSC access below.
const project = gcp.organizations.getProject({});
const allowedProject = project.then((p) => p.projectId);

const endpoint = new gcp.vertex.AiEndpoint("endpoint", {
    name: "endpoint-name_21912",
    displayName: "sample-endpoint",
    description: "A sample vertex endpoint",
    location: "us-central1",
    region: "us-central1",
    labels: {
        "label-one": "value-one",
    },
    privateServiceConnectConfig: {
        enablePrivateServiceConnect: true,
        // Only this project may reach the endpoint over PSC.
        projectAllowlists: [allowedProject],
        // Create the PSC attachment automatically in this project/network.
        pscAutomationConfigs: [{
            projectId: allowedProject,
            network: pscNetwork.id,
        }],
    },
});
"""Expose a Vertex AI endpoint through Private Service Connect (no VPC peering)."""
import pulumi
import pulumi_gcp as gcp

# Consumer network that receives the automated PSC attachment.
psc_network = gcp.compute.Network("default", name="psc-network-_7495")

# Current project; allow-listed for PSC access below.
project = gcp.organizations.get_project()
allowed_project = project.project_id

endpoint = gcp.vertex.AiEndpoint(
    "endpoint",
    name="endpoint-name_21912",
    display_name="sample-endpoint",
    description="A sample vertex endpoint",
    location="us-central1",
    region="us-central1",
    labels={"label-one": "value-one"},
    private_service_connect_config={
        "enable_private_service_connect": True,
        # Only this project may reach the endpoint over PSC.
        "project_allowlists": [allowed_project],
        # Create the PSC attachment automatically in this project/network.
        "psc_automation_configs": [
            {
                "project_id": allowed_project,
                "network": psc_network.id,
            }
        ],
    },
)
// Expose a Vertex AI endpoint through Private Service Connect (no VPC peering).
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/compute"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/organizations"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/vertex"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Consumer network that receives the automated PSC attachment.
		_default, err := compute.NewNetwork(ctx, "default", &compute.NetworkArgs{
			Name: pulumi.String("psc-network-_7495"),
		})
		if err != nil {
			return err
		}
		// Current project; allow-listed for PSC access below.
		project, err := organizations.LookupProject(ctx, &organizations.LookupProjectArgs{}, nil)
		if err != nil {
			return err
		}
		_, err = vertex.NewAiEndpoint(ctx, "endpoint", &vertex.AiEndpointArgs{
			Name:        pulumi.String("endpoint-name_21912"),
			DisplayName: pulumi.String("sample-endpoint"),
			Description: pulumi.String("A sample vertex endpoint"),
			Location:    pulumi.String("us-central1"),
			Region:      pulumi.String("us-central1"),
			Labels: pulumi.StringMap{
				"label-one": pulumi.String("value-one"),
			},
			PrivateServiceConnectConfig: &vertex.AiEndpointPrivateServiceConnectConfigArgs{
				EnablePrivateServiceConnect: pulumi.Bool(true),
				// Only this project may reach the endpoint over PSC.
				ProjectAllowlists: pulumi.StringArray{
					pulumi.String(project.ProjectId),
				},
				// Create the PSC attachment automatically in this project/network.
				PscAutomationConfigs: vertex.AiEndpointPrivateServiceConnectConfigPscAutomationConfigArray{
					&vertex.AiEndpointPrivateServiceConnectConfigPscAutomationConfigArgs{
						ProjectId: pulumi.String(project.ProjectId),
						Network:   _default.ID(),
					},
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
// Expose a Vertex AI endpoint through Private Service Connect (no VPC peering).
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    // Consumer network that receives the automated PSC attachment.
    var @default = new Gcp.Compute.Network("default", new()
    {
        Name = "psc-network-_7495",
    });

    // Current project; allow-listed for PSC access below.
    var project = Gcp.Organizations.GetProject.Invoke();

    var endpoint = new Gcp.Vertex.AiEndpoint("endpoint", new()
    {
        Name = "endpoint-name_21912",
        DisplayName = "sample-endpoint",
        Description = "A sample vertex endpoint",
        Location = "us-central1",
        Region = "us-central1",
        Labels = 
        {
            { "label-one", "value-one" },
        },
        PrivateServiceConnectConfig = new Gcp.Vertex.Inputs.AiEndpointPrivateServiceConnectConfigArgs
        {
            EnablePrivateServiceConnect = true,
            // Only this project may reach the endpoint over PSC.
            ProjectAllowlists = new[]
            {
                project.Apply(getProjectResult => getProjectResult.ProjectId),
            },
            // Create the PSC attachment automatically in this project/network.
            PscAutomationConfigs = new[]
            {
                new Gcp.Vertex.Inputs.AiEndpointPrivateServiceConnectConfigPscAutomationConfigArgs
                {
                    ProjectId = project.Apply(getProjectResult => getProjectResult.ProjectId),
                    Network = @default.Id,
                },
            },
        },
    });

});
// Expose a Vertex AI endpoint through Private Service Connect (no VPC peering).
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.compute.Network;
import com.pulumi.gcp.compute.NetworkArgs;
import com.pulumi.gcp.organizations.OrganizationsFunctions;
import com.pulumi.gcp.organizations.inputs.GetProjectArgs;
import com.pulumi.gcp.vertex.AiEndpoint;
import com.pulumi.gcp.vertex.AiEndpointArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointPrivateServiceConnectConfigArgs;
// Fix: this type is referenced below but was missing from the imports.
import com.pulumi.gcp.vertex.inputs.AiEndpointPrivateServiceConnectConfigPscAutomationConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // Consumer network that receives the automated PSC attachment.
        var default_ = new Network("default", NetworkArgs.builder()
            .name("psc-network-_7495")
            .build());

        // Current project; allow-listed for PSC access below.
        final var project = OrganizationsFunctions.getProject(GetProjectArgs.builder()
            .build());

        // NOTE(review): getProject returns Output<GetProjectResult> in the Java
        // SDK; the direct project.projectId() calls below look like they would
        // not compile — confirm against the SDK (typical codegen uses applyValue).
        var endpoint = new AiEndpoint("endpoint", AiEndpointArgs.builder()
            .name("endpoint-name_21912")
            .displayName("sample-endpoint")
            .description("A sample vertex endpoint")
            .location("us-central1")
            .region("us-central1")
            .labels(Map.of("label-one", "value-one"))
            .privateServiceConnectConfig(AiEndpointPrivateServiceConnectConfigArgs.builder()
                .enablePrivateServiceConnect(true)
                // Only this project may reach the endpoint over PSC.
                .projectAllowlists(project.projectId())
                // Create the PSC attachment automatically in this project/network.
                .pscAutomationConfigs(AiEndpointPrivateServiceConnectConfigPscAutomationConfigArgs.builder()
                    .projectId(project.projectId())
                    .network(default_.id())
                    .build())
                .build())
            .build());

    }
}
# Expose a Vertex AI endpoint through Private Service Connect (no VPC peering).
resources:
  # Consumer network that receives the automated PSC attachment.
  default:
    type: gcp:compute:Network
    properties:
      name: psc-network-_7495
  endpoint:
    type: gcp:vertex:AiEndpoint
    properties:
      name: endpoint-name_21912
      displayName: sample-endpoint
      description: A sample vertex endpoint
      location: us-central1
      region: us-central1
      labels:
        label-one: value-one
      privateServiceConnectConfig:
        enablePrivateServiceConnect: true
        # Only this project may reach the endpoint over PSC.
        projectAllowlists:
          - ${project.projectId}
        # Create the PSC attachment automatically in this project/network.
        pscAutomationConfigs:
          - projectId: ${project.projectId}
            network: ${default.id}
variables:
  # Current project; allow-listed for PSC access above.
  project:
    fn::invoke:
      function: gcp:organizations:getProject
      arguments: {}

Private Service Connect provides an alternative to VPC peering. The privateServiceConnectConfig enables the feature and defines which projects can access the endpoint via projectAllowlists. The pscAutomationConfigs specify which networks in which projects should have automated Private Service Connect attachment points created. This approach isolates endpoint traffic without requiring VPC peering setup.

Create dedicated DNS endpoint for performance isolation

Production workloads that require predictable performance and traffic isolation can enable dedicated endpoints, which provide a separate DNS name and isolated infrastructure.

// Provision a Vertex AI endpoint with dedicated DNS for traffic isolation.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// dedicatedEndpointEnabled provisions isolated serving infrastructure; the
// resulting DNS name is exposed via the dedicatedEndpointDns output.
const endpoint = new gcp.vertex.AiEndpoint("endpoint", {
    name: "endpoint-name_46731",
    displayName: "sample-endpoint",
    description: "A sample vertex endpoint",
    location: "us-central1",
    region: "us-central1",
    labels: {"label-one": "value-one"},
    dedicatedEndpointEnabled: true,
});

// Current project lookup (kept for parity with the other examples).
const project = gcp.organizations.getProject({});
"""Provision a Vertex AI endpoint with dedicated DNS for traffic isolation."""
import pulumi
import pulumi_gcp as gcp

# dedicated_endpoint_enabled provisions isolated serving infrastructure; the
# resulting DNS name is exposed via the dedicated_endpoint_dns output.
endpoint = gcp.vertex.AiEndpoint(
    "endpoint",
    name="endpoint-name_46731",
    display_name="sample-endpoint",
    description="A sample vertex endpoint",
    location="us-central1",
    region="us-central1",
    labels={"label-one": "value-one"},
    dedicated_endpoint_enabled=True,
)

# Current project lookup (kept for parity with the other examples).
project = gcp.organizations.get_project()
// Provision a Vertex AI endpoint with dedicated DNS for traffic isolation.
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/organizations"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/vertex"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// DedicatedEndpointEnabled provisions isolated serving infrastructure;
		// the resulting DNS name is exposed via the DedicatedEndpointDns output.
		_, err := vertex.NewAiEndpoint(ctx, "endpoint", &vertex.AiEndpointArgs{
			Name:        pulumi.String("endpoint-name_46731"),
			DisplayName: pulumi.String("sample-endpoint"),
			Description: pulumi.String("A sample vertex endpoint"),
			Location:    pulumi.String("us-central1"),
			Region:      pulumi.String("us-central1"),
			Labels: pulumi.StringMap{
				"label-one": pulumi.String("value-one"),
			},
			DedicatedEndpointEnabled: pulumi.Bool(true),
		})
		if err != nil {
			return err
		}
		// Current project lookup (kept for parity with the other examples).
		_, err = organizations.LookupProject(ctx, &organizations.LookupProjectArgs{}, nil)
		if err != nil {
			return err
		}
		return nil
	})
}
// Provision a Vertex AI endpoint with dedicated DNS for traffic isolation.
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    // DedicatedEndpointEnabled provisions isolated serving infrastructure;
    // the resulting DNS name is exposed via the DedicatedEndpointDns output.
    var endpoint = new Gcp.Vertex.AiEndpoint("endpoint", new()
    {
        Name = "endpoint-name_46731",
        DisplayName = "sample-endpoint",
        Description = "A sample vertex endpoint",
        Location = "us-central1",
        Region = "us-central1",
        Labels = 
        {
            { "label-one", "value-one" },
        },
        DedicatedEndpointEnabled = true,
    });

    // Current project lookup (kept for parity with the other examples).
    var project = Gcp.Organizations.GetProject.Invoke();

});
// Provision a Vertex AI endpoint with dedicated DNS for traffic isolation.
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.vertex.AiEndpoint;
import com.pulumi.gcp.vertex.AiEndpointArgs;
import com.pulumi.gcp.organizations.OrganizationsFunctions;
import com.pulumi.gcp.organizations.inputs.GetProjectArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // dedicatedEndpointEnabled provisions isolated serving infrastructure;
        // the resulting DNS name is exposed via the dedicatedEndpointDns output.
        var endpoint = new AiEndpoint("endpoint", AiEndpointArgs.builder()
            .name("endpoint-name_46731")
            .displayName("sample-endpoint")
            .description("A sample vertex endpoint")
            .location("us-central1")
            .region("us-central1")
            .labels(Map.of("label-one", "value-one"))
            .dedicatedEndpointEnabled(true)
            .build());

        // Current project lookup (kept for parity with the other examples).
        final var project = OrganizationsFunctions.getProject(GetProjectArgs.builder()
            .build());

    }
}
# Provision a Vertex AI endpoint with dedicated DNS for traffic isolation.
resources:
  endpoint:
    type: gcp:vertex:AiEndpoint
    properties:
      name: endpoint-name_46731
      displayName: sample-endpoint
      description: A sample vertex endpoint
      location: us-central1
      region: us-central1
      labels:
        label-one: value-one
      # Provisions isolated serving infrastructure; the resulting DNS name is
      # exposed via the dedicatedEndpointDns output.
      dedicatedEndpointEnabled: true
variables:
  # Current project lookup (kept for parity with the other examples).
  project:
    fn::invoke:
      function: gcp:organizations:getProject
      arguments: {}

Setting dedicatedEndpointEnabled to true provisions dedicated infrastructure and a unique DNS name (available in the dedicatedEndpointDns output). This isolates your endpoint’s traffic from other users’ requests, providing better performance and reliability. Once enabled, you must use the dedicated DNS rather than the shared regional endpoint.

Beyond these examples

These snippets focus on specific endpoint-level features: VPC peering and Private Service Connect networking, request-response logging to BigQuery, and dedicated endpoint DNS for traffic isolation. They’re intentionally minimal rather than full ML serving deployments.

The examples may reference pre-existing infrastructure such as VPC networks with private service access configured, KMS keys for encryption, and BigQuery datasets for logging. They focus on configuring the endpoint rather than deploying models or managing predictions.

To keep things focused, common endpoint patterns are omitted, including:

  • Model deployment and fine-grained traffic management (the trafficSplit property appears only in passing)
  • Encryption key IAM bindings and rotation
  • Monitoring job configuration (modelDeploymentMonitoringJob)
  • Label management and resource tagging strategies

These omissions are intentional: the goal is to illustrate how each endpoint feature is wired, not provide drop-in ML serving modules. See the Vertex AI Endpoint resource reference for all available configuration options.

Let's deploy GCP Vertex AI Endpoints

Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.

Try Pulumi Cloud for FREE

Frequently Asked Questions

Networking & Connectivity
Can I use both VPC peering and Private Service Connect for my endpoint?
No, network and privateServiceConnectConfig are mutually exclusive. You must choose one connectivity method or the other.
How do I configure VPC peering for my endpoint?
Set network to the full network path (format: projects/{project}/global/networks/{network}). Ensure private services access is already configured for the network, and use dependsOn to reference the VPC connection resource to avoid race conditions.
How do I configure Private Service Connect instead of VPC peering?
Set privateServiceConnectConfig with enablePrivateServiceConnect: true, specify projectAllowlists for allowed projects, and configure pscAutomationConfigs with the project ID and network.
What happens when I enable dedicated endpoints?
Enabling dedicatedEndpointEnabled isolates your traffic through a dedicated DNS endpoint (available in dedicatedEndpointDns output) with better performance and reliability. However, you won’t be able to send requests to the shared DNS {region}-aiplatform.googleapis.com afterward.
Traffic & Deployment
How do I set trafficSplit to empty to stop accepting traffic?
Use a two-step process: first set trafficSplit to "{}" and apply, then remove the field from your configuration. This avoids configuration drift.
What are the requirements for trafficSplit values?
The traffic percentage values must add up to 100, or the map must be empty if the endpoint should not accept any traffic. Each key is a DeployedModel ID, and the value is the percentage of traffic to route to it.
Configuration & Constraints
What format must the endpoint name follow?
The name must be numeric with no leading zeros and can be at most 10 digits.
What properties can't I change after creating the endpoint?
The following properties are immutable: location, name, project, encryptionSpec, network, and region. Changing these requires recreating the endpoint.
Security & Logging
How do I enable request and response logging to BigQuery?
Configure predictRequestResponseLoggingConfig with bigqueryDestination.outputUri pointing to your BigQuery dataset, set enabled to true, and specify a samplingRate (e.g., 0.1 for 10% sampling).
How do I encrypt my endpoint with a customer-managed key?
Set encryptionSpec.kmsKeyName to your KMS key name. You’ll also need to grant the Vertex AI service account the roles/cloudkms.cryptoKeyEncrypterDecrypter role on the key, as shown in the network example.

Using a different cloud?

Explore AI and machine learning guides for other cloud providers: