Deploy GCP Vertex AI Endpoints

The gcp:vertex/aiEndpoint:AiEndpoint resource, part of the Pulumi GCP provider, defines a Vertex AI endpoint that serves deployed models for online prediction requests. This guide focuses on three capabilities: VPC networking options (peering and Private Service Connect), prediction logging to BigQuery, and dedicated DNS for traffic isolation.

Endpoints serve as the runtime infrastructure for deployed models. They require models to be deployed before accepting traffic, and may reference VPC networks, KMS keys, or BigQuery datasets for logging. The examples are intentionally small. Combine them with your own model deployment and traffic management configuration.

Peer endpoint to VPC for private network access

ML inference workloads often need to access private resources like databases or internal services. VPC peering connects the endpoint to your network while logging predictions to BigQuery for monitoring.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Example: a Vertex AI endpoint peered to a VPC network, encrypted with a KMS key,
// and logging a sample of prediction traffic to BigQuery.

// VPC network the endpoint is attached to.
const vertexNetwork = new gcp.compute.Network("vertex_network", {name: "network-name"});
// Internal /24 address range reserved for VPC peering on that network.
const vertexRange = new gcp.compute.GlobalAddress("vertex_range", {
    name: "address-name",
    purpose: "VPC_PEERING",
    addressType: "INTERNAL",
    prefixLength: 24,
    network: vertexNetwork.id,
});
// Service networking connection that establishes the peering using the reserved range.
const vertexVpcConnection = new gcp.servicenetworking.Connection("vertex_vpc_connection", {
    network: vertexNetwork.id,
    service: "servicenetworking.googleapis.com",
    reservedPeeringRanges: [vertexRange.name],
});
// BigQuery dataset that receives the logged requests and responses.
const bqDataset = new gcp.bigquery.Dataset("bq_dataset", {
    datasetId: "some_dataset",
    friendlyName: "logging dataset",
    description: "This is a dataset that requests are logged to",
    location: "US",
    deleteContentsOnDestroy: true,
});
// Current project; its number and ID are used to build the paths below.
const project = gcp.organizations.getProject({});
const endpoint = new gcp.vertex.AiEndpoint("endpoint", {
    name: "endpoint-name",
    displayName: "sample-endpoint",
    description: "A sample vertex endpoint",
    location: "us-central1",
    region: "us-central1",
    labels: {
        "label-one": "value-one",
    },
    // Full network path: projects/{project_number}/global/networks/{network_name}.
    network: pulumi.all([project, vertexNetwork.name]).apply(([project, name]) => `projects/${project.number}/global/networks/${name}`),
    encryptionSpec: {
        kmsKeyName: "kms-name",
    },
    predictRequestResponseLoggingConfig: {
        bigqueryDestination: {
            // Destination table URI: bq://{project}.{dataset}.{table}.
            outputUri: pulumi.all([project, bqDataset.datasetId]).apply(([project, datasetId]) => `bq://${project.projectId}.${datasetId}.request_response_logging`),
        },
        enabled: true,
        // 0.1 logs 10% of requests.
        samplingRate: 0.1,
    },
    // JSON map of DeployedModel ID to traffic percentage; here ID "12345" gets 100%.
    trafficSplit: JSON.stringify({
        "12345": 100,
    }),
}, {
    // The peering connection must exist before the endpoint can use the network.
    dependsOn: [vertexVpcConnection],
});
// Grants the Vertex AI service account encrypt/decrypt access on the KMS key.
const cryptoKey = new gcp.kms.CryptoKeyIAMMember("crypto_key", {
    cryptoKeyId: "kms-name",
    role: "roles/cloudkms.cryptoKeyEncrypterDecrypter",
    member: project.then(project => `serviceAccount:service-${project.number}@gcp-sa-aiplatform.iam.gserviceaccount.com`),
});

import pulumi
import json
import pulumi_gcp as gcp

# Example: a Vertex AI endpoint peered to a VPC network, encrypted with a KMS key,
# and logging a sample of prediction traffic to BigQuery.

# VPC network the endpoint is attached to.
vertex_network = gcp.compute.Network("vertex_network", name="network-name")
# Internal /24 address range reserved for VPC peering on that network.
vertex_range = gcp.compute.GlobalAddress("vertex_range",
    name="address-name",
    purpose="VPC_PEERING",
    address_type="INTERNAL",
    prefix_length=24,
    network=vertex_network.id)
# Service networking connection that establishes the peering using the reserved range.
vertex_vpc_connection = gcp.servicenetworking.Connection("vertex_vpc_connection",
    network=vertex_network.id,
    service="servicenetworking.googleapis.com",
    reserved_peering_ranges=[vertex_range.name])
# BigQuery dataset that receives the logged requests and responses.
bq_dataset = gcp.bigquery.Dataset("bq_dataset",
    dataset_id="some_dataset",
    friendly_name="logging dataset",
    description="This is a dataset that requests are logged to",
    location="US",
    delete_contents_on_destroy=True)
# Current project; its number and ID are used to build the paths below.
project = gcp.organizations.get_project()
endpoint = gcp.vertex.AiEndpoint("endpoint",
    name="endpoint-name",
    display_name="sample-endpoint",
    description="A sample vertex endpoint",
    location="us-central1",
    region="us-central1",
    labels={
        "label-one": "value-one",
    },
    # Full network path: projects/{project_number}/global/networks/{network_name}.
    network=vertex_network.name.apply(lambda name: f"projects/{project.number}/global/networks/{name}"),
    encryption_spec={
        "kms_key_name": "kms-name",
    },
    predict_request_response_logging_config={
        "bigquery_destination": {
            # Destination table URI: bq://{project}.{dataset}.{table}.
            "output_uri": bq_dataset.dataset_id.apply(lambda dataset_id: f"bq://{project.project_id}.{dataset_id}.request_response_logging"),
        },
        "enabled": True,
        # 0.1 logs 10% of requests.
        "sampling_rate": 0.1,
    },
    # JSON map of DeployedModel ID to traffic percentage; here ID "12345" gets 100%.
    traffic_split=json.dumps({
        "12345": 100,
    }),
    # The peering connection must exist before the endpoint can use the network.
    opts = pulumi.ResourceOptions(depends_on=[vertex_vpc_connection]))
# Grants the Vertex AI service account encrypt/decrypt access on the KMS key.
crypto_key = gcp.kms.CryptoKeyIAMMember("crypto_key",
    crypto_key_id="kms-name",
    role="roles/cloudkms.cryptoKeyEncrypterDecrypter",
    member=f"serviceAccount:service-{project.number}@gcp-sa-aiplatform.iam.gserviceaccount.com")

package main

import (
	"encoding/json"
	"fmt"

	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/compute"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/kms"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/organizations"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/servicenetworking"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/vertex"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main declares a Vertex AI endpoint peered to a VPC network, encrypted with a
// KMS key, and logging a sample of prediction traffic to BigQuery.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// VPC network the endpoint is attached to.
		vertexNetwork, err := compute.NewNetwork(ctx, "vertex_network", &compute.NetworkArgs{
			Name: pulumi.String("network-name"),
		})
		if err != nil {
			return err
		}
		// Internal /24 address range reserved for VPC peering on that network.
		vertexRange, err := compute.NewGlobalAddress(ctx, "vertex_range", &compute.GlobalAddressArgs{
			Name:         pulumi.String("address-name"),
			Purpose:      pulumi.String("VPC_PEERING"),
			AddressType:  pulumi.String("INTERNAL"),
			PrefixLength: pulumi.Int(24),
			Network:      vertexNetwork.ID(),
		})
		if err != nil {
			return err
		}
		// Service networking connection that establishes the peering using the reserved range.
		vertexVpcConnection, err := servicenetworking.NewConnection(ctx, "vertex_vpc_connection", &servicenetworking.ConnectionArgs{
			Network: vertexNetwork.ID(),
			Service: pulumi.String("servicenetworking.googleapis.com"),
			ReservedPeeringRanges: pulumi.StringArray{
				vertexRange.Name,
			},
		})
		if err != nil {
			return err
		}
		// BigQuery dataset that receives the logged requests and responses.
		bqDataset, err := bigquery.NewDataset(ctx, "bq_dataset", &bigquery.DatasetArgs{
			DatasetId:               pulumi.String("some_dataset"),
			FriendlyName:            pulumi.String("logging dataset"),
			Description:             pulumi.String("This is a dataset that requests are logged to"),
			Location:                pulumi.String("US"),
			DeleteContentsOnDestroy: pulumi.Bool(true),
		})
		if err != nil {
			return err
		}
		// Current project; its number and ID are used to build the paths below.
		project, err := organizations.LookupProject(ctx, &organizations.LookupProjectArgs{}, nil)
		if err != nil {
			return err
		}
		// JSON map of DeployedModel ID to traffic percentage; here ID "12345" gets 100%.
		tmpJSON0, err := json.Marshal(map[string]interface{}{
			"12345": 100,
		})
		if err != nil {
			return err
		}
		json0 := string(tmpJSON0)
		_, err = vertex.NewAiEndpoint(ctx, "endpoint", &vertex.AiEndpointArgs{
			Name:        pulumi.String("endpoint-name"),
			DisplayName: pulumi.String("sample-endpoint"),
			Description: pulumi.String("A sample vertex endpoint"),
			Location:    pulumi.String("us-central1"),
			Region:      pulumi.String("us-central1"),
			Labels: pulumi.StringMap{
				"label-one": pulumi.String("value-one"),
			},
			// Full network path: projects/{project_number}/global/networks/{network_name}.
			Network: vertexNetwork.Name.ApplyT(func(name string) (string, error) {
				return fmt.Sprintf("projects/%v/global/networks/%v", project.Number, name), nil
			}).(pulumi.StringOutput),
			EncryptionSpec: &vertex.AiEndpointEncryptionSpecArgs{
				KmsKeyName: pulumi.String("kms-name"),
			},
			PredictRequestResponseLoggingConfig: &vertex.AiEndpointPredictRequestResponseLoggingConfigArgs{
				BigqueryDestination: &vertex.AiEndpointPredictRequestResponseLoggingConfigBigqueryDestinationArgs{
					// Destination table URI: bq://{project}.{dataset}.{table}.
					OutputUri: bqDataset.DatasetId.ApplyT(func(datasetId string) (string, error) {
						return fmt.Sprintf("bq://%v.%v.request_response_logging", project.ProjectId, datasetId), nil
					}).(pulumi.StringOutput),
				},
				Enabled: pulumi.Bool(true),
				// 0.1 logs 10% of requests.
				SamplingRate: pulumi.Float64(0.1),
			},
			TrafficSplit: pulumi.String(json0),
			// The peering connection must exist before the endpoint can use the network.
		}, pulumi.DependsOn([]pulumi.Resource{
			vertexVpcConnection,
		}))
		if err != nil {
			return err
		}
		// Grants the Vertex AI service account encrypt/decrypt access on the KMS key.
		_, err = kms.NewCryptoKeyIAMMember(ctx, "crypto_key", &kms.CryptoKeyIAMMemberArgs{
			CryptoKeyId: pulumi.String("kms-name"),
			Role:        pulumi.String("roles/cloudkms.cryptoKeyEncrypterDecrypter"),
			Member:      pulumi.Sprintf("serviceAccount:service-%v@gcp-sa-aiplatform.iam.gserviceaccount.com", project.Number),
		})
		if err != nil {
			return err
		}
		return nil
	})
}

using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Example: a Vertex AI endpoint peered to a VPC network, encrypted with a KMS key,
// and logging a sample of prediction traffic to BigQuery.
return await Deployment.RunAsync(() =>
{
    // VPC network the endpoint is attached to.
    var vertexNetwork = new Gcp.Compute.Network("vertex_network", new()
    {
        Name = "network-name",
    });

    // Internal /24 address range reserved for VPC peering on that network.
    var vertexRange = new Gcp.Compute.GlobalAddress("vertex_range", new()
    {
        Name = "address-name",
        Purpose = "VPC_PEERING",
        AddressType = "INTERNAL",
        PrefixLength = 24,
        Network = vertexNetwork.Id,
    });

    // Service networking connection that establishes the peering using the reserved range.
    var vertexVpcConnection = new Gcp.ServiceNetworking.Connection("vertex_vpc_connection", new()
    {
        Network = vertexNetwork.Id,
        Service = "servicenetworking.googleapis.com",
        ReservedPeeringRanges = new[]
        {
            vertexRange.Name,
        },
    });

    // BigQuery dataset that receives the logged requests and responses.
    var bqDataset = new Gcp.BigQuery.Dataset("bq_dataset", new()
    {
        DatasetId = "some_dataset",
        FriendlyName = "logging dataset",
        Description = "This is a dataset that requests are logged to",
        Location = "US",
        DeleteContentsOnDestroy = true,
    });

    // Current project (an Output); its number and ID are read inside Apply below.
    var project = Gcp.Organizations.GetProject.Invoke();

    var endpoint = new Gcp.Vertex.AiEndpoint("endpoint", new()
    {
        Name = "endpoint-name",
        DisplayName = "sample-endpoint",
        Description = "A sample vertex endpoint",
        Location = "us-central1",
        Region = "us-central1",
        Labels =
        {
            { "label-one", "value-one" },
        },
        // Full network path: projects/{project_number}/global/networks/{network_name}.
        // Inside Apply the tuple items are already resolved values, so their
        // properties are read directly; the lambda locals use distinct names so
        // they do not clash with the outer `project` variable.
        Network = Output.Tuple(project, vertexNetwork.Name).Apply(values =>
        {
            var projectInfo = values.Item1;
            var networkName = values.Item2;
            return $"projects/{projectInfo.Number}/global/networks/{networkName}";
        }),
        EncryptionSpec = new Gcp.Vertex.Inputs.AiEndpointEncryptionSpecArgs
        {
            KmsKeyName = "kms-name",
        },
        PredictRequestResponseLoggingConfig = new Gcp.Vertex.Inputs.AiEndpointPredictRequestResponseLoggingConfigArgs
        {
            BigqueryDestination = new Gcp.Vertex.Inputs.AiEndpointPredictRequestResponseLoggingConfigBigqueryDestinationArgs
            {
                // Destination table URI: bq://{project}.{dataset}.{table}.
                OutputUri = Output.Tuple(project, bqDataset.DatasetId).Apply(values =>
                {
                    var projectInfo = values.Item1;
                    var datasetId = values.Item2;
                    return $"bq://{projectInfo.ProjectId}.{datasetId}.request_response_logging";
                }),
            },
            Enabled = true,
            // 0.1 logs 10% of requests.
            SamplingRate = 0.1,
        },
        // JSON map of DeployedModel ID to traffic percentage; here ID "12345" gets 100%.
        TrafficSplit = JsonSerializer.Serialize(new Dictionary<string, object?>
        {
            ["12345"] = 100,
        }),
    }, new CustomResourceOptions
    {
        // The peering connection must exist before the endpoint can use the network.
        DependsOn =
        {
            vertexVpcConnection,
        },
    });

    // Grants the Vertex AI service account encrypt/decrypt access on the KMS key.
    // The member string is built inside Apply: interpolating an Output directly
    // into a string would embed the Output object's text, not the project number.
    var cryptoKey = new Gcp.Kms.CryptoKeyIAMMember("crypto_key", new()
    {
        CryptoKeyId = "kms-name",
        Role = "roles/cloudkms.cryptoKeyEncrypterDecrypter",
        Member = project.Apply(projectInfo => $"serviceAccount:service-{projectInfo.Number}@gcp-sa-aiplatform.iam.gserviceaccount.com"),
    });

});

package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.compute.Network;
import com.pulumi.gcp.compute.NetworkArgs;
import com.pulumi.gcp.compute.GlobalAddress;
import com.pulumi.gcp.compute.GlobalAddressArgs;
import com.pulumi.gcp.servicenetworking.Connection;
import com.pulumi.gcp.servicenetworking.ConnectionArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.organizations.OrganizationsFunctions;
import com.pulumi.gcp.organizations.inputs.GetProjectArgs;
import com.pulumi.gcp.vertex.AiEndpoint;
import com.pulumi.gcp.vertex.AiEndpointArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointEncryptionSpecArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointPredictRequestResponseLoggingConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointPredictRequestResponseLoggingConfigBigqueryDestinationArgs;
import com.pulumi.gcp.kms.CryptoKeyIAMMember;
import com.pulumi.gcp.kms.CryptoKeyIAMMemberArgs;
import static com.pulumi.codegen.internal.Serialization.*;
import com.pulumi.resources.CustomResourceOptions;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Example: a Vertex AI endpoint peered to a VPC network, encrypted with a KMS key,
 * and logging a sample of prediction traffic to BigQuery.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the network, peering connection, BigQuery dataset, endpoint and
     * KMS IAM grant for the example.
     *
     * @param ctx the Pulumi program context supplied by {@code Pulumi.run}
     */
    public static void stack(Context ctx) {
        // VPC network the endpoint is attached to.
        var vertexNetwork = new Network("vertexNetwork", NetworkArgs.builder()
            .name("network-name")
            .build());

        // Internal /24 address range reserved for VPC peering on that network.
        var vertexRange = new GlobalAddress("vertexRange", GlobalAddressArgs.builder()
            .name("address-name")
            .purpose("VPC_PEERING")
            .addressType("INTERNAL")
            .prefixLength(24)
            .network(vertexNetwork.id())
            .build());

        // Service networking connection that establishes the peering using the reserved range.
        var vertexVpcConnection = new Connection("vertexVpcConnection", ConnectionArgs.builder()
            .network(vertexNetwork.id())
            .service("servicenetworking.googleapis.com")
            .reservedPeeringRanges(vertexRange.name())
            .build());

        // BigQuery dataset that receives the logged requests and responses.
        var bqDataset = new Dataset("bqDataset", DatasetArgs.builder()
            .datasetId("some_dataset")
            .friendlyName("logging dataset")
            .description("This is a dataset that requests are logged to")
            .location("US")
            .deleteContentsOnDestroy(true)
            .build());

        // Current project. The invoke returns an Output, so its number and ID
        // must be read inside apply/applyValue rather than called on `project` directly.
        final var project = OrganizationsFunctions.getProject(GetProjectArgs.builder()
            .build());

        // Full network path: projects/{project_number}/global/networks/{network_name}.
        final Output<String> networkPath = project.apply(projectInfo ->
            vertexNetwork.name().applyValue(networkName ->
                String.format("projects/%s/global/networks/%s", projectInfo.number(), networkName)));

        // Destination table URI: bq://{project}.{dataset}.{table}.
        final Output<String> loggingTableUri = project.apply(projectInfo ->
            bqDataset.datasetId().applyValue(datasetId ->
                String.format("bq://%s.%s.request_response_logging", projectInfo.projectId(), datasetId)));

        var endpoint = new AiEndpoint("endpoint", AiEndpointArgs.builder()
            .name("endpoint-name")
            .displayName("sample-endpoint")
            .description("A sample vertex endpoint")
            .location("us-central1")
            .region("us-central1")
            .labels(Map.of("label-one", "value-one"))
            .network(networkPath)
            .encryptionSpec(AiEndpointEncryptionSpecArgs.builder()
                .kmsKeyName("kms-name")
                .build())
            .predictRequestResponseLoggingConfig(AiEndpointPredictRequestResponseLoggingConfigArgs.builder()
                .bigqueryDestination(AiEndpointPredictRequestResponseLoggingConfigBigqueryDestinationArgs.builder()
                    .outputUri(loggingTableUri)
                    .build())
                .enabled(true)
                // 0.1 logs 10% of requests.
                .samplingRate(0.1)
                .build())
            // JSON map of DeployedModel ID to traffic percentage; here ID "12345" gets 100%.
            .trafficSplit(serializeJson(
                jsonObject(
                    jsonProperty("12345", 100)
                )))
            .build(), CustomResourceOptions.builder()
                // The peering connection must exist before the endpoint can use the network.
                .dependsOn(vertexVpcConnection)
                .build());

        // Grants the Vertex AI service account encrypt/decrypt access on the KMS key.
        var cryptoKey = new CryptoKeyIAMMember("cryptoKey", CryptoKeyIAMMemberArgs.builder()
            .cryptoKeyId("kms-name")
            .role("roles/cloudkms.cryptoKeyEncrypterDecrypter")
            .member(project.applyValue(projectInfo ->
                String.format("serviceAccount:service-%s@gcp-sa-aiplatform.iam.gserviceaccount.com", projectInfo.number())))
            .build());

    }
}

# Example: a Vertex AI endpoint peered to a VPC network, encrypted with a KMS key,
# and logging a sample of prediction traffic to BigQuery.
resources:
  endpoint:
    type: gcp:vertex:AiEndpoint
    properties:
      name: endpoint-name
      displayName: sample-endpoint
      description: A sample vertex endpoint
      location: us-central1
      region: us-central1
      labels:
        label-one: value-one
      # Full network path: projects/{project_number}/global/networks/{network_name}.
      network: projects/${project.number}/global/networks/${vertexNetwork.name}
      encryptionSpec:
        kmsKeyName: kms-name
      predictRequestResponseLoggingConfig:
        bigqueryDestination:
          # Destination table URI: bq://{project}.{dataset}.{table}.
          outputUri: bq://${project.projectId}.${bqDataset.datasetId}.request_response_logging
        enabled: true
        # 0.1 logs 10% of requests.
        samplingRate: 0.1
      # JSON map of DeployedModel ID to traffic percentage; here ID "12345" gets 100%.
      trafficSplit:
        fn::toJSON:
          '12345': 100
    options:
      # The peering connection must exist before the endpoint can use the network.
      dependsOn:
        - ${vertexVpcConnection}
  # Service networking connection that establishes the peering using the reserved range.
  vertexVpcConnection:
    type: gcp:servicenetworking:Connection
    name: vertex_vpc_connection
    properties:
      network: ${vertexNetwork.id}
      service: servicenetworking.googleapis.com
      reservedPeeringRanges:
        - ${vertexRange.name}
  # Internal /24 address range reserved for VPC peering on the network.
  vertexRange:
    type: gcp:compute:GlobalAddress
    name: vertex_range
    properties:
      name: address-name
      purpose: VPC_PEERING
      addressType: INTERNAL
      prefixLength: 24
      network: ${vertexNetwork.id}
  # VPC network the endpoint is attached to.
  vertexNetwork:
    type: gcp:compute:Network
    name: vertex_network
    properties:
      name: network-name
  # Grants the Vertex AI service account encrypt/decrypt access on the KMS key.
  cryptoKey:
    type: gcp:kms:CryptoKeyIAMMember
    name: crypto_key
    properties:
      cryptoKeyId: kms-name
      role: roles/cloudkms.cryptoKeyEncrypterDecrypter
      member: serviceAccount:service-${project.number}@gcp-sa-aiplatform.iam.gserviceaccount.com
  # BigQuery dataset that receives the logged requests and responses.
  bqDataset:
    type: gcp:bigquery:Dataset
    name: bq_dataset
    properties:
      datasetId: some_dataset
      friendlyName: logging dataset
      description: This is a dataset that requests are logged to
      location: US
      deleteContentsOnDestroy: true
variables:
  # Current project; its number and ID are interpolated into the paths above.
  project:
    fn::invoke:
      function: gcp:organizations:getProject
      arguments: {}

The network property connects the endpoint to your VPC using the format projects/{project_number}/global/networks/{network_name}. The predictRequestResponseLoggingConfig sends prediction requests and responses to BigQuery at the specified samplingRate (0.1 means 10% of requests). The encryptionSpec secures the endpoint with a customer-managed KMS key. The trafficSplit property controls which deployed models receive traffic; here "12345": 100 routes all traffic to the DeployedModel whose ID is 12345 (a placeholder ID). Note that VPC peering must be established via servicenetworking.Connection before the endpoint can use the network.

Expose endpoint through Private Service Connect

Private Service Connect provides an alternative to VPC peering by exposing endpoints through dedicated service attachments, allowing cross-project access without network peering.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Example: a Vertex AI endpoint exposed through Private Service Connect (PSC).

// Network in which the PSC attachments are created.
const _default = new gcp.compute.Network("default", {name: "psc-network-_42503"});
// Current project; its ID is used for both the allowlist and the PSC automation config.
const project = gcp.organizations.getProject({});
const endpoint = new gcp.vertex.AiEndpoint("endpoint", {
    name: "endpoint-name_9991",
    displayName: "sample-endpoint",
    description: "A sample vertex endpoint",
    location: "us-central1",
    region: "us-central1",
    labels: {
        "label-one": "value-one",
    },
    privateServiceConnectConfig: {
        enablePrivateServiceConnect: true,
        // Project IDs allowed to access the endpoint.
        projectAllowlists: [project.then(project => project.projectId)],
        // Project and network where the PSC attachments are created.
        pscAutomationConfigs: [{
            projectId: project.then(project => project.projectId),
            network: _default.id,
        }],
    },
});

import pulumi
import pulumi_gcp as gcp

# Example: a Vertex AI endpoint exposed through Private Service Connect (PSC).

# Network in which the PSC attachments are created.
default = gcp.compute.Network("default", name="psc-network-_42503")
# Current project; its ID is used for both the allowlist and the PSC automation config.
project = gcp.organizations.get_project()
endpoint = gcp.vertex.AiEndpoint("endpoint",
    name="endpoint-name_9991",
    display_name="sample-endpoint",
    description="A sample vertex endpoint",
    location="us-central1",
    region="us-central1",
    labels={
        "label-one": "value-one",
    },
    private_service_connect_config={
        "enable_private_service_connect": True,
        # Project IDs allowed to access the endpoint.
        "project_allowlists": [project.project_id],
        # Project and network where the PSC attachments are created.
        "psc_automation_configs": [{
            "project_id": project.project_id,
            "network": default.id,
        }],
    })

package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/compute"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/organizations"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/vertex"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main declares a Vertex AI endpoint exposed through Private Service Connect (PSC).
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Network in which the PSC attachments are created.
		_default, err := compute.NewNetwork(ctx, "default", &compute.NetworkArgs{
			Name: pulumi.String("psc-network-_42503"),
		})
		if err != nil {
			return err
		}
		// Current project; its ID is used for both the allowlist and the PSC automation config.
		project, err := organizations.LookupProject(ctx, &organizations.LookupProjectArgs{}, nil)
		if err != nil {
			return err
		}
		_, err = vertex.NewAiEndpoint(ctx, "endpoint", &vertex.AiEndpointArgs{
			Name:        pulumi.String("endpoint-name_9991"),
			DisplayName: pulumi.String("sample-endpoint"),
			Description: pulumi.String("A sample vertex endpoint"),
			Location:    pulumi.String("us-central1"),
			Region:      pulumi.String("us-central1"),
			Labels: pulumi.StringMap{
				"label-one": pulumi.String("value-one"),
			},
			PrivateServiceConnectConfig: &vertex.AiEndpointPrivateServiceConnectConfigArgs{
				EnablePrivateServiceConnect: pulumi.Bool(true),
				// Project IDs allowed to access the endpoint.
				ProjectAllowlists: pulumi.StringArray{
					pulumi.String(project.ProjectId),
				},
				// Project and network where the PSC attachments are created.
				PscAutomationConfigs: vertex.AiEndpointPrivateServiceConnectConfigPscAutomationConfigArray{
					&vertex.AiEndpointPrivateServiceConnectConfigPscAutomationConfigArgs{
						ProjectId: pulumi.String(project.ProjectId),
						Network:   _default.ID(),
					},
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Example: a Vertex AI endpoint exposed through Private Service Connect (PSC).
return await Deployment.RunAsync(() => 
{
    // Network in which the PSC attachments are created.
    var @default = new Gcp.Compute.Network("default", new()
    {
        Name = "psc-network-_42503",
    });

    // Current project; its ID is used for both the allowlist and the PSC automation config.
    var project = Gcp.Organizations.GetProject.Invoke();

    var endpoint = new Gcp.Vertex.AiEndpoint("endpoint", new()
    {
        Name = "endpoint-name_9991",
        DisplayName = "sample-endpoint",
        Description = "A sample vertex endpoint",
        Location = "us-central1",
        Region = "us-central1",
        Labels = 
        {
            { "label-one", "value-one" },
        },
        PrivateServiceConnectConfig = new Gcp.Vertex.Inputs.AiEndpointPrivateServiceConnectConfigArgs
        {
            EnablePrivateServiceConnect = true,
            // Project IDs allowed to access the endpoint.
            ProjectAllowlists = new[]
            {
                project.Apply(getProjectResult => getProjectResult.ProjectId),
            },
            // Project and network where the PSC attachments are created.
            PscAutomationConfigs = new[]
            {
                new Gcp.Vertex.Inputs.AiEndpointPrivateServiceConnectConfigPscAutomationConfigArgs
                {
                    ProjectId = project.Apply(getProjectResult => getProjectResult.ProjectId),
                    Network = @default.Id,
                },
            },
        },
    });

});

package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.compute.Network;
import com.pulumi.gcp.compute.NetworkArgs;
import com.pulumi.gcp.organizations.OrganizationsFunctions;
import com.pulumi.gcp.organizations.inputs.GetProjectArgs;
import com.pulumi.gcp.vertex.AiEndpoint;
import com.pulumi.gcp.vertex.AiEndpointArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointPrivateServiceConnectConfigArgs;
import com.pulumi.gcp.vertex.inputs.AiEndpointPrivateServiceConnectConfigPscAutomationConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Example: a Vertex AI endpoint exposed through Private Service Connect (PSC).
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the PSC network and the endpoint that uses it.
     *
     * @param ctx the Pulumi program context supplied by {@code Pulumi.run}
     */
    public static void stack(Context ctx) {
        // Network in which the PSC attachments are created.
        var default_ = new Network("default", NetworkArgs.builder()
            .name("psc-network-_42503")
            .build());

        // Current project. The invoke returns an Output, so the project ID must
        // be read inside applyValue rather than called on `project` directly.
        final var project = OrganizationsFunctions.getProject(GetProjectArgs.builder()
            .build());

        final Output<String> projectId = project.applyValue(projectInfo -> projectInfo.projectId());
        // The allowlist is a list input, so wrap the single resolved ID in a list.
        final Output<List<String>> allowedProjects = projectId.applyValue(id -> List.of(id));

        var endpoint = new AiEndpoint("endpoint", AiEndpointArgs.builder()
            .name("endpoint-name_9991")
            .displayName("sample-endpoint")
            .description("A sample vertex endpoint")
            .location("us-central1")
            .region("us-central1")
            .labels(Map.of("label-one", "value-one"))
            .privateServiceConnectConfig(AiEndpointPrivateServiceConnectConfigArgs.builder()
                .enablePrivateServiceConnect(true)
                // Project IDs allowed to access the endpoint.
                .projectAllowlists(allowedProjects)
                // Project and network where the PSC attachments are created.
                .pscAutomationConfigs(AiEndpointPrivateServiceConnectConfigPscAutomationConfigArgs.builder()
                    .projectId(projectId)
                    .network(default_.id())
                    .build())
                .build())
            .build());

    }
}

# Example: a Vertex AI endpoint exposed through Private Service Connect (PSC).
resources:
  # Network in which the PSC attachments are created.
  default:
    type: gcp:compute:Network
    properties:
      name: psc-network-_42503
  endpoint:
    type: gcp:vertex:AiEndpoint
    properties:
      name: endpoint-name_9991
      displayName: sample-endpoint
      description: A sample vertex endpoint
      location: us-central1
      region: us-central1
      labels:
        label-one: value-one
      privateServiceConnectConfig:
        enablePrivateServiceConnect: true
        # Project IDs allowed to access the endpoint.
        projectAllowlists:
          - ${project.projectId}
        # Project and network where the PSC attachments are created.
        pscAutomationConfigs:
          - projectId: ${project.projectId}
            network: ${default.id}
variables:
  # Current project; its ID is used for both the allowlist and the PSC automation config.
  project:
    fn::invoke:
      function: gcp:organizations:getProject
      arguments: {}

The privateServiceConnectConfig enables Private Service Connect and defines which projects can access the endpoint. The projectAllowlists property specifies allowed project IDs, while pscAutomationConfigs defines the network where PSC attachments are created. This approach isolates endpoint traffic without requiring VPC peering, making it useful for cross-project or multi-tenant scenarios. Only one of network or privateServiceConnectConfig can be set on an endpoint.

Enable dedicated DNS for isolated traffic

Production workloads requiring performance isolation can use dedicated endpoints, which provide a separate DNS hostname and isolated traffic path.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Example: a Vertex AI endpoint with a dedicated DNS hostname.
const endpoint = new gcp.vertex.AiEndpoint("endpoint", {
    name: "endpoint-name_12125",
    displayName: "sample-endpoint",
    description: "A sample vertex endpoint",
    location: "us-central1",
    region: "us-central1",
    labels: {
        "label-one": "value-one",
    },
    // Provisions the dedicated DNS hostname for this endpoint.
    dedicatedEndpointEnabled: true,
});
// Project lookup; the result is not referenced anywhere else in this snippet.
const project = gcp.organizations.getProject({});

import pulumi
import pulumi_gcp as gcp

# Example: a Vertex AI endpoint with a dedicated DNS hostname.
endpoint = gcp.vertex.AiEndpoint("endpoint",
    name="endpoint-name_12125",
    display_name="sample-endpoint",
    description="A sample vertex endpoint",
    location="us-central1",
    region="us-central1",
    labels={
        "label-one": "value-one",
    },
    # Provisions the dedicated DNS hostname for this endpoint.
    dedicated_endpoint_enabled=True)
# Project lookup; the result is not referenced anywhere else in this snippet.
project = gcp.organizations.get_project()

package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/organizations"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/vertex"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main declares a Vertex AI endpoint with a dedicated DNS hostname.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := vertex.NewAiEndpoint(ctx, "endpoint", &vertex.AiEndpointArgs{
			Name:        pulumi.String("endpoint-name_12125"),
			DisplayName: pulumi.String("sample-endpoint"),
			Description: pulumi.String("A sample vertex endpoint"),
			Location:    pulumi.String("us-central1"),
			Region:      pulumi.String("us-central1"),
			Labels: pulumi.StringMap{
				"label-one": pulumi.String("value-one"),
			},
			// Provisions the dedicated DNS hostname for this endpoint.
			DedicatedEndpointEnabled: pulumi.Bool(true),
		})
		if err != nil {
			return err
		}
		// Project lookup; the result is discarded and only the error is checked.
		_, err = organizations.LookupProject(ctx, &organizations.LookupProjectArgs{}, nil)
		if err != nil {
			return err
		}
		return nil
	})
}

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Example: a Vertex AI endpoint with a dedicated DNS hostname.
return await Deployment.RunAsync(() => 
{
    var endpoint = new Gcp.Vertex.AiEndpoint("endpoint", new()
    {
        Name = "endpoint-name_12125",
        DisplayName = "sample-endpoint",
        Description = "A sample vertex endpoint",
        Location = "us-central1",
        Region = "us-central1",
        Labels = 
        {
            { "label-one", "value-one" },
        },
        // Provisions the dedicated DNS hostname for this endpoint.
        DedicatedEndpointEnabled = true,
    });

    // Project lookup; the result is not referenced anywhere else in this snippet.
    var project = Gcp.Organizations.GetProject.Invoke();

});

package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.vertex.AiEndpoint;
import com.pulumi.gcp.vertex.AiEndpointArgs;
import com.pulumi.gcp.organizations.OrganizationsFunctions;
import com.pulumi.gcp.organizations.inputs.GetProjectArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Example: a Vertex AI endpoint with a dedicated DNS hostname.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the endpoint with the dedicated endpoint option enabled.
     *
     * @param ctx the Pulumi program context supplied by {@code Pulumi.run}
     */
    public static void stack(Context ctx) {
        var endpoint = new AiEndpoint("endpoint", AiEndpointArgs.builder()
            .name("endpoint-name_12125")
            .displayName("sample-endpoint")
            .description("A sample vertex endpoint")
            .location("us-central1")
            .region("us-central1")
            .labels(Map.of("label-one", "value-one"))
            // Provisions the dedicated DNS hostname for this endpoint.
            .dedicatedEndpointEnabled(true)
            .build());

        // Project lookup; the result is not referenced anywhere else in this snippet.
        final var project = OrganizationsFunctions.getProject(GetProjectArgs.builder()
            .build());

    }
}

# Example: a Vertex AI endpoint with a dedicated DNS hostname.
resources:
  endpoint:
    type: gcp:vertex:AiEndpoint
    properties:
      name: endpoint-name_12125
      displayName: sample-endpoint
      description: A sample vertex endpoint
      location: us-central1
      region: us-central1
      labels:
        label-one: value-one
      # Provisions the dedicated DNS hostname for this endpoint.
      dedicatedEndpointEnabled: true
variables:
  # Project lookup; the result is not referenced anywhere else in this snippet.
  project:
    fn::invoke:
      function: gcp:organizations:getProject
      arguments: {}

Setting dedicatedEndpointEnabled to true provisions a dedicated DNS hostname (available in the dedicatedEndpointDns output property) in the format https://{endpointId}.{region}-{projectNumber}.prediction.vertexai.goog. Requests to this DNS are isolated from shared infrastructure traffic. Once enabled, the endpoint no longer accepts requests at the shared regional DNS. This is the simplest networking configuration, requiring no VPC setup.

Beyond these examples

These snippets focus on specific endpoint-level features: VPC peering and Private Service Connect networking, prediction logging to BigQuery, and dedicated DNS endpoints. They’re intentionally minimal rather than full ML serving deployments.

The examples may reference pre-existing infrastructure such as VPC networks and service networking connections, KMS encryption keys with IAM bindings, and BigQuery datasets (or API access to create them). They focus on configuring the endpoint rather than deploying models or managing traffic.

To keep things focused, common endpoint patterns are omitted, including:

Model deployment and multi-model traffic splitting (the first example sets trafficSplit only for a single placeholder DeployedModel ID)
Monitoring job configuration (modelDeploymentMonitoringJob)
Label management and organization
Cross-region or multi-region deployment

These omissions are intentional: the goal is to illustrate how each endpoint feature is wired, not provide drop-in ML serving modules. See the Vertex AI Endpoint resource reference for all available configuration options.

Let's deploy GCP Vertex AI Endpoints

Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.

Try Pulumi Cloud for FREE

Frequently Asked Questions

Networking & Connectivity

Can I use both VPC peering and Private Service Connect for my endpoint?

No, network and privateServiceConnectConfig are mutually exclusive. Choose one connectivity method: use network for VPC peering or privateServiceConnectConfig for Private Service Connect.

How do I set up VPC peering for my Vertex AI endpoint?

Configure the network property with the full VPC network path, using the project number rather than the project ID (format: projects/{project_number}/global/networks/{network_name}). You’ll need to create a servicenetworking.Connection resource first and reference it with dependsOn to ensure proper setup order.

How do I configure Private Service Connect for my endpoint?

Use privateServiceConnectConfig with enablePrivateServiceConnect set to true, specify projectAllowlists, and configure pscAutomationConfigs with the project ID and network.

Can I disable dedicated endpoint after enabling it?

No, once you enable dedicatedEndpointEnabled, you can’t send requests to the shared DNS anymore. This limitation will be removed in a future release, so evaluate carefully before enabling.

Traffic Management

How do I configure my endpoint to accept no traffic?

Set trafficSplit to "{}", apply the change, then remove the field from your configuration. You can’t simply omit the field initially.

What are the requirements for trafficSplit values?

The traffic percentage values must add up to 100, or the map must be empty if the endpoint should not accept any traffic. DeployedModel IDs not listed in the map receive no traffic.

Security & Monitoring

How do I configure KMS encryption for my endpoint?

Set encryptionSpec.kmsKeyName to your KMS key name. You’ll also need to grant the Vertex AI service account (service-{project-number}@gcp-sa-aiplatform.iam.gserviceaccount.com) the roles/cloudkms.cryptoKeyEncrypterDecrypter role on the key.

How do I log prediction requests and responses to BigQuery?

Configure predictRequestResponseLoggingConfig with a bigqueryDestination.outputUri (format: bq://{project}.{dataset}.{table}), set enabled to true, and specify a samplingRate (e.g., 0.1 for 10%).

Resource Configuration

What properties can't be changed after creating an endpoint?

The following properties are immutable: location, name, project, region, encryptionSpec, and network. Changing any of these requires recreating the endpoint.

Using a different cloud?

Explore analytics guides for other cloud providers:

AWS Guides Azure Guides