Configure GCP BigQuery Connections

The gcp:bigquery/connection:Connection resource, part of the Pulumi GCP provider, defines BigQuery connections that enable federated queries to external data sources without moving data into BigQuery. This guide focuses on three capabilities: Cloud SQL and Cloud Spanner connectivity, multi-cloud access to AWS and Azure, and Spark integration with encryption.

Connections reference external infrastructure that must exist separately: Cloud SQL instances, Spanner databases, AWS IAM roles, Azure federated applications, Dataproc clusters, and Cloud KMS keys. The examples are intentionally small. Combine them with your own data sources and IAM configuration.

Connect BigQuery to Cloud SQL for federated queries

Analytics teams often need to query operational data stored in Cloud SQL without moving it into BigQuery. A connection enables federated queries that join BigQuery tables with live Cloud SQL data.

// Federated Cloud SQL example: provisions a Postgres instance, database, and
// user, then exposes them to BigQuery through a connection resource.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
import * as random from "@pulumi/random";

const instance = new gcp.sql.DatabaseInstance("instance", {
    name: "my-database-instance",
    databaseVersion: "POSTGRES_11",
    region: "us-central1",
    settings: {
        tier: "db-f1-micro",
    },
    deletionProtection: true,
});
const db = new gcp.sql.Database("db", {
    instance: instance.name,
    name: "db",
});
// Generate the database user's password instead of hard-coding it.
// The @pulumi/random SDK exports this resource as RandomPassword
// (`random.index.Password` is a converter artifact and does not exist).
const pwd = new random.RandomPassword("pwd", {
    length: 16,
    special: false,
});
const user = new gcp.sql.User("user", {
    name: "user",
    instance: instance.name,
    password: pwd.result,
});
const connection = new gcp.bigquery.Connection("connection", {
    friendlyName: "πŸ‘‹",
    description: "a riveting description",
    location: "US",
    cloudSql: {
        // connectionName has the project:region:instance format BigQuery expects.
        instanceId: instance.connectionName,
        database: db.name,
        type: "POSTGRES",
        credential: {
            username: user.name,
            password: user.password,
        },
    },
});
# Federated Cloud SQL example: provisions a Postgres instance, database, and
# user, then exposes them to BigQuery through a connection resource.
import pulumi
import pulumi_gcp as gcp
import pulumi_random as random

instance = gcp.sql.DatabaseInstance("instance",
    name="my-database-instance",
    database_version="POSTGRES_11",
    region="us-central1",
    settings={
        "tier": "db-f1-micro",
    },
    deletion_protection=True)
db = gcp.sql.Database("db",
    instance=instance.name,
    name="db")
# pulumi_random exposes this resource as RandomPassword; resource outputs are
# attributes (pwd.result), not subscriptable like a dict.
pwd = random.RandomPassword("pwd",
    length=16,
    special=False)
user = gcp.sql.User("user",
    name="user",
    instance=instance.name,
    password=pwd.result)
connection = gcp.bigquery.Connection("connection",
    friendly_name="πŸ‘‹",
    description="a riveting description",
    location="US",
    cloud_sql={
        # connection_name has the project:region:instance format BigQuery expects.
        "instance_id": instance.connection_name,
        "database": db.name,
        "type": "POSTGRES",
        "credential": {
            "username": user.name,
            "password": user.password,
        },
    })
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/sql"
	"github.com/pulumi/pulumi-random/sdk/v4/go/random"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		instance, err := sql.NewDatabaseInstance(ctx, "instance", &sql.DatabaseInstanceArgs{
			Name:            pulumi.String("my-database-instance"),
			DatabaseVersion: pulumi.String("POSTGRES_11"),
			Region:          pulumi.String("us-central1"),
			Settings: &sql.DatabaseInstanceSettingsArgs{
				Tier: pulumi.String("db-f1-micro"),
			},
			DeletionProtection: pulumi.Bool(true),
		})
		if err != nil {
			return err
		}
		db, err := sql.NewDatabase(ctx, "db", &sql.DatabaseArgs{
			Instance: instance.Name,
			Name:     pulumi.String("db"),
		})
		if err != nil {
			return err
		}
		pwd, err := random.NewPassword(ctx, "pwd", &random.PasswordArgs{
			Length:  16,
			Special: false,
		})
		if err != nil {
			return err
		}
		user, err := sql.NewUser(ctx, "user", &sql.UserArgs{
			Name:     pulumi.String("user"),
			Instance: instance.Name,
			Password: pwd.Result,
		})
		if err != nil {
			return err
		}
		_, err = bigquery.NewConnection(ctx, "connection", &bigquery.ConnectionArgs{
			FriendlyName: pulumi.String("πŸ‘‹"),
			Description:  pulumi.String("a riveting description"),
			Location:     pulumi.String("US"),
			CloudSql: &bigquery.ConnectionCloudSqlArgs{
				InstanceId: instance.ConnectionName,
				Database:   db.Name,
				Type:       pulumi.String("POSTGRES"),
				Credential: &bigquery.ConnectionCloudSqlCredentialArgs{
					Username: user.Name,
					Password: user.Password,
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
// Federated Cloud SQL example: provisions a Postgres instance, database, and
// user, then exposes them to BigQuery through a connection resource.
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
using Random = Pulumi.Random;

return await Deployment.RunAsync(() => 
{
    var instance = new Gcp.Sql.DatabaseInstance("instance", new()
    {
        Name = "my-database-instance",
        DatabaseVersion = "POSTGRES_11",
        Region = "us-central1",
        Settings = new Gcp.Sql.Inputs.DatabaseInstanceSettingsArgs
        {
            Tier = "db-f1-micro",
        },
        DeletionProtection = true,
    });

    var db = new Gcp.Sql.Database("db", new()
    {
        Instance = instance.Name,
        Name = "db",
    });

    // The .NET SDK exposes this resource as RandomPassword
    // (`Random.Index.Password` is a converter artifact and does not exist).
    var pwd = new Random.RandomPassword("pwd", new()
    {
        Length = 16,
        Special = false,
    });

    var user = new Gcp.Sql.User("user", new()
    {
        Name = "user",
        Instance = instance.Name,
        Password = pwd.Result,
    });

    var connection = new Gcp.BigQuery.Connection("connection", new()
    {
        FriendlyName = "πŸ‘‹",
        Description = "a riveting description",
        Location = "US",
        CloudSql = new Gcp.BigQuery.Inputs.ConnectionCloudSqlArgs
        {
            // ConnectionName has the project:region:instance format BigQuery expects.
            InstanceId = instance.ConnectionName,
            Database = db.Name,
            Type = "POSTGRES",
            Credential = new Gcp.BigQuery.Inputs.ConnectionCloudSqlCredentialArgs
            {
                Username = user.Name,
                Password = user.Password,
            },
        },
    });

});
// Federated Cloud SQL example: provisions a Postgres instance, database, and
// user, then exposes them to BigQuery through a connection resource.
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.sql.DatabaseInstance;
import com.pulumi.gcp.sql.DatabaseInstanceArgs;
import com.pulumi.gcp.sql.inputs.DatabaseInstanceSettingsArgs;
import com.pulumi.gcp.sql.Database;
import com.pulumi.gcp.sql.DatabaseArgs;
// The Java SDK exposes this resource as RandomPassword
// (com.pulumi.random.Password does not exist).
import com.pulumi.random.RandomPassword;
import com.pulumi.random.RandomPasswordArgs;
import com.pulumi.gcp.sql.User;
import com.pulumi.gcp.sql.UserArgs;
import com.pulumi.gcp.bigquery.Connection;
import com.pulumi.gcp.bigquery.ConnectionArgs;
import com.pulumi.gcp.bigquery.inputs.ConnectionCloudSqlArgs;
import com.pulumi.gcp.bigquery.inputs.ConnectionCloudSqlCredentialArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var instance = new DatabaseInstance("instance", DatabaseInstanceArgs.builder()
            .name("my-database-instance")
            .databaseVersion("POSTGRES_11")
            .region("us-central1")
            .settings(DatabaseInstanceSettingsArgs.builder()
                .tier("db-f1-micro")
                .build())
            .deletionProtection(true)
            .build());

        var db = new Database("db", DatabaseArgs.builder()
            .instance(instance.name())
            .name("db")
            .build());

        var pwd = new RandomPassword("pwd", RandomPasswordArgs.builder()
            .length(16)
            .special(false)
            .build());

        var user = new User("user", UserArgs.builder()
            .name("user")
            .instance(instance.name())
            .password(pwd.result())
            .build());

        var connection = new Connection("connection", ConnectionArgs.builder()
            .friendlyName("πŸ‘‹")
            .description("a riveting description")
            .location("US")
            .cloudSql(ConnectionCloudSqlArgs.builder()
                // connectionName has the project:region:instance format BigQuery expects.
                .instanceId(instance.connectionName())
                .database(db.name())
                .type("POSTGRES")
                .credential(ConnectionCloudSqlCredentialArgs.builder()
                    .username(user.name())
                    .password(user.password())
                    .build())
                .build())
            .build());

    }
}
# Federated Cloud SQL example: provisions a Postgres instance, database, and
# user, then exposes them to BigQuery through a connection resource.
resources:
  instance:
    type: gcp:sql:DatabaseInstance
    properties:
      name: my-database-instance
      databaseVersion: POSTGRES_11
      region: us-central1
      settings:
        tier: db-f1-micro
      deletionProtection: true
  db:
    type: gcp:sql:Database
    properties:
      instance: ${instance.name}
      name: db
  pwd:
    # The random provider's password resource token is random:RandomPassword
    # (random:Password does not resolve).
    type: random:RandomPassword
    properties:
      length: 16
      special: false
  user:
    type: gcp:sql:User
    properties:
      name: user
      instance: ${instance.name}
      password: ${pwd.result}
  connection:
    type: gcp:bigquery:Connection
    properties:
      friendlyName: "\U0001F44B"
      description: a riveting description
      location: US
      cloudSql:
        # connectionName has the project:region:instance format BigQuery expects.
        instanceId: ${instance.connectionName}
        database: ${db.name}
        type: POSTGRES
        credential:
          username: ${user.name}
          password: ${user.password}
The cloudSql block specifies the Cloud SQL instance, database, and credentials. The instanceId uses the connection name format (project:region:instance), while type specifies the database engine (POSTGRES, MYSQL). BigQuery uses these credentials to execute queries against Cloud SQL on your behalf.

Query Cloud Spanner databases from BigQuery

Applications using Cloud Spanner for transactional workloads can expose that data to BigQuery for analytics without ETL pipelines.

// Minimal Cloud Spanner connection: BigQuery queries the referenced database
// while assuming the given fine-grained Spanner database role.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const connection = new gcp.bigquery.Connection("connection", {
    connectionId: "my-connection",
    location: "US",
    friendlyName: "πŸ‘‹",
    description: "a riveting description",
    cloudSpanner: {
        // Full resource path: projects/<project>/instances/<instance>/databases/<db>.
        database: "projects/project/instances/instance/databases/database",
        databaseRole: "database_role",
    },
});
# Minimal Cloud Spanner connection: BigQuery queries the referenced database
# while assuming the given fine-grained Spanner database role.
import pulumi
import pulumi_gcp as gcp

connection = gcp.bigquery.Connection("connection",
    connection_id="my-connection",
    location="US",
    friendly_name="πŸ‘‹",
    description="a riveting description",
    cloud_spanner={
        # Full resource path: projects/<project>/instances/<instance>/databases/<db>.
        "database": "projects/project/instances/instance/databases/database",
        "database_role": "database_role",
    })
// Minimal Cloud Spanner connection: BigQuery queries the referenced database
// while assuming the given fine-grained Spanner database role.
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := bigquery.NewConnection(ctx, "connection", &bigquery.ConnectionArgs{
			ConnectionId: pulumi.String("my-connection"),
			Location:     pulumi.String("US"),
			FriendlyName: pulumi.String("πŸ‘‹"),
			Description:  pulumi.String("a riveting description"),
			CloudSpanner: &bigquery.ConnectionCloudSpannerArgs{
				// Full resource path: projects/<project>/instances/<instance>/databases/<db>.
				Database:     pulumi.String("projects/project/instances/instance/databases/database"),
				DatabaseRole: pulumi.String("database_role"),
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
// Minimal Cloud Spanner connection: BigQuery queries the referenced database
// while assuming the given fine-grained Spanner database role.
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    var connection = new Gcp.BigQuery.Connection("connection", new()
    {
        ConnectionId = "my-connection",
        Location = "US",
        FriendlyName = "πŸ‘‹",
        Description = "a riveting description",
        CloudSpanner = new Gcp.BigQuery.Inputs.ConnectionCloudSpannerArgs
        {
            // Full resource path: projects/<project>/instances/<instance>/databases/<db>.
            Database = "projects/project/instances/instance/databases/database",
            DatabaseRole = "database_role",
        },
    });

});
// Minimal Cloud Spanner connection: BigQuery queries the referenced database
// while assuming the given fine-grained Spanner database role.
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Connection;
import com.pulumi.gcp.bigquery.ConnectionArgs;
import com.pulumi.gcp.bigquery.inputs.ConnectionCloudSpannerArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var connection = new Connection("connection", ConnectionArgs.builder()
            .connectionId("my-connection")
            .location("US")
            .friendlyName("πŸ‘‹")
            .description("a riveting description")
            .cloudSpanner(ConnectionCloudSpannerArgs.builder()
                // Full resource path: projects/<project>/instances/<instance>/databases/<db>.
                .database("projects/project/instances/instance/databases/database")
                .databaseRole("database_role")
                .build())
            .build());

    }
}
# Minimal Cloud Spanner connection: BigQuery queries the referenced database
# while assuming the given fine-grained Spanner database role.
resources:
  connection:
    type: gcp:bigquery:Connection
    properties:
      connectionId: my-connection
      location: US
      friendlyName: "\U0001F44B"
      description: a riveting description
      cloudSpanner:
        # Full resource path: projects/<project>/instances/<instance>/databases/<db>.
        database: projects/project/instances/instance/databases/database
        databaseRole: database_role

The cloudSpanner block references a Spanner database using its full resource path. The databaseRole property specifies which Spanner database role BigQuery assumes when executing queries, controlling access to specific tables and columns.

Enable Data Boost for high-throughput Spanner queries

Large analytical queries against Cloud Spanner can impact transactional performance. Data Boost offloads query processing to dedicated resources.

// Spanner connection with Data Boost: analytics queries run on dedicated
// compute so they do not contend with transactional traffic.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const connection = new gcp.bigquery.Connection("connection", {
    connectionId: "my-connection",
    location: "US",
    friendlyName: "πŸ‘‹",
    description: "a riveting description",
    cloudSpanner: {
        database: "projects/project/instances/instance/databases/database",
        useParallelism: true,
        useDataBoost: true,
        // Upper bound on parallel query execution against Spanner.
        maxParallelism: 100,
    },
});
# Spanner connection with Data Boost: analytics queries run on dedicated
# compute so they do not contend with transactional traffic.
import pulumi
import pulumi_gcp as gcp

connection = gcp.bigquery.Connection("connection",
    connection_id="my-connection",
    location="US",
    friendly_name="πŸ‘‹",
    description="a riveting description",
    cloud_spanner={
        "database": "projects/project/instances/instance/databases/database",
        "use_parallelism": True,
        "use_data_boost": True,
        # Upper bound on parallel query execution against Spanner.
        "max_parallelism": 100,
    })
// Spanner connection with Data Boost: analytics queries run on dedicated
// compute so they do not contend with transactional traffic.
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := bigquery.NewConnection(ctx, "connection", &bigquery.ConnectionArgs{
			ConnectionId: pulumi.String("my-connection"),
			Location:     pulumi.String("US"),
			FriendlyName: pulumi.String("πŸ‘‹"),
			Description:  pulumi.String("a riveting description"),
			CloudSpanner: &bigquery.ConnectionCloudSpannerArgs{
				Database:       pulumi.String("projects/project/instances/instance/databases/database"),
				UseParallelism: pulumi.Bool(true),
				UseDataBoost:   pulumi.Bool(true),
				// Upper bound on parallel query execution against Spanner.
				MaxParallelism: pulumi.Int(100),
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
// Spanner connection with Data Boost: analytics queries run on dedicated
// compute so they do not contend with transactional traffic.
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    var connection = new Gcp.BigQuery.Connection("connection", new()
    {
        ConnectionId = "my-connection",
        Location = "US",
        FriendlyName = "πŸ‘‹",
        Description = "a riveting description",
        CloudSpanner = new Gcp.BigQuery.Inputs.ConnectionCloudSpannerArgs
        {
            Database = "projects/project/instances/instance/databases/database",
            UseParallelism = true,
            UseDataBoost = true,
            // Upper bound on parallel query execution against Spanner.
            MaxParallelism = 100,
        },
    });

});
// Spanner connection with Data Boost: analytics queries run on dedicated
// compute so they do not contend with transactional traffic.
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Connection;
import com.pulumi.gcp.bigquery.ConnectionArgs;
import com.pulumi.gcp.bigquery.inputs.ConnectionCloudSpannerArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var connection = new Connection("connection", ConnectionArgs.builder()
            .connectionId("my-connection")
            .location("US")
            .friendlyName("πŸ‘‹")
            .description("a riveting description")
            .cloudSpanner(ConnectionCloudSpannerArgs.builder()
                .database("projects/project/instances/instance/databases/database")
                .useParallelism(true)
                .useDataBoost(true)
                // Upper bound on parallel query execution against Spanner.
                .maxParallelism(100)
                .build())
            .build());

    }
}
# Spanner connection with Data Boost: analytics queries run on dedicated
# compute so they do not contend with transactional traffic.
resources:
  connection:
    type: gcp:bigquery:Connection
    properties:
      connectionId: my-connection
      location: US
      friendlyName: "\U0001F44B"
      description: a riveting description
      cloudSpanner:
        database: projects/project/instances/instance/databases/database
        useParallelism: true
        useDataBoost: true
        # Upper bound on parallel query execution against Spanner.
        maxParallelism: 100

Setting useDataBoost to true isolates analytics queries from transactional traffic. The useParallelism and maxParallelism properties control how BigQuery distributes query execution across Spanner resources, balancing throughput against cost.

Connect to AWS data sources via IAM role

Multi-cloud deployments often need to query data stored in AWS services like S3 or Redshift from BigQuery.

// BigQuery Omni connection to AWS: BigQuery assumes the given IAM role.
// Note the AWS-style location ("aws-us-east-1"), not a GCP region.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const connection = new gcp.bigquery.Connection("connection", {
    connectionId: "my-connection",
    location: "aws-us-east-1",
    friendlyName: "πŸ‘‹",
    description: "a riveting description",
    aws: {
        accessRole: {
            // ARN of the AWS IAM role BigQuery assumes for data access.
            iamRoleId: "arn:aws:iam::999999999999:role/omnirole",
        },
    },
});
# BigQuery Omni connection to AWS: BigQuery assumes the given IAM role.
# Note the AWS-style location ("aws-us-east-1"), not a GCP region.
import pulumi
import pulumi_gcp as gcp

connection = gcp.bigquery.Connection("connection",
    connection_id="my-connection",
    location="aws-us-east-1",
    friendly_name="πŸ‘‹",
    description="a riveting description",
    aws={
        "access_role": {
            # ARN of the AWS IAM role BigQuery assumes for data access.
            "iam_role_id": "arn:aws:iam::999999999999:role/omnirole",
        },
    })
// BigQuery Omni connection to AWS: BigQuery assumes the given IAM role.
// Note the AWS-style location ("aws-us-east-1"), not a GCP region.
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := bigquery.NewConnection(ctx, "connection", &bigquery.ConnectionArgs{
			ConnectionId: pulumi.String("my-connection"),
			Location:     pulumi.String("aws-us-east-1"),
			FriendlyName: pulumi.String("πŸ‘‹"),
			Description:  pulumi.String("a riveting description"),
			Aws: &bigquery.ConnectionAwsArgs{
				AccessRole: &bigquery.ConnectionAwsAccessRoleArgs{
					// ARN of the AWS IAM role BigQuery assumes for data access.
					IamRoleId: pulumi.String("arn:aws:iam::999999999999:role/omnirole"),
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
// BigQuery Omni connection to AWS: BigQuery assumes the given IAM role.
// Note the AWS-style location ("aws-us-east-1"), not a GCP region.
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    var connection = new Gcp.BigQuery.Connection("connection", new()
    {
        ConnectionId = "my-connection",
        Location = "aws-us-east-1",
        FriendlyName = "πŸ‘‹",
        Description = "a riveting description",
        Aws = new Gcp.BigQuery.Inputs.ConnectionAwsArgs
        {
            AccessRole = new Gcp.BigQuery.Inputs.ConnectionAwsAccessRoleArgs
            {
                // ARN of the AWS IAM role BigQuery assumes for data access.
                IamRoleId = "arn:aws:iam::999999999999:role/omnirole",
            },
        },
    });

});
// BigQuery Omni connection to AWS: BigQuery assumes the given IAM role.
// Note the AWS-style location ("aws-us-east-1"), not a GCP region.
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Connection;
import com.pulumi.gcp.bigquery.ConnectionArgs;
import com.pulumi.gcp.bigquery.inputs.ConnectionAwsArgs;
import com.pulumi.gcp.bigquery.inputs.ConnectionAwsAccessRoleArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var connection = new Connection("connection", ConnectionArgs.builder()
            .connectionId("my-connection")
            .location("aws-us-east-1")
            .friendlyName("πŸ‘‹")
            .description("a riveting description")
            .aws(ConnectionAwsArgs.builder()
                .accessRole(ConnectionAwsAccessRoleArgs.builder()
                    // ARN of the AWS IAM role BigQuery assumes for data access.
                    .iamRoleId("arn:aws:iam::999999999999:role/omnirole")
                    .build())
                .build())
            .build());

    }
}
# BigQuery Omni connection to AWS: BigQuery assumes the given IAM role.
# Note the AWS-style location ("aws-us-east-1"), not a GCP region.
resources:
  connection:
    type: gcp:bigquery:Connection
    properties:
      connectionId: my-connection
      location: aws-us-east-1
      friendlyName: "\U0001F44B"
      description: a riveting description
      aws:
        accessRole:
          # ARN of the AWS IAM role BigQuery assumes for data access.
          iamRoleId: arn:aws:iam::999999999999:role/omnirole

The aws block specifies an IAM role ARN that BigQuery assumes when accessing AWS resources. The location property must use an AWS region format (aws-us-east-1). On the AWS side, the IAM role's trust policy must be configured to allow BigQuery's Google-managed identity to assume the role before queries will succeed.

Connect to Azure data sources via federated identity

Organizations with Azure infrastructure can query Azure Blob Storage or other Azure services from BigQuery using federated authentication.

// BigQuery Omni connection to Azure via federated identity — no stored
// credentials. Note the Azure-style location ("azure-eastus2").
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const connection = new gcp.bigquery.Connection("connection", {
    connectionId: "my-connection",
    location: "azure-eastus2",
    friendlyName: "πŸ‘‹",
    description: "a riveting description",
    azure: {
        // Azure AD tenant plus the federated app registration BigQuery uses.
        customerTenantId: "customer-tenant-id",
        federatedApplicationClientId: "b43eeeee-eeee-eeee-eeee-a480155501ce",
    },
});
# BigQuery Omni connection to Azure via federated identity — no stored
# credentials. Note the Azure-style location ("azure-eastus2").
import pulumi
import pulumi_gcp as gcp

connection = gcp.bigquery.Connection("connection",
    connection_id="my-connection",
    location="azure-eastus2",
    friendly_name="πŸ‘‹",
    description="a riveting description",
    azure={
        # Azure AD tenant plus the federated app registration BigQuery uses.
        "customer_tenant_id": "customer-tenant-id",
        "federated_application_client_id": "b43eeeee-eeee-eeee-eeee-a480155501ce",
    })
// BigQuery Omni connection to Azure via federated identity — no stored
// credentials. Note the Azure-style location ("azure-eastus2").
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := bigquery.NewConnection(ctx, "connection", &bigquery.ConnectionArgs{
			ConnectionId: pulumi.String("my-connection"),
			Location:     pulumi.String("azure-eastus2"),
			FriendlyName: pulumi.String("πŸ‘‹"),
			Description:  pulumi.String("a riveting description"),
			Azure: &bigquery.ConnectionAzureArgs{
				// Azure AD tenant plus the federated app registration BigQuery uses.
				CustomerTenantId:             pulumi.String("customer-tenant-id"),
				FederatedApplicationClientId: pulumi.String("b43eeeee-eeee-eeee-eeee-a480155501ce"),
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
// BigQuery Omni connection to Azure via federated identity — no stored
// credentials. Note the Azure-style location ("azure-eastus2").
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    var connection = new Gcp.BigQuery.Connection("connection", new()
    {
        ConnectionId = "my-connection",
        Location = "azure-eastus2",
        FriendlyName = "πŸ‘‹",
        Description = "a riveting description",
        Azure = new Gcp.BigQuery.Inputs.ConnectionAzureArgs
        {
            // Azure AD tenant plus the federated app registration BigQuery uses.
            CustomerTenantId = "customer-tenant-id",
            FederatedApplicationClientId = "b43eeeee-eeee-eeee-eeee-a480155501ce",
        },
    });

});
// BigQuery Omni connection to Azure via federated identity — no stored
// credentials. Note the Azure-style location ("azure-eastus2").
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Connection;
import com.pulumi.gcp.bigquery.ConnectionArgs;
import com.pulumi.gcp.bigquery.inputs.ConnectionAzureArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var connection = new Connection("connection", ConnectionArgs.builder()
            .connectionId("my-connection")
            .location("azure-eastus2")
            .friendlyName("πŸ‘‹")
            .description("a riveting description")
            .azure(ConnectionAzureArgs.builder()
                // Azure AD tenant plus the federated app registration BigQuery uses.
                .customerTenantId("customer-tenant-id")
                .federatedApplicationClientId("b43eeeee-eeee-eeee-eeee-a480155501ce")
                .build())
            .build());

    }
}
# BigQuery Omni connection to Azure via federated identity — no stored
# credentials. Note the Azure-style location ("azure-eastus2").
resources:
  connection:
    type: gcp:bigquery:Connection
    properties:
      connectionId: my-connection
      location: azure-eastus2
      friendlyName: "\U0001F44B"
      description: a riveting description
      azure:
        # Azure AD tenant plus the federated app registration BigQuery uses.
        customerTenantId: customer-tenant-id
        federatedApplicationClientId: b43eeeee-eeee-eeee-eeee-a480155501ce

The azure block uses customerTenantId and federatedApplicationClientId to establish cross-cloud identity. The location property must use an Azure region format (azure-eastus2). This enables BigQuery to authenticate to Azure services without storing credentials.

Execute Spark stored procedures from BigQuery

Data processing pipelines that combine BigQuery SQL with Spark transformations can use connections to execute Spark code as stored procedures.

// Spark connection: a minimal single-node Dataproc cluster acts as the
// Spark History Server for BigQuery Spark stored procedures.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const basic = new gcp.dataproc.Cluster("basic", {
    name: "my-connection",
    region: "us-central1",
    clusterConfig: {
        softwareConfig: {
            overrideProperties: {
                // Allow a master-only (zero-worker) cluster to keep the example cheap.
                "dataproc:dataproc.allow.zero.workers": "true",
            },
        },
        masterConfig: {
            numInstances: 1,
            machineType: "e2-standard-2",
            diskConfig: {
                bootDiskSizeGb: 35,
            },
        },
    },
});
const connection = new gcp.bigquery.Connection("connection", {
    connectionId: "my-connection",
    location: "US",
    friendlyName: "πŸ‘‹",
    description: "a riveting description",
    spark: {
        sparkHistoryServerConfig: {
            dataprocCluster: basic.id,
        },
    },
});
# Spark connection: a minimal single-node Dataproc cluster acts as the
# Spark History Server for BigQuery Spark stored procedures.
import pulumi
import pulumi_gcp as gcp

basic = gcp.dataproc.Cluster("basic",
    name="my-connection",
    region="us-central1",
    cluster_config={
        "software_config": {
            "override_properties": {
                # Allow a master-only (zero-worker) cluster to keep the example cheap.
                "dataproc:dataproc.allow.zero.workers": "true",
            },
        },
        "master_config": {
            "num_instances": 1,
            "machine_type": "e2-standard-2",
            "disk_config": {
                "boot_disk_size_gb": 35,
            },
        },
    })
connection = gcp.bigquery.Connection("connection",
    connection_id="my-connection",
    location="US",
    friendly_name="πŸ‘‹",
    description="a riveting description",
    spark={
        "spark_history_server_config": {
            "dataproc_cluster": basic.id,
        },
    })
// Spark connection: a minimal single-node Dataproc cluster acts as the
// Spark History Server for BigQuery Spark stored procedures.
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataproc"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		basic, err := dataproc.NewCluster(ctx, "basic", &dataproc.ClusterArgs{
			Name:   pulumi.String("my-connection"),
			Region: pulumi.String("us-central1"),
			ClusterConfig: &dataproc.ClusterClusterConfigArgs{
				SoftwareConfig: &dataproc.ClusterClusterConfigSoftwareConfigArgs{
					OverrideProperties: pulumi.StringMap{
						// Allow a master-only (zero-worker) cluster to keep the example cheap.
						"dataproc:dataproc.allow.zero.workers": pulumi.String("true"),
					},
				},
				MasterConfig: &dataproc.ClusterClusterConfigMasterConfigArgs{
					NumInstances: pulumi.Int(1),
					MachineType:  pulumi.String("e2-standard-2"),
					DiskConfig: &dataproc.ClusterClusterConfigMasterConfigDiskConfigArgs{
						BootDiskSizeGb: pulumi.Int(35),
					},
				},
			},
		})
		if err != nil {
			return err
		}
		_, err = bigquery.NewConnection(ctx, "connection", &bigquery.ConnectionArgs{
			ConnectionId: pulumi.String("my-connection"),
			Location:     pulumi.String("US"),
			FriendlyName: pulumi.String("πŸ‘‹"),
			Description:  pulumi.String("a riveting description"),
			Spark: &bigquery.ConnectionSparkArgs{
				SparkHistoryServerConfig: &bigquery.ConnectionSparkSparkHistoryServerConfigArgs{
					DataprocCluster: basic.ID(),
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
// Spark connection: a minimal single-node Dataproc cluster acts as the
// Spark History Server for BigQuery Spark stored procedures.
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    var basic = new Gcp.Dataproc.Cluster("basic", new()
    {
        Name = "my-connection",
        Region = "us-central1",
        ClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigArgs
        {
            SoftwareConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigSoftwareConfigArgs
            {
                OverrideProperties = 
                {
                    // Allow a master-only (zero-worker) cluster to keep the example cheap.
                    { "dataproc:dataproc.allow.zero.workers", "true" },
                },
            },
            MasterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigArgs
            {
                NumInstances = 1,
                MachineType = "e2-standard-2",
                DiskConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigDiskConfigArgs
                {
                    BootDiskSizeGb = 35,
                },
            },
        },
    });

    var connection = new Gcp.BigQuery.Connection("connection", new()
    {
        ConnectionId = "my-connection",
        Location = "US",
        FriendlyName = "πŸ‘‹",
        Description = "a riveting description",
        Spark = new Gcp.BigQuery.Inputs.ConnectionSparkArgs
        {
            SparkHistoryServerConfig = new Gcp.BigQuery.Inputs.ConnectionSparkSparkHistoryServerConfigArgs
            {
                DataprocCluster = basic.Id,
            },
        },
    });

});
// Spark connection: a minimal single-node Dataproc cluster acts as the
// Spark History Server for BigQuery Spark stored procedures.
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.Cluster;
import com.pulumi.gcp.dataproc.ClusterArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigSoftwareConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigDiskConfigArgs;
import com.pulumi.gcp.bigquery.Connection;
import com.pulumi.gcp.bigquery.ConnectionArgs;
import com.pulumi.gcp.bigquery.inputs.ConnectionSparkArgs;
import com.pulumi.gcp.bigquery.inputs.ConnectionSparkSparkHistoryServerConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var basic = new Cluster("basic", ClusterArgs.builder()
            .name("my-connection")
            .region("us-central1")
            .clusterConfig(ClusterClusterConfigArgs.builder()
                .softwareConfig(ClusterClusterConfigSoftwareConfigArgs.builder()
                    // Allow a master-only (zero-worker) cluster to keep the example cheap.
                    .overrideProperties(Map.of("dataproc:dataproc.allow.zero.workers", "true"))
                    .build())
                .masterConfig(ClusterClusterConfigMasterConfigArgs.builder()
                    .numInstances(1)
                    .machineType("e2-standard-2")
                    .diskConfig(ClusterClusterConfigMasterConfigDiskConfigArgs.builder()
                        .bootDiskSizeGb(35)
                        .build())
                    .build())
                .build())
            .build());

        var connection = new Connection("connection", ConnectionArgs.builder()
            .connectionId("my-connection")
            .location("US")
            .friendlyName("πŸ‘‹")
            .description("a riveting description")
            .spark(ConnectionSparkArgs.builder()
                .sparkHistoryServerConfig(ConnectionSparkSparkHistoryServerConfigArgs.builder()
                    .dataprocCluster(basic.id())
                    .build())
                .build())
            .build());

    }
}
# YAML equivalent of the Spark connection example.
resources:
  connection:
    type: gcp:bigquery:Connection
    properties:
      connectionId: my-connection
      location: US
      # Unicode escape for the waving-hand emoji used by the other examples.
      friendlyName: "\U0001F44B"
      description: a riveting description
      spark:
        sparkHistoryServerConfig:
          # References the Dataproc cluster declared below.
          dataprocCluster: ${basic.id}
  basic:
    # Master-only Dataproc cluster; the override property permits zero workers.
    type: gcp:dataproc:Cluster
    properties:
      name: my-connection
      region: us-central1
      clusterConfig:
        softwareConfig:
          overrideProperties:
            dataproc:dataproc.allow.zero.workers: 'true'
        masterConfig:
          numInstances: 1
          machineType: e2-standard-2
          diskConfig:
            bootDiskSizeGb: 35

The spark block references a Dataproc cluster through sparkHistoryServerConfig. BigQuery submits Spark jobs to this cluster when you invoke stored procedures, allowing you to combine SQL analytics with Spark’s data processing capabilities.

Encrypt connection credentials with customer-managed keys

Compliance requirements often mandate customer-managed encryption for credentials stored in BigQuery connections.

// TypeScript CMEK example: a Cloud SQL connection whose stored credential
// is encrypted with a customer-managed Cloud KMS key.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Cloud SQL instance hosting the database to be queried via federation.
const instance = new gcp.sql.DatabaseInstance("instance", {
    name: "my-database-instance",
    region: "us-central1",
    databaseVersion: "POSTGRES_11",
    settings: {
        tier: "db-f1-micro",
    },
    deletionProtection: true,
});
const db = new gcp.sql.Database("db", {
    instance: instance.name,
    name: "db",
});
const user = new gcp.sql.User("user", {
    name: "user",
    instance: instance.name,
    // NOTE(review): password is hardcoded for example purposes; prefer a
    // generated secret (e.g. random.Password, as in the basic example).
    password: "tf-test-my-password_60302",
});
const bq_connection_cmek = new gcp.bigquery.Connection("bq-connection-cmek", {
    friendlyName: "πŸ‘‹",
    description: "a riveting description",
    location: "US",
    // Customer-managed KMS key that encrypts the stored connection credential.
    kmsKeyName: "projects/project/locations/us-central1/keyRings/us-central1/cryptoKeys/bq-key",
    cloudSql: {
        instanceId: instance.connectionName,
        database: db.name,
        type: "POSTGRES",
        credential: {
            username: user.name,
            password: user.password,
        },
    },
});
# Python CMEK example: a Cloud SQL connection whose stored credential
# is encrypted with a customer-managed Cloud KMS key.
import pulumi
import pulumi_gcp as gcp

# Cloud SQL instance hosting the database to be queried via federation.
instance = gcp.sql.DatabaseInstance("instance",
    name="my-database-instance",
    region="us-central1",
    database_version="POSTGRES_11",
    settings={
        "tier": "db-f1-micro",
    },
    deletion_protection=True)
db = gcp.sql.Database("db",
    instance=instance.name,
    name="db")
user = gcp.sql.User("user",
    name="user",
    instance=instance.name,
    # NOTE(review): password is hardcoded for example purposes; prefer a
    # generated secret in real deployments.
    password="tf-test-my-password_60302")
bq_connection_cmek = gcp.bigquery.Connection("bq-connection-cmek",
    friendly_name="πŸ‘‹",
    description="a riveting description",
    location="US",
    # Customer-managed KMS key that encrypts the stored connection credential.
    kms_key_name="projects/project/locations/us-central1/keyRings/us-central1/cryptoKeys/bq-key",
    cloud_sql={
        "instance_id": instance.connection_name,
        "database": db.name,
        "type": "POSTGRES",
        "credential": {
            "username": user.name,
            "password": user.password,
        },
    })
// Go CMEK example: a Cloud SQL connection whose stored credential is
// encrypted with a customer-managed Cloud KMS key.
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/sql"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Cloud SQL instance hosting the database to be queried via federation.
		instance, err := sql.NewDatabaseInstance(ctx, "instance", &sql.DatabaseInstanceArgs{
			Name:            pulumi.String("my-database-instance"),
			Region:          pulumi.String("us-central1"),
			DatabaseVersion: pulumi.String("POSTGRES_11"),
			Settings: &sql.DatabaseInstanceSettingsArgs{
				Tier: pulumi.String("db-f1-micro"),
			},
			DeletionProtection: pulumi.Bool(true),
		})
		if err != nil {
			return err
		}
		db, err := sql.NewDatabase(ctx, "db", &sql.DatabaseArgs{
			Instance: instance.Name,
			Name:     pulumi.String("db"),
		})
		if err != nil {
			return err
		}
		// NOTE(review): password is hardcoded for example purposes; prefer a
		// generated secret in real deployments.
		user, err := sql.NewUser(ctx, "user", &sql.UserArgs{
			Name:     pulumi.String("user"),
			Instance: instance.Name,
			Password: pulumi.String("tf-test-my-password_60302"),
		})
		if err != nil {
			return err
		}
		// KmsKeyName supplies the customer-managed key that encrypts the
		// stored connection credential.
		_, err = bigquery.NewConnection(ctx, "bq-connection-cmek", &bigquery.ConnectionArgs{
			FriendlyName: pulumi.String("πŸ‘‹"),
			Description:  pulumi.String("a riveting description"),
			Location:     pulumi.String("US"),
			KmsKeyName:   pulumi.String("projects/project/locations/us-central1/keyRings/us-central1/cryptoKeys/bq-key"),
			CloudSql: &bigquery.ConnectionCloudSqlArgs{
				InstanceId: instance.ConnectionName,
				Database:   db.Name,
				Type:       pulumi.String("POSTGRES"),
				Credential: &bigquery.ConnectionCloudSqlCredentialArgs{
					Username: user.Name,
					Password: user.Password,
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
// C# CMEK example: a Cloud SQL connection whose stored credential is
// encrypted with a customer-managed Cloud KMS key.
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    // Cloud SQL instance hosting the database to be queried via federation.
    var instance = new Gcp.Sql.DatabaseInstance("instance", new()
    {
        Name = "my-database-instance",
        Region = "us-central1",
        DatabaseVersion = "POSTGRES_11",
        Settings = new Gcp.Sql.Inputs.DatabaseInstanceSettingsArgs
        {
            Tier = "db-f1-micro",
        },
        DeletionProtection = true,
    });

    var db = new Gcp.Sql.Database("db", new()
    {
        Instance = instance.Name,
        Name = "db",
    });

    var user = new Gcp.Sql.User("user", new()
    {
        Name = "user",
        Instance = instance.Name,
        // NOTE(review): password is hardcoded for example purposes; prefer a
        // generated secret in real deployments.
        Password = "tf-test-my-password_60302",
    });

    // KmsKeyName supplies the customer-managed key that encrypts the stored
    // connection credential.
    var bq_connection_cmek = new Gcp.BigQuery.Connection("bq-connection-cmek", new()
    {
        FriendlyName = "πŸ‘‹",
        Description = "a riveting description",
        Location = "US",
        KmsKeyName = "projects/project/locations/us-central1/keyRings/us-central1/cryptoKeys/bq-key",
        CloudSql = new Gcp.BigQuery.Inputs.ConnectionCloudSqlArgs
        {
            InstanceId = instance.ConnectionName,
            Database = db.Name,
            Type = "POSTGRES",
            Credential = new Gcp.BigQuery.Inputs.ConnectionCloudSqlCredentialArgs
            {
                Username = user.Name,
                Password = user.Password,
            },
        },
    });

});
// Java CMEK example: a Cloud SQL connection whose stored credential is
// encrypted with a customer-managed Cloud KMS key.
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.sql.DatabaseInstance;
import com.pulumi.gcp.sql.DatabaseInstanceArgs;
import com.pulumi.gcp.sql.inputs.DatabaseInstanceSettingsArgs;
import com.pulumi.gcp.sql.Database;
import com.pulumi.gcp.sql.DatabaseArgs;
import com.pulumi.gcp.sql.User;
import com.pulumi.gcp.sql.UserArgs;
import com.pulumi.gcp.bigquery.Connection;
import com.pulumi.gcp.bigquery.ConnectionArgs;
import com.pulumi.gcp.bigquery.inputs.ConnectionCloudSqlArgs;
import com.pulumi.gcp.bigquery.inputs.ConnectionCloudSqlCredentialArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // Cloud SQL instance hosting the database to be queried via federation.
        var instance = new DatabaseInstance("instance", DatabaseInstanceArgs.builder()
            .name("my-database-instance")
            .region("us-central1")
            .databaseVersion("POSTGRES_11")
            .settings(DatabaseInstanceSettingsArgs.builder()
                .tier("db-f1-micro")
                .build())
            .deletionProtection(true)
            .build());

        var db = new Database("db", DatabaseArgs.builder()
            .instance(instance.name())
            .name("db")
            .build());

        // NOTE(review): password is hardcoded for example purposes; prefer a
        // generated secret in real deployments.
        var user = new User("user", UserArgs.builder()
            .name("user")
            .instance(instance.name())
            .password("tf-test-my-password_60302")
            .build());

        // kmsKeyName supplies the customer-managed key that encrypts the
        // stored connection credential.
        var bq_connection_cmek = new Connection("bq-connection-cmek", ConnectionArgs.builder()
            .friendlyName("πŸ‘‹")
            .description("a riveting description")
            .location("US")
            .kmsKeyName("projects/project/locations/us-central1/keyRings/us-central1/cryptoKeys/bq-key")
            .cloudSql(ConnectionCloudSqlArgs.builder()
                .instanceId(instance.connectionName())
                .database(db.name())
                .type("POSTGRES")
                .credential(ConnectionCloudSqlCredentialArgs.builder()
                    .username(user.name())
                    .password(user.password())
                    .build())
                .build())
            .build());

    }
}
# YAML equivalent of the CMEK example.
resources:
  instance:
    # Cloud SQL instance hosting the database to be queried via federation.
    type: gcp:sql:DatabaseInstance
    properties:
      name: my-database-instance
      region: us-central1
      databaseVersion: POSTGRES_11
      settings:
        tier: db-f1-micro
      deletionProtection: true
  db:
    type: gcp:sql:Database
    properties:
      instance: ${instance.name}
      name: db
  user:
    type: gcp:sql:User
    properties:
      name: user
      instance: ${instance.name}
      # NOTE(review): password is hardcoded for example purposes; prefer a
      # generated secret in real deployments.
      password: tf-test-my-password_60302
  bq-connection-cmek:
    type: gcp:bigquery:Connection
    properties:
      # Unicode escape for the waving-hand emoji used by the other examples.
      friendlyName: "\U0001F44B"
      description: a riveting description
      location: US
      # Customer-managed KMS key that encrypts the stored connection credential.
      kmsKeyName: projects/project/locations/us-central1/keyRings/us-central1/cryptoKeys/bq-key
      cloudSql:
        instanceId: ${instance.connectionName}
        database: ${db.name}
        type: POSTGRES
        credential:
          username: ${user.name}
          password: ${user.password}

The kmsKeyName property specifies a Cloud KMS key that encrypts connection credentials at rest. This extends the basic Cloud SQL connection pattern with customer-managed encryption, giving you control over the encryption keys used to protect database passwords.

Beyond these examples

These snippets focus on specific connection-level features: Cloud SQL, Spanner, AWS, and Azure connectivity; Data Boost performance optimization; and Spark integration with CMEK encryption. They’re intentionally minimal rather than full data integration solutions.

The examples reference pre-existing infrastructure such as Cloud SQL instances, databases, and users, Cloud Spanner databases and roles, AWS IAM roles and Azure federated applications, and Dataproc clusters and Cloud KMS keys. They focus on configuring the connection rather than provisioning the data sources.

To keep things focused, common connection patterns are omitted, including:

  • Connection lifecycle management (updates, deletion)
  • IAM permissions for connection access
  • Query syntax for federated queries
  • Connection pooling and performance tuning

These omissions are intentional: the goal is to illustrate how each connection type is wired, not provide drop-in data integration modules. See the BigQuery Connection resource reference for all available configuration options.

Let's configure GCP BigQuery Connections

Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.

Try Pulumi Cloud for FREE

Frequently Asked Questions

Immutability & Lifecycle
What properties can't I change after creating a connection?
The connectionId, project, and location properties are immutable. Changing any of these requires destroying and recreating the connection.
Location & Regional Requirements
Why does my Cloud SQL connection location not match BigQuery's location?
Cloud SQL uses location mapping: us-central1 maps to BigQuery US, and europe-west1 maps to BigQuery EU. Other regions must match exactly.
What regions are supported for AWS and Azure connections?
AWS connections are limited to aws-us-east-1. Azure connections are limited to azure-eastus2. Spanner connections must match the Spanner instance region.
Connection Types & Configuration
What types of external data sources can I connect to?
You can create connections to Cloud SQL (cloudSql), Cloud Spanner (cloudSpanner), AWS (aws), Azure (azure), GCP resources (cloudResource), and Apache Spark via Dataproc (spark).
How do I connect BigQuery to a Cloud SQL database?
Configure the cloudSql property with instanceId, database, type (e.g., POSTGRES), and credential containing username and password.
How do I connect BigQuery to AWS resources?
Use the aws property with accessRole.iamRoleId pointing to your AWS IAM role ARN (e.g., arn:aws:iam::999999999999:role/omnirole).
How do I enable Cloud Spanner Data Boost for better performance?
Set useDataBoost: true in the cloudSpanner configuration. You can also enable useParallelism and set maxParallelism to control query parallelism.
Security & Encryption
Can I encrypt my connection credentials with a customer-managed key?
Yes, specify kmsKeyName with your Cloud KMS key path (e.g., projects/[project]/locations/[region]/keyRings/[ring]/cryptoKeys/[key]).

Using a different cloud?

Explore analytics guides for other cloud providers: