The gcp:bigquery/dataset:Dataset resource, part of the Pulumi GCP provider, defines a BigQuery dataset, the container that holds tables and views: its location, access controls, encryption settings, and defaults for tables created within it. This guide focuses on four capabilities: access control configuration, customer-managed encryption, cross-dataset authorization, and external catalog references.
Datasets reference service accounts for access control, KMS keys for encryption, and BigQuery connections for external catalogs. The examples are intentionally small. Combine them with your own IAM configuration, encryption policies, and data governance rules.
Create a dataset with access controls and expiration
Most deployments start by creating a dataset with a unique ID, location, and role-based access that defines who can read or modify the data.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const bqowner = new gcp.serviceaccount.Account("bqowner", {accountId: "bqowner"});
const dataset = new gcp.bigquery.Dataset("dataset", {
datasetId: "example_dataset",
friendlyName: "test",
description: "This is a test description",
location: "EU",
defaultTableExpirationMs: 3600000,
labels: {
env: "default",
},
accesses: [
{
role: "roles/bigquery.dataOwner",
userByEmail: bqowner.email,
},
{
role: "READER",
domain: "hashicorp.com",
},
],
});
import pulumi
import pulumi_gcp as gcp
bqowner = gcp.serviceaccount.Account("bqowner", account_id="bqowner")
dataset = gcp.bigquery.Dataset("dataset",
dataset_id="example_dataset",
friendly_name="test",
description="This is a test description",
location="EU",
default_table_expiration_ms=3600000,
labels={
"env": "default",
},
accesses=[
{
"role": "roles/bigquery.dataOwner",
"user_by_email": bqowner.email,
},
{
"role": "READER",
"domain": "hashicorp.com",
},
])
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/serviceaccount"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
bqowner, err := serviceaccount.NewAccount(ctx, "bqowner", &serviceaccount.AccountArgs{
AccountId: pulumi.String("bqowner"),
})
if err != nil {
return err
}
_, err = bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
DatasetId: pulumi.String("example_dataset"),
FriendlyName: pulumi.String("test"),
Description: pulumi.String("This is a test description"),
Location: pulumi.String("EU"),
DefaultTableExpirationMs: pulumi.Int(3600000),
Labels: pulumi.StringMap{
"env": pulumi.String("default"),
},
Accesses: bigquery.DatasetAccessTypeArray{
&bigquery.DatasetAccessTypeArgs{
Role: pulumi.String("roles/bigquery.dataOwner"),
UserByEmail: bqowner.Email,
},
&bigquery.DatasetAccessTypeArgs{
Role: pulumi.String("READER"),
Domain: pulumi.String("hashicorp.com"),
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var bqowner = new Gcp.ServiceAccount.Account("bqowner", new()
{
AccountId = "bqowner",
});
var dataset = new Gcp.BigQuery.Dataset("dataset", new()
{
DatasetId = "example_dataset",
FriendlyName = "test",
Description = "This is a test description",
Location = "EU",
DefaultTableExpirationMs = 3600000,
Labels =
{
{ "env", "default" },
},
Accesses = new[]
{
new Gcp.BigQuery.Inputs.DatasetAccessArgs
{
Role = "roles/bigquery.dataOwner",
UserByEmail = bqowner.Email,
},
new Gcp.BigQuery.Inputs.DatasetAccessArgs
{
Role = "READER",
Domain = "hashicorp.com",
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.serviceaccount.Account;
import com.pulumi.gcp.serviceaccount.AccountArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var bqowner = new Account("bqowner", AccountArgs.builder()
.accountId("bqowner")
.build());
var dataset = new Dataset("dataset", DatasetArgs.builder()
.datasetId("example_dataset")
.friendlyName("test")
.description("This is a test description")
.location("EU")
.defaultTableExpirationMs(3600000)
.labels(Map.of("env", "default"))
.accesses(
DatasetAccessArgs.builder()
.role("roles/bigquery.dataOwner")
.userByEmail(bqowner.email())
.build(),
DatasetAccessArgs.builder()
.role("READER")
.domain("hashicorp.com")
.build())
.build());
}
}
resources:
dataset:
type: gcp:bigquery:Dataset
properties:
datasetId: example_dataset
friendlyName: test
description: This is a test description
location: EU
      defaultTableExpirationMs: 3600000
labels:
env: default
accesses:
- role: roles/bigquery.dataOwner
userByEmail: ${bqowner.email}
- role: READER
domain: hashicorp.com
bqowner:
type: gcp:serviceaccount:Account
properties:
accountId: bqowner
The datasetId property sets an identifier that must be unique within the project. Each entry in the accesses array grants permissions by pairing a role (a primitive role like READER or OWNER, or an IAM role such as roles/bigquery.dataOwner) with an identity (userByEmail, groupByEmail, domain, or specialGroup). The defaultTableExpirationMs property sets a default lifetime for newly created tables: each table is deleted that many milliseconds after its creation time unless it specifies its own expiration.
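Continuing the TypeScript example above, here is a minimal sketch of a table created inside the dataset; the table name and schema are illustrative, not part of the example. Because it inherits the dataset's defaults, this table expires one hour after creation unless it sets its own expirationTime.
import * as gcp from "@pulumi/gcp";
// Illustrative table in the dataset defined above; it picks up the dataset's
// defaults, including defaultTableExpirationMs.
const eventsTable = new gcp.bigquery.Table("events", {
    datasetId: dataset.datasetId,  // the Dataset from the TypeScript example above
    tableId: "events",
    deletionProtection: false,     // allow `pulumi destroy` to remove the table
    schema: JSON.stringify([
        { name: "event_id", type: "STRING", mode: "REQUIRED" },
        { name: "occurred_at", type: "TIMESTAMP", mode: "NULLABLE" },
    ]),
});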
Encrypt dataset tables with customer-managed keys
Organizations with compliance requirements often encrypt BigQuery data using their own KMS keys rather than Google-managed encryption.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const keyRing = new gcp.kms.KeyRing("key_ring", {
name: "example-keyring",
location: "us",
});
const cryptoKey = new gcp.kms.CryptoKey("crypto_key", {
name: "example-key",
keyRing: keyRing.id,
});
const dataset = new gcp.bigquery.Dataset("dataset", {
datasetId: "example_dataset",
friendlyName: "test",
description: "This is a test description",
location: "US",
defaultTableExpirationMs: 3600000,
defaultEncryptionConfiguration: {
kmsKeyName: cryptoKey.id,
},
});
import pulumi
import pulumi_gcp as gcp
key_ring = gcp.kms.KeyRing("key_ring",
name="example-keyring",
location="us")
crypto_key = gcp.kms.CryptoKey("crypto_key",
name="example-key",
key_ring=key_ring.id)
dataset = gcp.bigquery.Dataset("dataset",
dataset_id="example_dataset",
friendly_name="test",
description="This is a test description",
location="US",
default_table_expiration_ms=3600000,
default_encryption_configuration={
"kms_key_name": crypto_key.id,
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/kms"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
keyRing, err := kms.NewKeyRing(ctx, "key_ring", &kms.KeyRingArgs{
Name: pulumi.String("example-keyring"),
Location: pulumi.String("us"),
})
if err != nil {
return err
}
cryptoKey, err := kms.NewCryptoKey(ctx, "crypto_key", &kms.CryptoKeyArgs{
Name: pulumi.String("example-key"),
KeyRing: keyRing.ID(),
})
if err != nil {
return err
}
_, err = bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
DatasetId: pulumi.String("example_dataset"),
FriendlyName: pulumi.String("test"),
Description: pulumi.String("This is a test description"),
Location: pulumi.String("US"),
DefaultTableExpirationMs: pulumi.Int(3600000),
DefaultEncryptionConfiguration: &bigquery.DatasetDefaultEncryptionConfigurationArgs{
KmsKeyName: cryptoKey.ID(),
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var keyRing = new Gcp.Kms.KeyRing("key_ring", new()
{
Name = "example-keyring",
Location = "us",
});
var cryptoKey = new Gcp.Kms.CryptoKey("crypto_key", new()
{
Name = "example-key",
KeyRing = keyRing.Id,
});
var dataset = new Gcp.BigQuery.Dataset("dataset", new()
{
DatasetId = "example_dataset",
FriendlyName = "test",
Description = "This is a test description",
Location = "US",
DefaultTableExpirationMs = 3600000,
DefaultEncryptionConfiguration = new Gcp.BigQuery.Inputs.DatasetDefaultEncryptionConfigurationArgs
{
KmsKeyName = cryptoKey.Id,
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.kms.KeyRing;
import com.pulumi.gcp.kms.KeyRingArgs;
import com.pulumi.gcp.kms.CryptoKey;
import com.pulumi.gcp.kms.CryptoKeyArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetDefaultEncryptionConfigurationArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var keyRing = new KeyRing("keyRing", KeyRingArgs.builder()
.name("example-keyring")
.location("us")
.build());
var cryptoKey = new CryptoKey("cryptoKey", CryptoKeyArgs.builder()
.name("example-key")
.keyRing(keyRing.id())
.build());
var dataset = new Dataset("dataset", DatasetArgs.builder()
.datasetId("example_dataset")
.friendlyName("test")
.description("This is a test description")
.location("US")
.defaultTableExpirationMs(3600000)
.defaultEncryptionConfiguration(DatasetDefaultEncryptionConfigurationArgs.builder()
.kmsKeyName(cryptoKey.id())
.build())
.build());
}
}
resources:
dataset:
type: gcp:bigquery:Dataset
properties:
datasetId: example_dataset
friendlyName: test
description: This is a test description
location: US
      defaultTableExpirationMs: 3600000
defaultEncryptionConfiguration:
kmsKeyName: ${cryptoKey.id}
cryptoKey:
type: gcp:kms:CryptoKey
name: crypto_key
properties:
name: example-key
keyRing: ${keyRing.id}
keyRing:
type: gcp:kms:KeyRing
name: key_ring
properties:
name: example-keyring
location: us
The defaultEncryptionConfiguration property applies to all newly-created tables in the dataset. The kmsKeyName references a Cloud KMS key; BigQuery’s service account needs encrypt/decrypt permissions on that key. Existing tables retain their original encryption settings.
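The example above assumes that grant already exists. As a hedged TypeScript sketch (verify the data source and role name against your provider version), the following looks up the project's BigQuery service agent and lets it use the key:
import * as gcp from "@pulumi/gcp";
// Continuing the TypeScript example above: grant BigQuery's project-level
// service agent use of the CMEK key. Without a grant like this, creating
// tables in the dataset typically fails with a KMS permission error.
const bqServiceAccount = gcp.bigquery.getDefaultServiceAccount({});
const cmekGrant = new gcp.kms.CryptoKeyIAMMember("bq-cmek-grant", {
    cryptoKeyId: cryptoKey.id,  // the CryptoKey defined above
    role: "roles/cloudkms.cryptoKeyEncrypterDecrypter",
    member: bqServiceAccount.then(sa => `serviceAccount:${sa.email}`),
});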
Grant cross-dataset access for authorized views
Data warehouses often need views in one dataset to query tables in another, requiring explicit authorization between datasets.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const bqowner = new gcp.serviceaccount.Account("bqowner", {accountId: "bqowner"});
const _public = new gcp.bigquery.Dataset("public", {
datasetId: "public",
friendlyName: "test",
description: "This dataset is public",
location: "EU",
defaultTableExpirationMs: 3600000,
labels: {
env: "default",
},
accesses: [
{
role: "OWNER",
userByEmail: bqowner.email,
},
{
role: "READER",
domain: "hashicorp.com",
},
],
});
const dataset = new gcp.bigquery.Dataset("dataset", {
datasetId: "private",
friendlyName: "test",
description: "This dataset is private",
location: "EU",
defaultTableExpirationMs: 3600000,
labels: {
env: "default",
},
accesses: [
{
role: "OWNER",
userByEmail: bqowner.email,
},
{
role: "READER",
domain: "hashicorp.com",
},
{
dataset: {
dataset: {
projectId: _public.project,
datasetId: _public.datasetId,
},
targetTypes: ["VIEWS"],
},
},
],
});
import pulumi
import pulumi_gcp as gcp
bqowner = gcp.serviceaccount.Account("bqowner", account_id="bqowner")
public = gcp.bigquery.Dataset("public",
dataset_id="public",
friendly_name="test",
description="This dataset is public",
location="EU",
default_table_expiration_ms=3600000,
labels={
"env": "default",
},
accesses=[
{
"role": "OWNER",
"user_by_email": bqowner.email,
},
{
"role": "READER",
"domain": "hashicorp.com",
},
])
dataset = gcp.bigquery.Dataset("dataset",
dataset_id="private",
friendly_name="test",
description="This dataset is private",
location="EU",
default_table_expiration_ms=3600000,
labels={
"env": "default",
},
accesses=[
{
"role": "OWNER",
"user_by_email": bqowner.email,
},
{
"role": "READER",
"domain": "hashicorp.com",
},
{
"dataset": {
"dataset": {
"project_id": public.project,
"dataset_id": public.dataset_id,
},
"target_types": ["VIEWS"],
},
},
])
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/serviceaccount"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
bqowner, err := serviceaccount.NewAccount(ctx, "bqowner", &serviceaccount.AccountArgs{
AccountId: pulumi.String("bqowner"),
})
if err != nil {
return err
}
public, err := bigquery.NewDataset(ctx, "public", &bigquery.DatasetArgs{
DatasetId: pulumi.String("public"),
FriendlyName: pulumi.String("test"),
Description: pulumi.String("This dataset is public"),
Location: pulumi.String("EU"),
DefaultTableExpirationMs: pulumi.Int(3600000),
Labels: pulumi.StringMap{
"env": pulumi.String("default"),
},
Accesses: bigquery.DatasetAccessTypeArray{
&bigquery.DatasetAccessTypeArgs{
Role: pulumi.String("OWNER"),
UserByEmail: bqowner.Email,
},
&bigquery.DatasetAccessTypeArgs{
Role: pulumi.String("READER"),
Domain: pulumi.String("hashicorp.com"),
},
},
})
if err != nil {
return err
}
_, err = bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
DatasetId: pulumi.String("private"),
FriendlyName: pulumi.String("test"),
Description: pulumi.String("This dataset is private"),
Location: pulumi.String("EU"),
DefaultTableExpirationMs: pulumi.Int(3600000),
Labels: pulumi.StringMap{
"env": pulumi.String("default"),
},
Accesses: bigquery.DatasetAccessTypeArray{
&bigquery.DatasetAccessTypeArgs{
Role: pulumi.String("OWNER"),
UserByEmail: bqowner.Email,
},
&bigquery.DatasetAccessTypeArgs{
Role: pulumi.String("READER"),
Domain: pulumi.String("hashicorp.com"),
},
&bigquery.DatasetAccessTypeArgs{
Dataset: &bigquery.DatasetAccessDatasetArgs{
Dataset: &bigquery.DatasetAccessDatasetDatasetArgs{
ProjectId: public.Project,
DatasetId: public.DatasetId,
},
TargetTypes: pulumi.StringArray{
pulumi.String("VIEWS"),
},
},
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var bqowner = new Gcp.ServiceAccount.Account("bqowner", new()
{
AccountId = "bqowner",
});
var @public = new Gcp.BigQuery.Dataset("public", new()
{
DatasetId = "public",
FriendlyName = "test",
Description = "This dataset is public",
Location = "EU",
DefaultTableExpirationMs = 3600000,
Labels =
{
{ "env", "default" },
},
Accesses = new[]
{
new Gcp.BigQuery.Inputs.DatasetAccessArgs
{
Role = "OWNER",
UserByEmail = bqowner.Email,
},
new Gcp.BigQuery.Inputs.DatasetAccessArgs
{
Role = "READER",
Domain = "hashicorp.com",
},
},
});
var dataset = new Gcp.BigQuery.Dataset("dataset", new()
{
DatasetId = "private",
FriendlyName = "test",
Description = "This dataset is private",
Location = "EU",
DefaultTableExpirationMs = 3600000,
Labels =
{
{ "env", "default" },
},
Accesses = new[]
{
new Gcp.BigQuery.Inputs.DatasetAccessArgs
{
Role = "OWNER",
UserByEmail = bqowner.Email,
},
new Gcp.BigQuery.Inputs.DatasetAccessArgs
{
Role = "READER",
Domain = "hashicorp.com",
},
new Gcp.BigQuery.Inputs.DatasetAccessArgs
{
Dataset = new Gcp.BigQuery.Inputs.DatasetAccessDatasetArgs
{
Dataset = new Gcp.BigQuery.Inputs.DatasetAccessDatasetDatasetArgs
{
ProjectId = @public.Project,
DatasetId = @public.DatasetId,
},
TargetTypes = new[]
{
"VIEWS",
},
},
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.serviceaccount.Account;
import com.pulumi.gcp.serviceaccount.AccountArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessDatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessDatasetDatasetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var bqowner = new Account("bqowner", AccountArgs.builder()
.accountId("bqowner")
.build());
var public_ = new Dataset("public", DatasetArgs.builder()
.datasetId("public")
.friendlyName("test")
.description("This dataset is public")
.location("EU")
.defaultTableExpirationMs(3600000)
.labels(Map.of("env", "default"))
.accesses(
DatasetAccessArgs.builder()
.role("OWNER")
.userByEmail(bqowner.email())
.build(),
DatasetAccessArgs.builder()
.role("READER")
.domain("hashicorp.com")
.build())
.build());
var dataset = new Dataset("dataset", DatasetArgs.builder()
.datasetId("private")
.friendlyName("test")
.description("This dataset is private")
.location("EU")
.defaultTableExpirationMs(3600000)
.labels(Map.of("env", "default"))
.accesses(
DatasetAccessArgs.builder()
.role("OWNER")
.userByEmail(bqowner.email())
.build(),
DatasetAccessArgs.builder()
.role("READER")
.domain("hashicorp.com")
.build(),
DatasetAccessArgs.builder()
.dataset(DatasetAccessDatasetArgs.builder()
.dataset(DatasetAccessDatasetDatasetArgs.builder()
.projectId(public_.project())
.datasetId(public_.datasetId())
.build())
.targetTypes("VIEWS")
.build())
.build())
.build());
}
}
resources:
public:
type: gcp:bigquery:Dataset
properties:
datasetId: public
friendlyName: test
description: This dataset is public
location: EU
      defaultTableExpirationMs: 3600000
labels:
env: default
accesses:
- role: OWNER
userByEmail: ${bqowner.email}
- role: READER
domain: hashicorp.com
dataset:
type: gcp:bigquery:Dataset
properties:
datasetId: private
friendlyName: test
description: This dataset is private
location: EU
      defaultTableExpirationMs: 3600000
labels:
env: default
accesses:
- role: OWNER
userByEmail: ${bqowner.email}
- role: READER
domain: hashicorp.com
- dataset:
dataset:
projectId: ${public.project}
datasetId: ${public.datasetId}
targetTypes:
- VIEWS
bqowner:
type: gcp:serviceaccount:Account
properties:
accountId: bqowner
The accesses array can include a dataset entry that authorizes resources in another dataset to read this one. The targetTypes property limits the authorization to specific resource types (here, VIEWS). In this example the private dataset authorizes views in the public dataset, so those views can query private tables and readers of the public dataset get the results without being granted direct access to the private data.
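To show the other half of the pattern, here is a hedged TypeScript sketch of a view in the public dataset that selects from a table in the private dataset; the view name, query, and project ID are illustrative.
import * as gcp from "@pulumi/gcp";
// Continuing the TypeScript example above: a view in the public dataset that
// reads private data. The private dataset's access entry authorizes views in
// the public dataset, so readers need no grant on the private dataset itself.
const sharedView = new gcp.bigquery.Table("shared_view", {
    datasetId: _public.datasetId,
    tableId: "shared_view",
    deletionProtection: false,
    view: {
        query: "SELECT column_a FROM `my-project.private.source_table`",  // hypothetical project and table
        useLegacySql: false,
    },
});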
Reference external AWS Glue catalogs
Multi-cloud analytics workflows can query AWS Glue tables directly from BigQuery by creating a dataset that references the external catalog.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const dataset = new gcp.bigquery.Dataset("dataset", {
datasetId: "example_dataset",
friendlyName: "test",
description: "This is a test description",
location: "aws-us-east-1",
externalDatasetReference: {
externalSource: "aws-glue://arn:aws:glue:us-east-1:999999999999:database/database",
connection: "projects/project/locations/aws-us-east-1/connections/connection",
},
});
import pulumi
import pulumi_gcp as gcp
dataset = gcp.bigquery.Dataset("dataset",
dataset_id="example_dataset",
friendly_name="test",
description="This is a test description",
location="aws-us-east-1",
external_dataset_reference={
"external_source": "aws-glue://arn:aws:glue:us-east-1:999999999999:database/database",
"connection": "projects/project/locations/aws-us-east-1/connections/connection",
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
DatasetId: pulumi.String("example_dataset"),
FriendlyName: pulumi.String("test"),
Description: pulumi.String("This is a test description"),
Location: pulumi.String("aws-us-east-1"),
ExternalDatasetReference: &bigquery.DatasetExternalDatasetReferenceArgs{
ExternalSource: pulumi.String("aws-glue://arn:aws:glue:us-east-1:999999999999:database/database"),
Connection: pulumi.String("projects/project/locations/aws-us-east-1/connections/connection"),
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var dataset = new Gcp.BigQuery.Dataset("dataset", new()
{
DatasetId = "example_dataset",
FriendlyName = "test",
Description = "This is a test description",
Location = "aws-us-east-1",
ExternalDatasetReference = new Gcp.BigQuery.Inputs.DatasetExternalDatasetReferenceArgs
{
ExternalSource = "aws-glue://arn:aws:glue:us-east-1:999999999999:database/database",
Connection = "projects/project/locations/aws-us-east-1/connections/connection",
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetExternalDatasetReferenceArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var dataset = new Dataset("dataset", DatasetArgs.builder()
.datasetId("example_dataset")
.friendlyName("test")
.description("This is a test description")
.location("aws-us-east-1")
.externalDatasetReference(DatasetExternalDatasetReferenceArgs.builder()
.externalSource("aws-glue://arn:aws:glue:us-east-1:999999999999:database/database")
.connection("projects/project/locations/aws-us-east-1/connections/connection")
.build())
.build());
}
}
resources:
dataset:
type: gcp:bigquery:Dataset
properties:
datasetId: example_dataset
friendlyName: test
description: This is a test description
location: aws-us-east-1
externalDatasetReference:
externalSource: aws-glue://arn:aws:glue:us-east-1:999999999999:database/database
connection: projects/project/locations/aws-us-east-1/connections/connection
The externalDatasetReference property points to an AWS Glue database via its ARN. The connection property references a BigQuery connection resource that handles authentication to AWS. The location must match the AWS region (here, aws-us-east-1). This enables querying AWS data without replication.
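The connection path in the example is a placeholder, so here is a hedged TypeScript sketch of how the referenced BigQuery connection might be created; the AWS IAM role ARN is hypothetical and must trust BigQuery and have read access to the Glue database and its underlying S3 data.
import * as gcp from "@pulumi/gcp";
// A sketch of the connection resource the example assumes already exists.
const awsConnection = new gcp.bigquery.Connection("aws-glue-connection", {
    connectionId: "connection",
    location: "aws-us-east-1",
    aws: {
        accessRole: {
            iamRoleId: "arn:aws:iam::999999999999:role/bigquery-omni-access",  // hypothetical role
        },
    },
});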
Beyond these examples
These snippets focus on specific dataset-level features: access control and encryption, cross-dataset authorization, and external catalog integration. They’re intentionally minimal rather than full data warehouse configurations.
The examples may reference pre-existing infrastructure such as service accounts for access control, Cloud KMS keys for encryption, and BigQuery connections to external systems like AWS. They focus on configuring the dataset rather than provisioning the surrounding infrastructure.
To keep things focused, common dataset patterns are omitted, including:
- Table expiration vs partition expiration (defaultPartitionExpirationMs)
- Case sensitivity and collation settings (isCaseInsensitive, defaultCollation)
- Time travel configuration (maxTimeTravelHours)
- Storage billing models (storageBillingModel)
- Dataset deletion behavior (deleteContentsOnDestroy)
These omissions are intentional: the goal is to illustrate how each dataset feature is wired, not provide drop-in data warehouse modules. See the BigQuery Dataset resource reference for all available configuration options.
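For orientation only, here is a hedged TypeScript sketch of where those omitted options attach on the same resource; the values are illustrative, not recommendations, so check the resource reference for valid ranges.
import * as gcp from "@pulumi/gcp";
// Illustrative values only; not a recommended configuration.
const tunedDataset = new gcp.bigquery.Dataset("tuned", {
    datasetId: "tuned_dataset",
    location: "EU",
    defaultPartitionExpirationMs: 7 * 24 * 60 * 60 * 1000,  // partitions expire after 7 days
    isCaseInsensitive: true,                                // dataset and table names are case-insensitive
    defaultCollation: "und:ci",                             // default collation for new tables
    maxTimeTravelHours: "72",                               // 3-day time travel window
    storageBillingModel: "PHYSICAL",
    deleteContentsOnDestroy: true,                          // allow destroy even if tables exist
});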
Frequently Asked Questions
Resource Lifecycle & Immutability
- Set deleteContentsOnDestroy to true to automatically delete all tables in the dataset when the dataset resource is destroyed.
- The datasetId, project, location, and externalDatasetReference properties are immutable; changing any of these forces creation of a new resource.
- defaultTableExpirationMs, defaultPartitionExpirationMs, and defaultCollation only affect newly created tables, not existing ones.
Access Control & Security
- Grant access through the accesses array with entries containing a role and an identity such as userByEmail or domain. For example, {role: "READER", domain: "example.com"}.
- Add an access entry with a nested dataset block containing projectId, datasetId, and targetTypes: ["VIEWS"] to allow cross-dataset view access.
- Add an access entry with a nested routine block containing the projectId, datasetId, and routineId of the authorized routine.
- Set defaultEncryptionConfiguration with kmsKeyName pointing to your Cloud KMS crypto key ID. This applies to all newly created tables in the dataset.
Table Expiration & Defaults
- A table's own expirationTime takes precedence, followed by defaultPartitionExpirationMs for partitioned tables, then defaultTableExpirationMs for all tables. The minimum expiration is 3600000 ms (1 hour). Changes only affect new tables.
- Use maxTimeTravelHours to set a window between 48 and 168 hours (2 to 7 days) for querying historical data.
Labels & Metadata
- The labels field is non-authoritative and only manages the labels in your configuration. Use effectiveLabels to see all labels on the resource, including those set by other clients and services.
External Integrations
- Configure externalDatasetReference with externalSource pointing to your AWS Glue database ARN and connection pointing to your BigQuery connection resource. The location must match your AWS region (e.g., aws-us-east-1).
- Set isCaseInsensitive to true to make the dataset and its table names case-insensitive. This defaults to false (case-sensitive). Note that this setting doesn't affect routine references.