The gcp:bigquery/dataset:Dataset resource, part of the Pulumi GCP provider, defines a BigQuery dataset container that organizes tables and controls access, encryption, and lifecycle policies. This guide focuses on four capabilities: access control configuration, customer-managed encryption, cross-dataset authorization, and external catalog integration.
Datasets reference service accounts for access control, Cloud KMS keys for encryption, and BigQuery Connection resources for external catalogs. The examples are intentionally small. Combine them with your own IAM policies, KMS keys, and table definitions.
Create a dataset with access controls and expiration
Most deployments start by creating a dataset with a unique ID, organizing tables under a common namespace with shared access policies.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Service account that is granted the data-owner role on the dataset below.
const ownerAccount = new gcp.serviceaccount.Account("bqowner", { accountId: "bqowner" });

// One access entry per principal: the service account and a whole domain.
const ownerAccess = {
    role: "roles/bigquery.dataOwner",
    userByEmail: ownerAccount.email,
};
const domainReaderAccess = {
    role: "READER",
    domain: "hashicorp.com",
};

// Dataset in the EU location; 3600000 ms is a one-hour default table expiration.
const exampleDataset = new gcp.bigquery.Dataset("dataset", {
    datasetId: "example_dataset",
    friendlyName: "test",
    description: "This is a test description",
    location: "EU",
    defaultTableExpirationMs: 3600000,
    labels: { env: "default" },
    accesses: [ownerAccess, domainReaderAccess],
});
import pulumi
import pulumi_gcp as gcp
# Service account that is granted the data-owner role on the dataset below.
owner_account = gcp.serviceaccount.Account("bqowner", account_id="bqowner")

# One access entry per principal: the service account and a whole domain.
owner_access = {
    "role": "roles/bigquery.dataOwner",
    "user_by_email": owner_account.email,
}
domain_reader_access = {
    "role": "READER",
    "domain": "hashicorp.com",
}

# Dataset in the EU location; 3600000 ms is a one-hour default table expiration.
example_dataset = gcp.bigquery.Dataset(
    "dataset",
    dataset_id="example_dataset",
    friendly_name="test",
    description="This is a test description",
    location="EU",
    default_table_expiration_ms=3600000,
    labels={"env": "default"},
    accesses=[owner_access, domain_reader_access],
)
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/serviceaccount"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main declares a service account and a BigQuery dataset that grants it access.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		ownerAccount, err := serviceaccount.NewAccount(ctx, "bqowner", &serviceaccount.AccountArgs{
			AccountId: pulumi.String("bqowner"),
		})
		if err != nil {
			return err
		}

		// One access entry per principal: the service account and a whole domain.
		accessEntries := bigquery.DatasetAccessTypeArray{
			&bigquery.DatasetAccessTypeArgs{
				Role:        pulumi.String("roles/bigquery.dataOwner"),
				UserByEmail: ownerAccount.Email,
			},
			&bigquery.DatasetAccessTypeArgs{
				Role:   pulumi.String("READER"),
				Domain: pulumi.String("hashicorp.com"),
			},
		}

		// The dataset is the last resource, so its error (or nil) is the result.
		_, err = bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
			DatasetId:                pulumi.String("example_dataset"),
			FriendlyName:             pulumi.String("test"),
			Description:              pulumi.String("This is a test description"),
			Location:                 pulumi.String("EU"),
			DefaultTableExpirationMs: pulumi.Int(3600000),
			Labels: pulumi.StringMap{
				"env": pulumi.String("default"),
			},
			Accesses: accessEntries,
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
// Declares a service account and a BigQuery dataset that grants it access.
return await Deployment.RunAsync(() =>
{
    var ownerAccount = new Gcp.ServiceAccount.Account("bqowner", new Gcp.ServiceAccount.AccountArgs
    {
        AccountId = "bqowner",
    });

    // One access entry per principal: the service account and a whole domain.
    var ownerAccess = new Gcp.BigQuery.Inputs.DatasetAccessArgs
    {
        Role = "roles/bigquery.dataOwner",
        UserByEmail = ownerAccount.Email,
    };
    var domainReaderAccess = new Gcp.BigQuery.Inputs.DatasetAccessArgs
    {
        Role = "READER",
        Domain = "hashicorp.com",
    };

    // 3600000 ms is a one-hour default table expiration.
    var exampleDataset = new Gcp.BigQuery.Dataset("dataset", new Gcp.BigQuery.DatasetArgs
    {
        DatasetId = "example_dataset",
        FriendlyName = "test",
        Description = "This is a test description",
        Location = "EU",
        DefaultTableExpirationMs = 3600000,
        Labels =
        {
            { "env", "default" },
        },
        Accesses = new[]
        {
            ownerAccess,
            domainReaderAccess,
        },
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.serviceaccount.Account;
import com.pulumi.gcp.serviceaccount.AccountArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Pulumi program that declares a service account and a BigQuery dataset
 * granting access to that account and to the hashicorp.com domain.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources that make up the stack. */
    public static void stack(Context ctx) {
        final var ownerAccount = new Account("bqowner", AccountArgs.builder()
            .accountId("bqowner")
            .build());

        // One access entry per principal: the service account and a whole domain.
        final var ownerAccess = DatasetAccessArgs.builder()
            .role("roles/bigquery.dataOwner")
            .userByEmail(ownerAccount.email())
            .build();
        final var domainReaderAccess = DatasetAccessArgs.builder()
            .role("READER")
            .domain("hashicorp.com")
            .build();

        // 3600000 ms is a one-hour default table expiration.
        final var datasetArgs = DatasetArgs.builder()
            .datasetId("example_dataset")
            .friendlyName("test")
            .description("This is a test description")
            .location("EU")
            .defaultTableExpirationMs(3600000)
            .labels(Map.of("env", "default"))
            .accesses(ownerAccess, domainReaderAccess)
            .build();

        new Dataset("dataset", datasetArgs);
    }
}
# Service account plus a dataset that grants it and a domain access.
resources:
  dataset:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: example_dataset
      friendlyName: test
      description: This is a test description
      location: EU
      # One hour, written as a plain integer number of milliseconds.
      defaultTableExpirationMs: 3600000
      labels:
        env: default
      accesses:
        - role: roles/bigquery.dataOwner
          userByEmail: ${bqowner.email}
        - role: READER
          domain: hashicorp.com
  bqowner:
    type: gcp:serviceaccount:Account
    properties:
      accountId: bqowner
The datasetId property sets a unique identifier within your project. The accesses array defines who can read or modify data: each entry specifies a role (a basic role such as OWNER, READER, or WRITER, or a predefined IAM role such as roles/bigquery.dataOwner) and an entity (user email, domain, or special group). The defaultTableExpirationMs property applies automatic deletion to all tables created in this dataset after the specified milliseconds, unless a table overrides it. The location property determines where data is stored (multi-regional like “EU” or “US”, or regional like “us-central1”).
Encrypt tables with customer-managed keys
Organizations with compliance requirements often control encryption keys for data at rest using Cloud KMS.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Key ring and key that the dataset uses as its default encryption key.
const kmsKeyRing = new gcp.kms.KeyRing("key_ring", {
    name: "example-keyring",
    location: "us",
});
const kmsKey = new gcp.kms.CryptoKey("crypto_key", {
    name: "example-key",
    keyRing: kmsKeyRing.id,
});

// Default encryption settings point at the key created above.
const encryptionDefaults = { kmsKeyName: kmsKey.id };

const encryptedDataset = new gcp.bigquery.Dataset("dataset", {
    datasetId: "example_dataset",
    friendlyName: "test",
    description: "This is a test description",
    location: "US",
    defaultTableExpirationMs: 3600000,
    defaultEncryptionConfiguration: encryptionDefaults,
});
import pulumi
import pulumi_gcp as gcp
# Key ring and key that the dataset uses as its default encryption key.
kms_key_ring = gcp.kms.KeyRing(
    "key_ring",
    name="example-keyring",
    location="us",
)
kms_key = gcp.kms.CryptoKey(
    "crypto_key",
    name="example-key",
    key_ring=kms_key_ring.id,
)

# Default encryption settings point at the key created above.
encryption_defaults = {"kms_key_name": kms_key.id}

encrypted_dataset = gcp.bigquery.Dataset(
    "dataset",
    dataset_id="example_dataset",
    friendly_name="test",
    description="This is a test description",
    location="US",
    default_table_expiration_ms=3600000,
    default_encryption_configuration=encryption_defaults,
)
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/kms"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main declares a KMS key ring and key, then a dataset that uses the key as
// its default encryption key.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		kmsKeyRing, err := kms.NewKeyRing(ctx, "key_ring", &kms.KeyRingArgs{
			Name:     pulumi.String("example-keyring"),
			Location: pulumi.String("us"),
		})
		if err != nil {
			return err
		}

		kmsKey, err := kms.NewCryptoKey(ctx, "crypto_key", &kms.CryptoKeyArgs{
			Name:    pulumi.String("example-key"),
			KeyRing: kmsKeyRing.ID(),
		})
		if err != nil {
			return err
		}

		encryptionDefaults := &bigquery.DatasetDefaultEncryptionConfigurationArgs{
			KmsKeyName: kmsKey.ID(),
		}

		// The dataset is the last resource, so its error (or nil) is the result.
		_, err = bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
			DatasetId:                      pulumi.String("example_dataset"),
			FriendlyName:                   pulumi.String("test"),
			Description:                    pulumi.String("This is a test description"),
			Location:                       pulumi.String("US"),
			DefaultTableExpirationMs:       pulumi.Int(3600000),
			DefaultEncryptionConfiguration: encryptionDefaults,
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
// Declares a KMS key ring and key, then a dataset that uses the key as its
// default encryption key.
return await Deployment.RunAsync(() =>
{
    var kmsKeyRing = new Gcp.Kms.KeyRing("key_ring", new Gcp.Kms.KeyRingArgs
    {
        Name = "example-keyring",
        Location = "us",
    });

    var kmsKey = new Gcp.Kms.CryptoKey("crypto_key", new Gcp.Kms.CryptoKeyArgs
    {
        Name = "example-key",
        KeyRing = kmsKeyRing.Id,
    });

    // Default encryption settings point at the key created above.
    var encryptionDefaults = new Gcp.BigQuery.Inputs.DatasetDefaultEncryptionConfigurationArgs
    {
        KmsKeyName = kmsKey.Id,
    };

    var encryptedDataset = new Gcp.BigQuery.Dataset("dataset", new Gcp.BigQuery.DatasetArgs
    {
        DatasetId = "example_dataset",
        FriendlyName = "test",
        Description = "This is a test description",
        Location = "US",
        DefaultTableExpirationMs = 3600000,
        DefaultEncryptionConfiguration = encryptionDefaults,
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.kms.KeyRing;
import com.pulumi.gcp.kms.KeyRingArgs;
import com.pulumi.gcp.kms.CryptoKey;
import com.pulumi.gcp.kms.CryptoKeyArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetDefaultEncryptionConfigurationArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Pulumi program that declares a KMS key ring and key, then a BigQuery
 * dataset that uses the key as its default encryption key.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources that make up the stack. */
    public static void stack(Context ctx) {
        // Resource names use "key_ring" / "crypto_key" so this program declares
        // the same logical resources as the other language variants of the example.
        var keyRing = new KeyRing("key_ring", KeyRingArgs.builder()
            .name("example-keyring")
            .location("us")
            .build());

        var cryptoKey = new CryptoKey("crypto_key", CryptoKeyArgs.builder()
            .name("example-key")
            .keyRing(keyRing.id())
            .build());

        var dataset = new Dataset("dataset", DatasetArgs.builder()
            .datasetId("example_dataset")
            .friendlyName("test")
            .description("This is a test description")
            .location("US")
            .defaultTableExpirationMs(3600000)
            // Default encryption settings point at the key created above.
            .defaultEncryptionConfiguration(DatasetDefaultEncryptionConfigurationArgs.builder()
                .kmsKeyName(cryptoKey.id())
                .build())
            .build());
    }
}
# KMS key ring and key, plus a dataset that uses the key as its default encryption key.
resources:
  dataset:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: example_dataset
      friendlyName: test
      description: This is a test description
      location: US
      # One hour, written as a plain integer number of milliseconds.
      defaultTableExpirationMs: 3600000
      defaultEncryptionConfiguration:
        kmsKeyName: ${cryptoKey.id}
  cryptoKey:
    type: gcp:kms:CryptoKey
    name: crypto_key
    properties:
      name: example-key
      keyRing: ${keyRing.id}
  keyRing:
    type: gcp:kms:KeyRing
    name: key_ring
    properties:
      name: example-keyring
      location: us
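The defaultEncryptionConfiguration property applies a Cloud KMS key to all newly-created tables in the dataset. The kmsKeyName references a CryptoKey resource; BigQuery uses this key to encrypt table data. For this to work, the project’s BigQuery service account must be granted the Cloud KMS CryptoKey Encrypter/Decrypter role (roles/cloudkms.cryptoKeyEncrypterDecrypter) on the key; the example does not create that binding. Tables created before setting this property retain their original encryption; only new tables inherit the key.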
Grant cross-dataset access for views
Analytics workflows often need views in one dataset to query tables in another, requiring explicit authorization.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const ownerAccount = new gcp.serviceaccount.Account("bqowner", { accountId: "bqowner" });

// Access entries that both datasets share.
const ownerAccess = {
    role: "OWNER",
    userByEmail: ownerAccount.email,
};
const domainReaderAccess = {
    role: "READER",
    domain: "hashicorp.com",
};

const publicDataset = new gcp.bigquery.Dataset("public", {
    datasetId: "public",
    friendlyName: "test",
    description: "This dataset is public",
    location: "EU",
    defaultTableExpirationMs: 3600000,
    labels: { env: "default" },
    accesses: [ownerAccess, domainReaderAccess],
});

// Dataset-type access entry that names the "public" dataset and is limited to VIEWS.
const publicViewsAccess = {
    dataset: {
        dataset: {
            projectId: publicDataset.project,
            datasetId: publicDataset.datasetId,
        },
        targetTypes: ["VIEWS"],
    },
};

const privateDataset = new gcp.bigquery.Dataset("dataset", {
    datasetId: "private",
    friendlyName: "test",
    description: "This dataset is private",
    location: "EU",
    defaultTableExpirationMs: 3600000,
    labels: { env: "default" },
    accesses: [ownerAccess, domainReaderAccess, publicViewsAccess],
});
import pulumi
import pulumi_gcp as gcp
owner_account = gcp.serviceaccount.Account("bqowner", account_id="bqowner")

# Access entries that both datasets share.
owner_access = {
    "role": "OWNER",
    "user_by_email": owner_account.email,
}
domain_reader_access = {
    "role": "READER",
    "domain": "hashicorp.com",
}

public_dataset = gcp.bigquery.Dataset(
    "public",
    dataset_id="public",
    friendly_name="test",
    description="This dataset is public",
    location="EU",
    default_table_expiration_ms=3600000,
    labels={"env": "default"},
    accesses=[owner_access, domain_reader_access],
)

# Dataset-type access entry that names the "public" dataset and is limited to VIEWS.
public_views_access = {
    "dataset": {
        "dataset": {
            "project_id": public_dataset.project,
            "dataset_id": public_dataset.dataset_id,
        },
        "target_types": ["VIEWS"],
    },
}

private_dataset = gcp.bigquery.Dataset(
    "dataset",
    dataset_id="private",
    friendly_name="test",
    description="This dataset is private",
    location="EU",
    default_table_expiration_ms=3600000,
    labels={"env": "default"},
    accesses=[owner_access, domain_reader_access, public_views_access],
)
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/serviceaccount"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main declares a service account and two datasets; the "private" dataset
// carries an extra access entry that names the "public" dataset.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		ownerAccount, err := serviceaccount.NewAccount(ctx, "bqowner", &serviceaccount.AccountArgs{
			AccountId: pulumi.String("bqowner"),
		})
		if err != nil {
			return err
		}

		// Access entries that both datasets share.
		ownerAccess := &bigquery.DatasetAccessTypeArgs{
			Role:        pulumi.String("OWNER"),
			UserByEmail: ownerAccount.Email,
		}
		domainReaderAccess := &bigquery.DatasetAccessTypeArgs{
			Role:   pulumi.String("READER"),
			Domain: pulumi.String("hashicorp.com"),
		}

		publicDataset, err := bigquery.NewDataset(ctx, "public", &bigquery.DatasetArgs{
			DatasetId:                pulumi.String("public"),
			FriendlyName:             pulumi.String("test"),
			Description:              pulumi.String("This dataset is public"),
			Location:                 pulumi.String("EU"),
			DefaultTableExpirationMs: pulumi.Int(3600000),
			Labels: pulumi.StringMap{
				"env": pulumi.String("default"),
			},
			Accesses: bigquery.DatasetAccessTypeArray{
				ownerAccess,
				domainReaderAccess,
			},
		})
		if err != nil {
			return err
		}

		// Dataset-type access entry that names the "public" dataset and is limited to VIEWS.
		publicViewsAccess := &bigquery.DatasetAccessTypeArgs{
			Dataset: &bigquery.DatasetAccessDatasetArgs{
				Dataset: &bigquery.DatasetAccessDatasetDatasetArgs{
					ProjectId: publicDataset.Project,
					DatasetId: publicDataset.DatasetId,
				},
				TargetTypes: pulumi.StringArray{
					pulumi.String("VIEWS"),
				},
			},
		}

		// The private dataset is the last resource, so its error (or nil) is the result.
		_, err = bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
			DatasetId:                pulumi.String("private"),
			FriendlyName:             pulumi.String("test"),
			Description:              pulumi.String("This dataset is private"),
			Location:                 pulumi.String("EU"),
			DefaultTableExpirationMs: pulumi.Int(3600000),
			Labels: pulumi.StringMap{
				"env": pulumi.String("default"),
			},
			Accesses: bigquery.DatasetAccessTypeArray{
				ownerAccess,
				domainReaderAccess,
				publicViewsAccess,
			},
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
// Declares a service account and two datasets; the "private" dataset carries
// an extra access entry that names the "public" dataset.
return await Deployment.RunAsync(() =>
{
    var ownerAccount = new Gcp.ServiceAccount.Account("bqowner", new Gcp.ServiceAccount.AccountArgs
    {
        AccountId = "bqowner",
    });

    // Access entries that both datasets share.
    var ownerAccess = new Gcp.BigQuery.Inputs.DatasetAccessArgs
    {
        Role = "OWNER",
        UserByEmail = ownerAccount.Email,
    };
    var domainReaderAccess = new Gcp.BigQuery.Inputs.DatasetAccessArgs
    {
        Role = "READER",
        Domain = "hashicorp.com",
    };

    var publicDataset = new Gcp.BigQuery.Dataset("public", new Gcp.BigQuery.DatasetArgs
    {
        DatasetId = "public",
        FriendlyName = "test",
        Description = "This dataset is public",
        Location = "EU",
        DefaultTableExpirationMs = 3600000,
        Labels =
        {
            { "env", "default" },
        },
        Accesses = new[]
        {
            ownerAccess,
            domainReaderAccess,
        },
    });

    // Dataset-type access entry that names the "public" dataset and is limited to VIEWS.
    var publicViewsAccess = new Gcp.BigQuery.Inputs.DatasetAccessArgs
    {
        Dataset = new Gcp.BigQuery.Inputs.DatasetAccessDatasetArgs
        {
            Dataset = new Gcp.BigQuery.Inputs.DatasetAccessDatasetDatasetArgs
            {
                ProjectId = publicDataset.Project,
                DatasetId = publicDataset.DatasetId,
            },
            TargetTypes = new[]
            {
                "VIEWS",
            },
        },
    };

    var privateDataset = new Gcp.BigQuery.Dataset("dataset", new Gcp.BigQuery.DatasetArgs
    {
        DatasetId = "private",
        FriendlyName = "test",
        Description = "This dataset is private",
        Location = "EU",
        DefaultTableExpirationMs = 3600000,
        Labels =
        {
            { "env", "default" },
        },
        Accesses = new[]
        {
            ownerAccess,
            domainReaderAccess,
            publicViewsAccess,
        },
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.serviceaccount.Account;
import com.pulumi.gcp.serviceaccount.AccountArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessDatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessDatasetDatasetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Pulumi program that declares a service account and two BigQuery datasets;
 * the "private" dataset carries an extra access entry naming the "public" one.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources that make up the stack. */
    public static void stack(Context ctx) {
        final var ownerAccount = new Account("bqowner", AccountArgs.builder()
            .accountId("bqowner")
            .build());

        // Access entries that both datasets share.
        final var ownerAccess = DatasetAccessArgs.builder()
            .role("OWNER")
            .userByEmail(ownerAccount.email())
            .build();
        final var domainReaderAccess = DatasetAccessArgs.builder()
            .role("READER")
            .domain("hashicorp.com")
            .build();

        final var publicDataset = new Dataset("public", DatasetArgs.builder()
            .datasetId("public")
            .friendlyName("test")
            .description("This dataset is public")
            .location("EU")
            .defaultTableExpirationMs(3600000)
            .labels(Map.of("env", "default"))
            .accesses(ownerAccess, domainReaderAccess)
            .build());

        // Dataset-type access entry that names the "public" dataset and is limited to VIEWS.
        final var publicViewsAccess = DatasetAccessArgs.builder()
            .dataset(DatasetAccessDatasetArgs.builder()
                .dataset(DatasetAccessDatasetDatasetArgs.builder()
                    .projectId(publicDataset.project())
                    .datasetId(publicDataset.datasetId())
                    .build())
                .targetTypes("VIEWS")
                .build())
            .build();

        new Dataset("dataset", DatasetArgs.builder()
            .datasetId("private")
            .friendlyName("test")
            .description("This dataset is private")
            .location("EU")
            .defaultTableExpirationMs(3600000)
            .labels(Map.of("env", "default"))
            .accesses(ownerAccess, domainReaderAccess, publicViewsAccess)
            .build());
    }
}
# Two datasets; the "private" dataset carries an access entry naming the "public" one.
resources:
  public:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: public
      friendlyName: test
      description: This dataset is public
      location: EU
      # One hour, written as a plain integer number of milliseconds.
      defaultTableExpirationMs: 3600000
      labels:
        env: default
      accesses:
        - role: OWNER
          userByEmail: ${bqowner.email}
        - role: READER
          domain: hashicorp.com
  dataset:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: private
      friendlyName: test
      description: This dataset is private
      location: EU
      defaultTableExpirationMs: 3600000
      labels:
        env: default
      accesses:
        - role: OWNER
          userByEmail: ${bqowner.email}
        - role: READER
          domain: hashicorp.com
        # Dataset-type entry: names the "public" dataset, limited to VIEWS.
        - dataset:
            dataset:
              projectId: ${public.project}
              datasetId: ${public.datasetId}
            targetTypes:
              - VIEWS
  bqowner:
    type: gcp:serviceaccount:Account
    properties:
      accountId: bqowner
The dataset access type in the accesses array grants cross-dataset permissions. The nested dataset object specifies the authorized dataset by projectId and datasetId. The targetTypes array limits access to specific object types. Here the entry is attached to the private dataset and names the public dataset, so “VIEWS” means that views defined in the public dataset are authorized to query tables in the private dataset. Other resource types in the public dataset gain no access through this entry.
Reference AWS Glue catalogs as external datasets
Multi-cloud platforms can query AWS Glue tables directly from BigQuery by referencing external metadata stores.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Reference to the external AWS Glue database and the connection used to reach it.
const glueReference = {
    externalSource: "aws-glue://arn:aws:glue:us-east-1:999999999999:database/database",
    connection: "projects/project/locations/aws-us-east-1/connections/connection",
};

const externalDataset = new gcp.bigquery.Dataset("dataset", {
    datasetId: "example_dataset",
    friendlyName: "test",
    description: "This is a test description",
    location: "aws-us-east-1",
    externalDatasetReference: glueReference,
});
import pulumi
import pulumi_gcp as gcp
# Reference to the external AWS Glue database and the connection used to reach it.
glue_reference = {
    "external_source": "aws-glue://arn:aws:glue:us-east-1:999999999999:database/database",
    "connection": "projects/project/locations/aws-us-east-1/connections/connection",
}

external_dataset = gcp.bigquery.Dataset(
    "dataset",
    dataset_id="example_dataset",
    friendly_name="test",
    description="This is a test description",
    location="aws-us-east-1",
    external_dataset_reference=glue_reference,
)
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main declares a dataset that references an external AWS Glue database.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Reference to the Glue database and the connection used to reach it.
		glueReference := &bigquery.DatasetExternalDatasetReferenceArgs{
			ExternalSource: pulumi.String("aws-glue://arn:aws:glue:us-east-1:999999999999:database/database"),
			Connection:     pulumi.String("projects/project/locations/aws-us-east-1/connections/connection"),
		}

		// The dataset is the only resource, so its error (or nil) is the result.
		_, err := bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
			DatasetId:                pulumi.String("example_dataset"),
			FriendlyName:             pulumi.String("test"),
			Description:              pulumi.String("This is a test description"),
			Location:                 pulumi.String("aws-us-east-1"),
			ExternalDatasetReference: glueReference,
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
// Declares a dataset that references an external AWS Glue database.
return await Deployment.RunAsync(() =>
{
    // Reference to the Glue database and the connection used to reach it.
    var glueReference = new Gcp.BigQuery.Inputs.DatasetExternalDatasetReferenceArgs
    {
        ExternalSource = "aws-glue://arn:aws:glue:us-east-1:999999999999:database/database",
        Connection = "projects/project/locations/aws-us-east-1/connections/connection",
    };

    var externalDataset = new Gcp.BigQuery.Dataset("dataset", new Gcp.BigQuery.DatasetArgs
    {
        DatasetId = "example_dataset",
        FriendlyName = "test",
        Description = "This is a test description",
        Location = "aws-us-east-1",
        ExternalDatasetReference = glueReference,
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetExternalDatasetReferenceArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/** Pulumi program that declares a BigQuery dataset referencing an external AWS Glue database. */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources that make up the stack. */
    public static void stack(Context ctx) {
        // Reference to the Glue database and the connection used to reach it.
        final var glueReference = DatasetExternalDatasetReferenceArgs.builder()
            .externalSource("aws-glue://arn:aws:glue:us-east-1:999999999999:database/database")
            .connection("projects/project/locations/aws-us-east-1/connections/connection")
            .build();

        final var datasetArgs = DatasetArgs.builder()
            .datasetId("example_dataset")
            .friendlyName("test")
            .description("This is a test description")
            .location("aws-us-east-1")
            .externalDatasetReference(glueReference)
            .build();

        new Dataset("dataset", datasetArgs);
    }
}
# Dataset that references an external AWS Glue database through a connection.
resources:
  dataset:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: example_dataset
      friendlyName: test
      description: This is a test description
      location: aws-us-east-1
      externalDatasetReference:
        externalSource: aws-glue://arn:aws:glue:us-east-1:999999999999:database/database
        connection: projects/project/locations/aws-us-east-1/connections/connection
The externalDatasetReference property links this dataset to an AWS Glue database. The externalSource uses an ARN format pointing to the Glue database. The connection property references a BigQuery Connection resource that handles authentication and network routing to AWS. The location must match the connection’s region (e.g., “aws-us-east-1”). Tables in this dataset are metadata pointers; actual data remains in AWS.
Beyond these examples
These snippets focus on specific dataset-level features: access control and lifecycle management, customer-managed encryption, and cross-dataset and external catalog integration. They’re intentionally minimal rather than full data warehouse configurations.
The examples may reference pre-existing infrastructure such as service accounts for access control, Cloud KMS keys for encryption, BigQuery Connection resources for external catalogs, and AWS Glue databases for external references. They focus on configuring the dataset rather than provisioning everything around it.
To keep things focused, common dataset patterns are omitted, including:
- Table-level configuration (created separately via gcp.bigquery.Table)
- Partition and clustering strategies (table-level concerns)
- Deleting datasets that still contain tables (deleteContentsOnDestroy)
- Case sensitivity and collation settings (isCaseInsensitive, defaultCollation)
- Storage billing models (storageBillingModel)
- Authorized routines for function-level access (routine entries in accesses)
These omissions are intentional: the goal is to illustrate how each dataset feature is wired, not provide drop-in analytics modules. See the BigQuery Dataset resource reference for all available configuration options.
Let's create and configure BigQuery datasets
Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.
Try Pulumi Cloud for FREE
Frequently Asked Questions
Access Control & Permissions
The API returns legacy role names such as OWNER, READER, WRITER even when you specify modern formats like roles/bigquery.dataOwner. Use legacy formats in your accesses configuration to avoid perpetual diffs.
To authorize views from another dataset, add an access entry with dataset.dataset containing the source dataset’s projectId and datasetId, plus targetTypes: ["VIEWS"] to limit access to views only.
To authorize a routine, add an access entry with routine containing the routine’s projectId, datasetId, and routineId.
Data Lifecycle & Expiration
defaultTableExpirationMs sets the default lifetime for all tables (minimum 3600000 ms or 1 hour). For partitioned tables, defaultPartitionExpirationMs overrides this setting. Explicit table-level expirationTime or timePartitioning.expirationMs always take precedence over dataset defaults.
The maxTimeTravelHours property accepts values from 48 to 168 hours (2 to 7 days).
Encryption & Security
Set defaultEncryptionConfiguration with kmsKeyName pointing to your Cloud KMS crypto key. All newly-created tables will inherit this encryption unless overridden at table creation.
Dataset Configuration
You can’t change datasetId, project, location, or externalDatasetReference after creation. Modifying these forces a new resource to be created.
To make dataset and table names case-insensitive, set isCaseInsensitive to true. By default, this is false, meaning dataset and table names are case-sensitive. This setting doesn’t affect routine references.
Changing the defaultCollation property only affects tables created after the change. Existing tables retain their original collation settings.
A storageBillingModel set to LOGICAL (the default) bills based on logical bytes, while PHYSICAL bills based on physical bytes stored.
Resource Management
To destroy a dataset that still contains tables, set deleteContentsOnDestroy to true, which automatically deletes all tables when destroying the dataset.
The labels field is non-authoritative and only manages labels present in your configuration. Use effectiveLabels to see all labels on the resource, including those set by other clients and services.
External Data Sources
Configure externalDatasetReference with externalSource (your AWS Glue ARN like aws-glue://arn:aws:glue:us-east-1:999999999999:database/database) and connection (your GCP connection resource path). Note that externalDatasetReference is immutable.