The gcp:bigquery/dataset:Dataset resource, part of the Pulumi Google Cloud provider, provisions BigQuery datasets that serve as containers for tables, views, and routines. This guide focuses on four capabilities: access control configuration, customer-managed encryption, cross-dataset authorization, and external data source integration.
A dataset doesn’t exist in isolation. It references service accounts for access control, Cloud KMS keys for encryption, and BigQuery connections for external data sources. The examples are intentionally small and show how to configure dataset-level features. Combine them with your own IAM setup and table definitions.
Create a dataset with access controls and metadata
Most deployments start with a dataset that has basic metadata and access controls to organize tables.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Service account that will be granted ownership of the dataset.
const bqowner = new gcp.serviceaccount.Account("bqowner", {accountId: "bqowner"});

// Dataset with basic metadata, a default table TTL, and two access entries:
// the service account as data owner, plus domain-wide read access.
const dataset = new gcp.bigquery.Dataset("dataset", {
    datasetId: "example_dataset",
    friendlyName: "test",
    description: "This is a test description",
    location: "EU",
    defaultTableExpirationMs: 3600000, // 1 hour, the minimum allowed value
    labels: {
        env: "default",
    },
    accesses: [
        {
            role: "roles/bigquery.dataOwner",
            userByEmail: bqowner.email,
        },
        {
            role: "READER",
            domain: "hashicorp.com",
        },
    ],
});
import pulumi
import pulumi_gcp as gcp

# Service account that will be granted ownership of the dataset.
bqowner = gcp.serviceaccount.Account("bqowner", account_id="bqowner")

# Dataset with basic metadata, a default table TTL, and two access entries:
# the service account as data owner, plus domain-wide read access.
dataset = gcp.bigquery.Dataset("dataset",
    dataset_id="example_dataset",
    friendly_name="test",
    description="This is a test description",
    location="EU",
    default_table_expiration_ms=3600000,  # 1 hour, the minimum allowed value
    labels={
        "env": "default",
    },
    accesses=[
        {
            "role": "roles/bigquery.dataOwner",
            "user_by_email": bqowner.email,
        },
        {
            "role": "READER",
            "domain": "hashicorp.com",
        },
    ])
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/serviceaccount"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Service account that will be granted ownership of the dataset.
		bqowner, err := serviceaccount.NewAccount(ctx, "bqowner", &serviceaccount.AccountArgs{
			AccountId: pulumi.String("bqowner"),
		})
		if err != nil {
			return err
		}
		// Dataset with basic metadata, a default table TTL, and two access
		// entries: the service account as data owner, plus domain-wide reads.
		_, err = bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
			DatasetId:                pulumi.String("example_dataset"),
			FriendlyName:             pulumi.String("test"),
			Description:              pulumi.String("This is a test description"),
			Location:                 pulumi.String("EU"),
			DefaultTableExpirationMs: pulumi.Int(3600000), // 1 hour minimum
			Labels: pulumi.StringMap{
				"env": pulumi.String("default"),
			},
			Accesses: bigquery.DatasetAccessTypeArray{
				&bigquery.DatasetAccessTypeArgs{
					Role:        pulumi.String("roles/bigquery.dataOwner"),
					UserByEmail: bqowner.Email,
				},
				&bigquery.DatasetAccessTypeArgs{
					Role:   pulumi.String("READER"),
					Domain: pulumi.String("hashicorp.com"),
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() =>
{
    // Service account that will be granted ownership of the dataset.
    var bqowner = new Gcp.ServiceAccount.Account("bqowner", new()
    {
        AccountId = "bqowner",
    });

    // Dataset with basic metadata, a default table TTL, and two access
    // entries: the service account as data owner, plus domain-wide reads.
    var dataset = new Gcp.BigQuery.Dataset("dataset", new()
    {
        DatasetId = "example_dataset",
        FriendlyName = "test",
        Description = "This is a test description",
        Location = "EU",
        DefaultTableExpirationMs = 3600000, // 1 hour, the minimum allowed value
        Labels =
        {
            { "env", "default" },
        },
        Accesses = new[]
        {
            new Gcp.BigQuery.Inputs.DatasetAccessArgs
            {
                Role = "roles/bigquery.dataOwner",
                UserByEmail = bqowner.Email,
            },
            new Gcp.BigQuery.Inputs.DatasetAccessArgs
            {
                Role = "READER",
                Domain = "hashicorp.com",
            },
        },
    });
});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.serviceaccount.Account;
import com.pulumi.gcp.serviceaccount.AccountArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // Service account that will be granted ownership of the dataset.
        var bqowner = new Account("bqowner", AccountArgs.builder()
            .accountId("bqowner")
            .build());

        // Dataset with basic metadata, a default table TTL, and two access
        // entries: the service account as data owner, plus domain-wide reads.
        var dataset = new Dataset("dataset", DatasetArgs.builder()
            .datasetId("example_dataset")
            .friendlyName("test")
            .description("This is a test description")
            .location("EU")
            .defaultTableExpirationMs(3600000)
            .labels(Map.of("env", "default"))
            .accesses(
                DatasetAccessArgs.builder()
                    .role("roles/bigquery.dataOwner")
                    .userByEmail(bqowner.email())
                    .build(),
                DatasetAccessArgs.builder()
                    .role("READER")
                    .domain("hashicorp.com")
                    .build())
            .build());
    }
}
resources:
  # Dataset with basic metadata, a default table TTL, and two access
  # entries: the service account as data owner, plus domain-wide reads.
  dataset:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: example_dataset
      friendlyName: test
      description: This is a test description
      location: EU
      defaultTableExpirationMs: 3.6e+06 # 1 hour, the minimum allowed value
      labels:
        env: default
      accesses:
        - role: roles/bigquery.dataOwner
          userByEmail: ${bqowner.email}
        - role: READER
          domain: hashicorp.com
  # Service account that will be granted ownership of the dataset.
  bqowner:
    type: gcp:serviceaccount:Account
    properties:
      accountId: bqowner
The datasetId property sets a unique identifier within your project. The accesses array defines who can interact with the dataset: userByEmail grants individual access, while domain grants access to all users in an organization. The defaultTableExpirationMs property applies to all tables created in this dataset unless overridden at the table level.
Encrypt datasets with customer-managed keys
Regulated workloads often require customer-managed encryption keys to meet compliance requirements.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// KMS key ring and crypto key used for customer-managed encryption (CMEK).
const keyRing = new gcp.kms.KeyRing("key_ring", {
    name: "example-keyring",
    location: "us",
});
const cryptoKey = new gcp.kms.CryptoKey("crypto_key", {
    name: "example-key",
    keyRing: keyRing.id,
});

// Dataset whose newly-created tables are encrypted with the KMS key above.
const dataset = new gcp.bigquery.Dataset("dataset", {
    datasetId: "example_dataset",
    friendlyName: "test",
    description: "This is a test description",
    location: "US",
    defaultTableExpirationMs: 3600000,
    defaultEncryptionConfiguration: {
        kmsKeyName: cryptoKey.id,
    },
});
import pulumi
import pulumi_gcp as gcp

# KMS key ring and crypto key used for customer-managed encryption (CMEK).
key_ring = gcp.kms.KeyRing("key_ring",
    name="example-keyring",
    location="us")
crypto_key = gcp.kms.CryptoKey("crypto_key",
    name="example-key",
    key_ring=key_ring.id)

# Dataset whose newly-created tables are encrypted with the KMS key above.
dataset = gcp.bigquery.Dataset("dataset",
    dataset_id="example_dataset",
    friendly_name="test",
    description="This is a test description",
    location="US",
    default_table_expiration_ms=3600000,
    default_encryption_configuration={
        "kms_key_name": crypto_key.id,
    })
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/kms"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// KMS key ring and crypto key used for customer-managed encryption.
		keyRing, err := kms.NewKeyRing(ctx, "key_ring", &kms.KeyRingArgs{
			Name:     pulumi.String("example-keyring"),
			Location: pulumi.String("us"),
		})
		if err != nil {
			return err
		}
		cryptoKey, err := kms.NewCryptoKey(ctx, "crypto_key", &kms.CryptoKeyArgs{
			Name:    pulumi.String("example-key"),
			KeyRing: keyRing.ID(),
		})
		if err != nil {
			return err
		}
		// Dataset whose newly-created tables are encrypted with the key above.
		_, err = bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
			DatasetId:                pulumi.String("example_dataset"),
			FriendlyName:             pulumi.String("test"),
			Description:              pulumi.String("This is a test description"),
			Location:                 pulumi.String("US"),
			DefaultTableExpirationMs: pulumi.Int(3600000),
			DefaultEncryptionConfiguration: &bigquery.DatasetDefaultEncryptionConfigurationArgs{
				KmsKeyName: cryptoKey.ID(),
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() =>
{
    // KMS key ring and crypto key used for customer-managed encryption (CMEK).
    var keyRing = new Gcp.Kms.KeyRing("key_ring", new()
    {
        Name = "example-keyring",
        Location = "us",
    });
    var cryptoKey = new Gcp.Kms.CryptoKey("crypto_key", new()
    {
        Name = "example-key",
        KeyRing = keyRing.Id,
    });

    // Dataset whose newly-created tables are encrypted with the KMS key above.
    var dataset = new Gcp.BigQuery.Dataset("dataset", new()
    {
        DatasetId = "example_dataset",
        FriendlyName = "test",
        Description = "This is a test description",
        Location = "US",
        DefaultTableExpirationMs = 3600000,
        DefaultEncryptionConfiguration = new Gcp.BigQuery.Inputs.DatasetDefaultEncryptionConfigurationArgs
        {
            KmsKeyName = cryptoKey.Id,
        },
    });
});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.kms.KeyRing;
import com.pulumi.gcp.kms.KeyRingArgs;
import com.pulumi.gcp.kms.CryptoKey;
import com.pulumi.gcp.kms.CryptoKeyArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetDefaultEncryptionConfigurationArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // KMS key ring and crypto key used for customer-managed encryption.
        var keyRing = new KeyRing("keyRing", KeyRingArgs.builder()
            .name("example-keyring")
            .location("us")
            .build());

        var cryptoKey = new CryptoKey("cryptoKey", CryptoKeyArgs.builder()
            .name("example-key")
            .keyRing(keyRing.id())
            .build());

        // Dataset whose newly-created tables are encrypted with the key above.
        var dataset = new Dataset("dataset", DatasetArgs.builder()
            .datasetId("example_dataset")
            .friendlyName("test")
            .description("This is a test description")
            .location("US")
            .defaultTableExpirationMs(3600000)
            .defaultEncryptionConfiguration(DatasetDefaultEncryptionConfigurationArgs.builder()
                .kmsKeyName(cryptoKey.id())
                .build())
            .build());
    }
}
resources:
  # Dataset whose newly-created tables are encrypted with the KMS key below.
  dataset:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: example_dataset
      friendlyName: test
      description: This is a test description
      location: US
      defaultTableExpirationMs: 3.6e+06
      defaultEncryptionConfiguration:
        kmsKeyName: ${cryptoKey.id}
  # KMS crypto key used for customer-managed encryption (CMEK).
  cryptoKey:
    type: gcp:kms:CryptoKey
    name: crypto_key
    properties:
      name: example-key
      keyRing: ${keyRing.id}
  # Key ring that holds the crypto key.
  keyRing:
    type: gcp:kms:KeyRing
    name: key_ring
    properties:
      name: example-keyring
      location: us
The defaultEncryptionConfiguration property applies your KMS key to all newly-created tables in the dataset. BigQuery uses this key for encryption at rest unless a table-level key overrides it. Your BigQuery service account needs permission to use the KMS key.
Grant cross-dataset access for authorized views
Organizations share specific views from one dataset while keeping underlying tables private through authorized datasets.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Service account that owns both datasets.
const bqowner = new gcp.serviceaccount.Account("bqowner", {accountId: "bqowner"});

// The shared dataset whose views will be authorized against the private one.
const _public = new gcp.bigquery.Dataset("public", {
    datasetId: "public",
    friendlyName: "test",
    description: "This dataset is public",
    location: "EU",
    defaultTableExpirationMs: 3600000,
    labels: {
        env: "default",
    },
    accesses: [
        {
            role: "OWNER",
            userByEmail: bqowner.email,
        },
        {
            role: "READER",
            domain: "hashicorp.com",
        },
    ],
});

// The private dataset; the third access entry authorizes views in the
// public dataset to query its tables without granting direct table access.
const dataset = new gcp.bigquery.Dataset("dataset", {
    datasetId: "private",
    friendlyName: "test",
    description: "This dataset is private",
    location: "EU",
    defaultTableExpirationMs: 3600000,
    labels: {
        env: "default",
    },
    accesses: [
        {
            role: "OWNER",
            userByEmail: bqowner.email,
        },
        {
            role: "READER",
            domain: "hashicorp.com",
        },
        {
            dataset: {
                dataset: {
                    projectId: _public.project,
                    datasetId: _public.datasetId,
                },
                targetTypes: ["VIEWS"], // authorize views only, not tables
            },
        },
    ],
});
import pulumi
import pulumi_gcp as gcp

# Service account that owns both datasets.
bqowner = gcp.serviceaccount.Account("bqowner", account_id="bqowner")

# The shared dataset whose views will be authorized against the private one.
public = gcp.bigquery.Dataset("public",
    dataset_id="public",
    friendly_name="test",
    description="This dataset is public",
    location="EU",
    default_table_expiration_ms=3600000,
    labels={
        "env": "default",
    },
    accesses=[
        {
            "role": "OWNER",
            "user_by_email": bqowner.email,
        },
        {
            "role": "READER",
            "domain": "hashicorp.com",
        },
    ])

# The private dataset; the third access entry authorizes views in the
# public dataset to query its tables without granting direct table access.
dataset = gcp.bigquery.Dataset("dataset",
    dataset_id="private",
    friendly_name="test",
    description="This dataset is private",
    location="EU",
    default_table_expiration_ms=3600000,
    labels={
        "env": "default",
    },
    accesses=[
        {
            "role": "OWNER",
            "user_by_email": bqowner.email,
        },
        {
            "role": "READER",
            "domain": "hashicorp.com",
        },
        {
            "dataset": {
                "dataset": {
                    "project_id": public.project,
                    "dataset_id": public.dataset_id,
                },
                "target_types": ["VIEWS"],  # authorize views only, not tables
            },
        },
    ])
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/serviceaccount"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Service account that owns both datasets.
		bqowner, err := serviceaccount.NewAccount(ctx, "bqowner", &serviceaccount.AccountArgs{
			AccountId: pulumi.String("bqowner"),
		})
		if err != nil {
			return err
		}
		// The shared dataset whose views will be authorized against the
		// private one.
		public, err := bigquery.NewDataset(ctx, "public", &bigquery.DatasetArgs{
			DatasetId:                pulumi.String("public"),
			FriendlyName:             pulumi.String("test"),
			Description:              pulumi.String("This dataset is public"),
			Location:                 pulumi.String("EU"),
			DefaultTableExpirationMs: pulumi.Int(3600000),
			Labels: pulumi.StringMap{
				"env": pulumi.String("default"),
			},
			Accesses: bigquery.DatasetAccessTypeArray{
				&bigquery.DatasetAccessTypeArgs{
					Role:        pulumi.String("OWNER"),
					UserByEmail: bqowner.Email,
				},
				&bigquery.DatasetAccessTypeArgs{
					Role:   pulumi.String("READER"),
					Domain: pulumi.String("hashicorp.com"),
				},
			},
		})
		if err != nil {
			return err
		}
		// The private dataset; the third access entry authorizes views in the
		// public dataset to query its tables without direct table access.
		_, err = bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
			DatasetId:                pulumi.String("private"),
			FriendlyName:             pulumi.String("test"),
			Description:              pulumi.String("This dataset is private"),
			Location:                 pulumi.String("EU"),
			DefaultTableExpirationMs: pulumi.Int(3600000),
			Labels: pulumi.StringMap{
				"env": pulumi.String("default"),
			},
			Accesses: bigquery.DatasetAccessTypeArray{
				&bigquery.DatasetAccessTypeArgs{
					Role:        pulumi.String("OWNER"),
					UserByEmail: bqowner.Email,
				},
				&bigquery.DatasetAccessTypeArgs{
					Role:   pulumi.String("READER"),
					Domain: pulumi.String("hashicorp.com"),
				},
				&bigquery.DatasetAccessTypeArgs{
					Dataset: &bigquery.DatasetAccessDatasetArgs{
						Dataset: &bigquery.DatasetAccessDatasetDatasetArgs{
							ProjectId: public.Project,
							DatasetId: public.DatasetId,
						},
						// Authorize views only, not direct table access.
						TargetTypes: pulumi.StringArray{
							pulumi.String("VIEWS"),
						},
					},
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() =>
{
    // Service account that owns both datasets.
    var bqowner = new Gcp.ServiceAccount.Account("bqowner", new()
    {
        AccountId = "bqowner",
    });

    // The shared dataset whose views will be authorized against the private one.
    var @public = new Gcp.BigQuery.Dataset("public", new()
    {
        DatasetId = "public",
        FriendlyName = "test",
        Description = "This dataset is public",
        Location = "EU",
        DefaultTableExpirationMs = 3600000,
        Labels =
        {
            { "env", "default" },
        },
        Accesses = new[]
        {
            new Gcp.BigQuery.Inputs.DatasetAccessArgs
            {
                Role = "OWNER",
                UserByEmail = bqowner.Email,
            },
            new Gcp.BigQuery.Inputs.DatasetAccessArgs
            {
                Role = "READER",
                Domain = "hashicorp.com",
            },
        },
    });

    // The private dataset; the third access entry authorizes views in the
    // public dataset to query its tables without granting direct table access.
    var dataset = new Gcp.BigQuery.Dataset("dataset", new()
    {
        DatasetId = "private",
        FriendlyName = "test",
        Description = "This dataset is private",
        Location = "EU",
        DefaultTableExpirationMs = 3600000,
        Labels =
        {
            { "env", "default" },
        },
        Accesses = new[]
        {
            new Gcp.BigQuery.Inputs.DatasetAccessArgs
            {
                Role = "OWNER",
                UserByEmail = bqowner.Email,
            },
            new Gcp.BigQuery.Inputs.DatasetAccessArgs
            {
                Role = "READER",
                Domain = "hashicorp.com",
            },
            new Gcp.BigQuery.Inputs.DatasetAccessArgs
            {
                Dataset = new Gcp.BigQuery.Inputs.DatasetAccessDatasetArgs
                {
                    Dataset = new Gcp.BigQuery.Inputs.DatasetAccessDatasetDatasetArgs
                    {
                        ProjectId = @public.Project,
                        DatasetId = @public.DatasetId,
                    },
                    // Authorize views only, not direct table access.
                    TargetTypes = new[]
                    {
                        "VIEWS",
                    },
                },
            },
        },
    });
});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.serviceaccount.Account;
import com.pulumi.gcp.serviceaccount.AccountArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessDatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessDatasetDatasetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // Service account that owns both datasets.
        var bqowner = new Account("bqowner", AccountArgs.builder()
            .accountId("bqowner")
            .build());

        // The shared dataset whose views will be authorized against the
        // private one.
        var public_ = new Dataset("public", DatasetArgs.builder()
            .datasetId("public")
            .friendlyName("test")
            .description("This dataset is public")
            .location("EU")
            .defaultTableExpirationMs(3600000)
            .labels(Map.of("env", "default"))
            .accesses(
                DatasetAccessArgs.builder()
                    .role("OWNER")
                    .userByEmail(bqowner.email())
                    .build(),
                DatasetAccessArgs.builder()
                    .role("READER")
                    .domain("hashicorp.com")
                    .build())
            .build());

        // The private dataset; the third access entry authorizes views in the
        // public dataset to query its tables without direct table access.
        var dataset = new Dataset("dataset", DatasetArgs.builder()
            .datasetId("private")
            .friendlyName("test")
            .description("This dataset is private")
            .location("EU")
            .defaultTableExpirationMs(3600000)
            .labels(Map.of("env", "default"))
            .accesses(
                DatasetAccessArgs.builder()
                    .role("OWNER")
                    .userByEmail(bqowner.email())
                    .build(),
                DatasetAccessArgs.builder()
                    .role("READER")
                    .domain("hashicorp.com")
                    .build(),
                DatasetAccessArgs.builder()
                    .dataset(DatasetAccessDatasetArgs.builder()
                        .dataset(DatasetAccessDatasetDatasetArgs.builder()
                            .projectId(public_.project())
                            .datasetId(public_.datasetId())
                            .build())
                        .targetTypes("VIEWS")
                        .build())
                    .build())
            .build());
    }
}
resources:
  # The shared dataset whose views will be authorized against the private one.
  public:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: public
      friendlyName: test
      description: This dataset is public
      location: EU
      defaultTableExpirationMs: 3.6e+06
      labels:
        env: default
      accesses:
        - role: OWNER
          userByEmail: ${bqowner.email}
        - role: READER
          domain: hashicorp.com
  # The private dataset; the third access entry authorizes views in the
  # public dataset to query its tables without granting direct table access.
  dataset:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: private
      friendlyName: test
      description: This dataset is private
      location: EU
      defaultTableExpirationMs: 3.6e+06
      labels:
        env: default
      accesses:
        - role: OWNER
          userByEmail: ${bqowner.email}
        - role: READER
          domain: hashicorp.com
        - dataset:
            dataset:
              projectId: ${public.project}
              datasetId: ${public.datasetId}
            targetTypes:
              - VIEWS
  # Service account that owns both datasets.
  bqowner:
    type: gcp:serviceaccount:Account
    properties:
      accountId: bqowner
The dataset access type in the accesses array grants views in the public dataset permission to query tables in the private dataset. The targetTypes property restricts authorization to views only, not direct table access. This enables selective data sharing without exposing raw data.
Reference external data sources from AWS Glue
Multi-cloud analytics require BigQuery to query data catalogs managed outside Google Cloud.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Dataset backed by an external AWS Glue database; metadata is read through
// a pre-existing BigQuery connection in the matching AWS region.
const dataset = new gcp.bigquery.Dataset("dataset", {
    datasetId: "example_dataset",
    friendlyName: "test",
    description: "This is a test description",
    location: "aws-us-east-1",
    externalDatasetReference: {
        externalSource: "aws-glue://arn:aws:glue:us-east-1:999999999999:database/database",
        connection: "projects/project/locations/aws-us-east-1/connections/connection",
    },
});
import pulumi
import pulumi_gcp as gcp

# Dataset backed by an external AWS Glue database; metadata is read through
# a pre-existing BigQuery connection in the matching AWS region.
dataset = gcp.bigquery.Dataset("dataset",
    dataset_id="example_dataset",
    friendly_name="test",
    description="This is a test description",
    location="aws-us-east-1",
    external_dataset_reference={
        "external_source": "aws-glue://arn:aws:glue:us-east-1:999999999999:database/database",
        "connection": "projects/project/locations/aws-us-east-1/connections/connection",
    })
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Dataset backed by an external AWS Glue database; metadata is read
		// through a pre-existing BigQuery connection in the same AWS region.
		_, err := bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
			DatasetId:    pulumi.String("example_dataset"),
			FriendlyName: pulumi.String("test"),
			Description:  pulumi.String("This is a test description"),
			Location:     pulumi.String("aws-us-east-1"),
			ExternalDatasetReference: &bigquery.DatasetExternalDatasetReferenceArgs{
				ExternalSource: pulumi.String("aws-glue://arn:aws:glue:us-east-1:999999999999:database/database"),
				Connection:     pulumi.String("projects/project/locations/aws-us-east-1/connections/connection"),
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() =>
{
    // Dataset backed by an external AWS Glue database; metadata is read
    // through a pre-existing BigQuery connection in the same AWS region.
    var dataset = new Gcp.BigQuery.Dataset("dataset", new()
    {
        DatasetId = "example_dataset",
        FriendlyName = "test",
        Description = "This is a test description",
        Location = "aws-us-east-1",
        ExternalDatasetReference = new Gcp.BigQuery.Inputs.DatasetExternalDatasetReferenceArgs
        {
            ExternalSource = "aws-glue://arn:aws:glue:us-east-1:999999999999:database/database",
            Connection = "projects/project/locations/aws-us-east-1/connections/connection",
        },
    });
});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetExternalDatasetReferenceArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // Dataset backed by an external AWS Glue database; metadata is read
        // through a pre-existing BigQuery connection in the same AWS region.
        var dataset = new Dataset("dataset", DatasetArgs.builder()
            .datasetId("example_dataset")
            .friendlyName("test")
            .description("This is a test description")
            .location("aws-us-east-1")
            .externalDatasetReference(DatasetExternalDatasetReferenceArgs.builder()
                .externalSource("aws-glue://arn:aws:glue:us-east-1:999999999999:database/database")
                .connection("projects/project/locations/aws-us-east-1/connections/connection")
                .build())
            .build());
    }
}
resources:
  # Dataset backed by an external AWS Glue database; metadata is read through
  # a pre-existing BigQuery connection in the matching AWS region.
  dataset:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: example_dataset
      friendlyName: test
      description: This is a test description
      location: aws-us-east-1
      externalDatasetReference:
        externalSource: aws-glue://arn:aws:glue:us-east-1:999999999999:database/database
        connection: projects/project/locations/aws-us-east-1/connections/connection
The externalDatasetReference property connects BigQuery to an AWS Glue database through a pre-configured BigQuery connection. The externalSource property points to the AWS Glue database ARN. Queries run through BigQuery but read metadata from AWS Glue without data movement.
Beyond These Examples
These snippets focus on specific dataset-level features: access controls (IAM roles, domain-based, cross-dataset), customer-managed encryption (CMEK), and external data source integration (AWS Glue). They’re intentionally minimal rather than full analytics environments.
The examples may reference pre-existing infrastructure such as service accounts for access control, Cloud KMS keys and permissions, BigQuery connections to external systems, and AWS Glue databases (for multi-cloud scenarios). They focus on configuring the dataset rather than provisioning everything around it.
To keep things focused, common dataset patterns are omitted, including:
- Table expiration policies (defaultPartitionExpirationMs)
- Case sensitivity settings (isCaseInsensitive)
- Time travel windows (maxTimeTravelHours)
- Storage billing models (storageBillingModel)
- Collation specifications (defaultCollation)
- Authorized routines for function sharing
These omissions are intentional: the goal is to illustrate how each dataset feature is wired, not provide drop-in analytics modules. See the BigQuery Dataset resource reference for all available configuration options.
Frequently Asked Questions
Resource Lifecycle & Immutability
Set deleteContentsOnDestroy to true to automatically delete all tables when destroying the dataset. The properties datasetId, project, location, and externalDatasetReference are immutable; changing any of these forces creation of a new resource. The datasetId must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_), with a maximum length of 1,024 characters.
Access Control & Security
Grant access by adding entries to the accesses array, each with a role and either userByEmail (for individual users or service accounts) or domain (for domain-wide access). To authorize another dataset's views, add an accesses entry with a nested dataset object containing the source dataset's projectId and datasetId, plus targetTypes set to ['VIEWS']. To share a routine, add an accesses entry with a nested routine object specifying the routine's projectId, datasetId, and routineId. For customer-managed encryption, set defaultEncryptionConfiguration.kmsKeyName to your Cloud KMS crypto key ID; all newly-created tables will inherit this encryption key.
Table Defaults & Expiration
The defaultTableExpirationMs property requires a minimum value of 3600000 milliseconds (1 hour). For partitioned tables, defaultPartitionExpirationMs takes precedence over defaultTableExpirationMs; only one of these properties applies to any new partitioned table. Changes to defaultCollation only affect tables created afterwards; existing tables retain their original collation settings.
Labels & Metadata
The labels field is non-authoritative and only manages the labels in your configuration. It won't remove labels added by other clients or services. Use effectiveLabels to view all labels on the resource.
Advanced Features
The maxTimeTravelHours property accepts values from 48 to 168 hours (2 to 7 days). To reference an AWS Glue database, set externalDatasetReference.externalSource to your AWS Glue ARN (e.g., aws-glue://arn:aws:glue:us-east-1:999999999999:database/database) and externalDatasetReference.connection to your BigQuery connection resource path.
Ready to get started?
Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.
Create free account