Create and Configure BigQuery Datasets

The gcp:bigquery/dataset:Dataset resource, part of the Pulumi GCP provider, defines a BigQuery dataset container: its location, access controls, encryption settings, and defaults for tables created within it. This guide focuses on four capabilities: access control configuration, customer-managed encryption, cross-dataset authorization, and external catalog references.

Datasets reference service accounts for access control, KMS keys for encryption, and BigQuery connections for external catalogs. The examples are intentionally small. Combine them with your own IAM configuration, encryption policies, and data governance rules.

Create a dataset with access controls and expiration

Most deployments start by creating a dataset with a unique ID, location, and role-based access that defines who can read or modify the data.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const bqowner = new gcp.serviceaccount.Account("bqowner", {accountId: "bqowner"});
const dataset = new gcp.bigquery.Dataset("dataset", {
    datasetId: "example_dataset",
    friendlyName: "test",
    description: "This is a test description",
    location: "EU",
    defaultTableExpirationMs: 3600000,
    labels: {
        env: "default",
    },
    accesses: [
        {
            role: "roles/bigquery.dataOwner",
            userByEmail: bqowner.email,
        },
        {
            role: "READER",
            domain: "hashicorp.com",
        },
    ],
});
import pulumi
import pulumi_gcp as gcp

bqowner = gcp.serviceaccount.Account("bqowner", account_id="bqowner")
dataset = gcp.bigquery.Dataset("dataset",
    dataset_id="example_dataset",
    friendly_name="test",
    description="This is a test description",
    location="EU",
    default_table_expiration_ms=3600000,
    labels={
        "env": "default",
    },
    accesses=[
        {
            "role": "roles/bigquery.dataOwner",
            "user_by_email": bqowner.email,
        },
        {
            "role": "READER",
            "domain": "hashicorp.com",
        },
    ])
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/serviceaccount"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		bqowner, err := serviceaccount.NewAccount(ctx, "bqowner", &serviceaccount.AccountArgs{
			AccountId: pulumi.String("bqowner"),
		})
		if err != nil {
			return err
		}
		_, err = bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
			DatasetId:                pulumi.String("example_dataset"),
			FriendlyName:             pulumi.String("test"),
			Description:              pulumi.String("This is a test description"),
			Location:                 pulumi.String("EU"),
			DefaultTableExpirationMs: pulumi.Int(3600000),
			Labels: pulumi.StringMap{
				"env": pulumi.String("default"),
			},
			Accesses: bigquery.DatasetAccessTypeArray{
				&bigquery.DatasetAccessTypeArgs{
					Role:        pulumi.String("roles/bigquery.dataOwner"),
					UserByEmail: bqowner.Email,
				},
				&bigquery.DatasetAccessTypeArgs{
					Role:   pulumi.String("READER"),
					Domain: pulumi.String("hashicorp.com"),
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    var bqowner = new Gcp.ServiceAccount.Account("bqowner", new()
    {
        AccountId = "bqowner",
    });

    var dataset = new Gcp.BigQuery.Dataset("dataset", new()
    {
        DatasetId = "example_dataset",
        FriendlyName = "test",
        Description = "This is a test description",
        Location = "EU",
        DefaultTableExpirationMs = 3600000,
        Labels = 
        {
            { "env", "default" },
        },
        Accesses = new[]
        {
            new Gcp.BigQuery.Inputs.DatasetAccessArgs
            {
                Role = "roles/bigquery.dataOwner",
                UserByEmail = bqowner.Email,
            },
            new Gcp.BigQuery.Inputs.DatasetAccessArgs
            {
                Role = "READER",
                Domain = "hashicorp.com",
            },
        },
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.serviceaccount.Account;
import com.pulumi.gcp.serviceaccount.AccountArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var bqowner = new Account("bqowner", AccountArgs.builder()
            .accountId("bqowner")
            .build());

        var dataset = new Dataset("dataset", DatasetArgs.builder()
            .datasetId("example_dataset")
            .friendlyName("test")
            .description("This is a test description")
            .location("EU")
            .defaultTableExpirationMs(3600000)
            .labels(Map.of("env", "default"))
            .accesses(            
                DatasetAccessArgs.builder()
                    .role("roles/bigquery.dataOwner")
                    .userByEmail(bqowner.email())
                    .build(),
                DatasetAccessArgs.builder()
                    .role("READER")
                    .domain("hashicorp.com")
                    .build())
            .build());

    }
}
resources:
  dataset:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: example_dataset
      friendlyName: test
      description: This is a test description
      location: EU
      defaultTableExpirationMs: 3600000
      labels:
        env: default
      accesses:
        - role: roles/bigquery.dataOwner
          userByEmail: ${bqowner.email}
        - role: READER
          domain: hashicorp.com
  bqowner:
    type: gcp:serviceaccount:Account
    properties:
      accountId: bqowner

The datasetId property sets a unique identifier within the project. The accesses array grants permissions: each entry pairs a role (a primitive role such as READER, or a predefined IAM role such as roles/bigquery.dataOwner) with an identity (userByEmail, groupByEmail, or domain). The defaultTableExpirationMs property sets a default lifetime for newly created tables; BigQuery deletes each table that many milliseconds after its creation time.
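Access entries aren't limited to individual users and domains. Below is a minimal TypeScript sketch of the other identity types the provider accepts, assuming a hypothetical group address, project, and service account; it creates a separate illustrative dataset rather than extending the one above.

import * as gcp from "@pulumi/gcp";

const analyticsDataset = new gcp.bigquery.Dataset("analytics", {
    datasetId: "analytics_dataset",
    location: "EU",
    accesses: [
        {
            // Every owner of the containing project gets full control.
            role: "OWNER",
            specialGroup: "projectOwners",
        },
        {
            // A Google group gets read access (the group address is an assumption).
            role: "READER",
            groupByEmail: "data-analysts@example.com",
        },
        {
            // An arbitrary IAM member, here a service account, gets write access.
            role: "WRITER",
            iamMember: "serviceAccount:etl-runner@my-project.iam.gserviceaccount.com",
        },
    ],
});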

Encrypt dataset tables with customer-managed keys

Organizations with compliance requirements often encrypt BigQuery data using their own KMS keys rather than Google-managed encryption.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const keyRing = new gcp.kms.KeyRing("key_ring", {
    name: "example-keyring",
    location: "us",
});
const cryptoKey = new gcp.kms.CryptoKey("crypto_key", {
    name: "example-key",
    keyRing: keyRing.id,
});
const dataset = new gcp.bigquery.Dataset("dataset", {
    datasetId: "example_dataset",
    friendlyName: "test",
    description: "This is a test description",
    location: "US",
    defaultTableExpirationMs: 3600000,
    defaultEncryptionConfiguration: {
        kmsKeyName: cryptoKey.id,
    },
});
import pulumi
import pulumi_gcp as gcp

key_ring = gcp.kms.KeyRing("key_ring",
    name="example-keyring",
    location="us")
crypto_key = gcp.kms.CryptoKey("crypto_key",
    name="example-key",
    key_ring=key_ring.id)
dataset = gcp.bigquery.Dataset("dataset",
    dataset_id="example_dataset",
    friendly_name="test",
    description="This is a test description",
    location="US",
    default_table_expiration_ms=3600000,
    default_encryption_configuration={
        "kms_key_name": crypto_key.id,
    })
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/kms"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		keyRing, err := kms.NewKeyRing(ctx, "key_ring", &kms.KeyRingArgs{
			Name:     pulumi.String("example-keyring"),
			Location: pulumi.String("us"),
		})
		if err != nil {
			return err
		}
		cryptoKey, err := kms.NewCryptoKey(ctx, "crypto_key", &kms.CryptoKeyArgs{
			Name:    pulumi.String("example-key"),
			KeyRing: keyRing.ID(),
		})
		if err != nil {
			return err
		}
		_, err = bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
			DatasetId:                pulumi.String("example_dataset"),
			FriendlyName:             pulumi.String("test"),
			Description:              pulumi.String("This is a test description"),
			Location:                 pulumi.String("US"),
			DefaultTableExpirationMs: pulumi.Int(3600000),
			DefaultEncryptionConfiguration: &bigquery.DatasetDefaultEncryptionConfigurationArgs{
				KmsKeyName: cryptoKey.ID(),
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    var keyRing = new Gcp.Kms.KeyRing("key_ring", new()
    {
        Name = "example-keyring",
        Location = "us",
    });

    var cryptoKey = new Gcp.Kms.CryptoKey("crypto_key", new()
    {
        Name = "example-key",
        KeyRing = keyRing.Id,
    });

    var dataset = new Gcp.BigQuery.Dataset("dataset", new()
    {
        DatasetId = "example_dataset",
        FriendlyName = "test",
        Description = "This is a test description",
        Location = "US",
        DefaultTableExpirationMs = 3600000,
        DefaultEncryptionConfiguration = new Gcp.BigQuery.Inputs.DatasetDefaultEncryptionConfigurationArgs
        {
            KmsKeyName = cryptoKey.Id,
        },
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.kms.KeyRing;
import com.pulumi.gcp.kms.KeyRingArgs;
import com.pulumi.gcp.kms.CryptoKey;
import com.pulumi.gcp.kms.CryptoKeyArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetDefaultEncryptionConfigurationArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var keyRing = new KeyRing("keyRing", KeyRingArgs.builder()
            .name("example-keyring")
            .location("us")
            .build());

        var cryptoKey = new CryptoKey("cryptoKey", CryptoKeyArgs.builder()
            .name("example-key")
            .keyRing(keyRing.id())
            .build());

        var dataset = new Dataset("dataset", DatasetArgs.builder()
            .datasetId("example_dataset")
            .friendlyName("test")
            .description("This is a test description")
            .location("US")
            .defaultTableExpirationMs(3600000)
            .defaultEncryptionConfiguration(DatasetDefaultEncryptionConfigurationArgs.builder()
                .kmsKeyName(cryptoKey.id())
                .build())
            .build());

    }
}
resources:
  dataset:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: example_dataset
      friendlyName: test
      description: This is a test description
      location: US
      defaultTableExpirationMs: 3600000
      defaultEncryptionConfiguration:
        kmsKeyName: ${cryptoKey.id}
  cryptoKey:
    type: gcp:kms:CryptoKey
    name: crypto_key
    properties:
      name: example-key
      keyRing: ${keyRing.id}
  keyRing:
    type: gcp:kms:KeyRing
    name: key_ring
    properties:
      name: example-keyring
      location: us

The defaultEncryptionConfiguration property applies to all newly-created tables in the dataset. The kmsKeyName references a Cloud KMS key; BigQuery’s service account needs encrypt/decrypt permissions on that key. Existing tables retain their original encryption settings.
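The example above does not create that permission. Here is a minimal TypeScript sketch of one way to grant it, assuming the project's default BigQuery service account and the cryptoKey resource from the TypeScript example; your organization may manage this grant elsewhere.

import * as gcp from "@pulumi/gcp";

// Look up the project's BigQuery service account (Google creates it on demand).
const bqServiceAccount = gcp.bigquery.getDefaultServiceAccount();

// Allow that service account to encrypt and decrypt with the dataset's key.
const bqKmsGrant = new gcp.kms.CryptoKeyIAMMember("bq_kms_grant", {
    cryptoKeyId: cryptoKey.id, // the CryptoKey from the example above
    role: "roles/cloudkms.cryptoKeyEncrypterDecrypter",
    member: bqServiceAccount.then(sa => `serviceAccount:${sa.email}`),
});

In practice you would likely also add dependsOn: [bqKmsGrant] to the dataset's resource options so the grant exists before BigQuery first uses the key.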

Grant cross-dataset access for authorized views

Data warehouses often need views in one dataset to query tables in another, requiring explicit authorization between datasets.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const bqowner = new gcp.serviceaccount.Account("bqowner", {accountId: "bqowner"});
const _public = new gcp.bigquery.Dataset("public", {
    datasetId: "public",
    friendlyName: "test",
    description: "This dataset is public",
    location: "EU",
    defaultTableExpirationMs: 3600000,
    labels: {
        env: "default",
    },
    accesses: [
        {
            role: "OWNER",
            userByEmail: bqowner.email,
        },
        {
            role: "READER",
            domain: "hashicorp.com",
        },
    ],
});
const dataset = new gcp.bigquery.Dataset("dataset", {
    datasetId: "private",
    friendlyName: "test",
    description: "This dataset is private",
    location: "EU",
    defaultTableExpirationMs: 3600000,
    labels: {
        env: "default",
    },
    accesses: [
        {
            role: "OWNER",
            userByEmail: bqowner.email,
        },
        {
            role: "READER",
            domain: "hashicorp.com",
        },
        {
            dataset: {
                dataset: {
                    projectId: _public.project,
                    datasetId: _public.datasetId,
                },
                targetTypes: ["VIEWS"],
            },
        },
    ],
});
import pulumi
import pulumi_gcp as gcp

bqowner = gcp.serviceaccount.Account("bqowner", account_id="bqowner")
public = gcp.bigquery.Dataset("public",
    dataset_id="public",
    friendly_name="test",
    description="This dataset is public",
    location="EU",
    default_table_expiration_ms=3600000,
    labels={
        "env": "default",
    },
    accesses=[
        {
            "role": "OWNER",
            "user_by_email": bqowner.email,
        },
        {
            "role": "READER",
            "domain": "hashicorp.com",
        },
    ])
dataset = gcp.bigquery.Dataset("dataset",
    dataset_id="private",
    friendly_name="test",
    description="This dataset is private",
    location="EU",
    default_table_expiration_ms=3600000,
    labels={
        "env": "default",
    },
    accesses=[
        {
            "role": "OWNER",
            "user_by_email": bqowner.email,
        },
        {
            "role": "READER",
            "domain": "hashicorp.com",
        },
        {
            "dataset": {
                "dataset": {
                    "project_id": public.project,
                    "dataset_id": public.dataset_id,
                },
                "target_types": ["VIEWS"],
            },
        },
    ])
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/serviceaccount"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		bqowner, err := serviceaccount.NewAccount(ctx, "bqowner", &serviceaccount.AccountArgs{
			AccountId: pulumi.String("bqowner"),
		})
		if err != nil {
			return err
		}
		public, err := bigquery.NewDataset(ctx, "public", &bigquery.DatasetArgs{
			DatasetId:                pulumi.String("public"),
			FriendlyName:             pulumi.String("test"),
			Description:              pulumi.String("This dataset is public"),
			Location:                 pulumi.String("EU"),
			DefaultTableExpirationMs: pulumi.Int(3600000),
			Labels: pulumi.StringMap{
				"env": pulumi.String("default"),
			},
			Accesses: bigquery.DatasetAccessTypeArray{
				&bigquery.DatasetAccessTypeArgs{
					Role:        pulumi.String("OWNER"),
					UserByEmail: bqowner.Email,
				},
				&bigquery.DatasetAccessTypeArgs{
					Role:   pulumi.String("READER"),
					Domain: pulumi.String("hashicorp.com"),
				},
			},
		})
		if err != nil {
			return err
		}
		_, err = bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
			DatasetId:                pulumi.String("private"),
			FriendlyName:             pulumi.String("test"),
			Description:              pulumi.String("This dataset is private"),
			Location:                 pulumi.String("EU"),
			DefaultTableExpirationMs: pulumi.Int(3600000),
			Labels: pulumi.StringMap{
				"env": pulumi.String("default"),
			},
			Accesses: bigquery.DatasetAccessTypeArray{
				&bigquery.DatasetAccessTypeArgs{
					Role:        pulumi.String("OWNER"),
					UserByEmail: bqowner.Email,
				},
				&bigquery.DatasetAccessTypeArgs{
					Role:   pulumi.String("READER"),
					Domain: pulumi.String("hashicorp.com"),
				},
				&bigquery.DatasetAccessTypeArgs{
					Dataset: &bigquery.DatasetAccessDatasetArgs{
						Dataset: &bigquery.DatasetAccessDatasetDatasetArgs{
							ProjectId: public.Project,
							DatasetId: public.DatasetId,
						},
						TargetTypes: pulumi.StringArray{
							pulumi.String("VIEWS"),
						},
					},
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    var bqowner = new Gcp.ServiceAccount.Account("bqowner", new()
    {
        AccountId = "bqowner",
    });

    var @public = new Gcp.BigQuery.Dataset("public", new()
    {
        DatasetId = "public",
        FriendlyName = "test",
        Description = "This dataset is public",
        Location = "EU",
        DefaultTableExpirationMs = 3600000,
        Labels = 
        {
            { "env", "default" },
        },
        Accesses = new[]
        {
            new Gcp.BigQuery.Inputs.DatasetAccessArgs
            {
                Role = "OWNER",
                UserByEmail = bqowner.Email,
            },
            new Gcp.BigQuery.Inputs.DatasetAccessArgs
            {
                Role = "READER",
                Domain = "hashicorp.com",
            },
        },
    });

    var dataset = new Gcp.BigQuery.Dataset("dataset", new()
    {
        DatasetId = "private",
        FriendlyName = "test",
        Description = "This dataset is private",
        Location = "EU",
        DefaultTableExpirationMs = 3600000,
        Labels = 
        {
            { "env", "default" },
        },
        Accesses = new[]
        {
            new Gcp.BigQuery.Inputs.DatasetAccessArgs
            {
                Role = "OWNER",
                UserByEmail = bqowner.Email,
            },
            new Gcp.BigQuery.Inputs.DatasetAccessArgs
            {
                Role = "READER",
                Domain = "hashicorp.com",
            },
            new Gcp.BigQuery.Inputs.DatasetAccessArgs
            {
                Dataset = new Gcp.BigQuery.Inputs.DatasetAccessDatasetArgs
                {
                    Dataset = new Gcp.BigQuery.Inputs.DatasetAccessDatasetDatasetArgs
                    {
                        ProjectId = @public.Project,
                        DatasetId = @public.DatasetId,
                    },
                    TargetTypes = new[]
                    {
                        "VIEWS",
                    },
                },
            },
        },
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.serviceaccount.Account;
import com.pulumi.gcp.serviceaccount.AccountArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessDatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessDatasetDatasetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var bqowner = new Account("bqowner", AccountArgs.builder()
            .accountId("bqowner")
            .build());

        var public_ = new Dataset("public", DatasetArgs.builder()
            .datasetId("public")
            .friendlyName("test")
            .description("This dataset is public")
            .location("EU")
            .defaultTableExpirationMs(3600000)
            .labels(Map.of("env", "default"))
            .accesses(            
                DatasetAccessArgs.builder()
                    .role("OWNER")
                    .userByEmail(bqowner.email())
                    .build(),
                DatasetAccessArgs.builder()
                    .role("READER")
                    .domain("hashicorp.com")
                    .build())
            .build());

        var dataset = new Dataset("dataset", DatasetArgs.builder()
            .datasetId("private")
            .friendlyName("test")
            .description("This dataset is private")
            .location("EU")
            .defaultTableExpirationMs(3600000)
            .labels(Map.of("env", "default"))
            .accesses(            
                DatasetAccessArgs.builder()
                    .role("OWNER")
                    .userByEmail(bqowner.email())
                    .build(),
                DatasetAccessArgs.builder()
                    .role("READER")
                    .domain("hashicorp.com")
                    .build(),
                DatasetAccessArgs.builder()
                    .dataset(DatasetAccessDatasetArgs.builder()
                        .dataset(DatasetAccessDatasetDatasetArgs.builder()
                            .projectId(public_.project())
                            .datasetId(public_.datasetId())
                            .build())
                        .targetTypes("VIEWS")
                        .build())
                    .build())
            .build());

    }
}
resources:
  public:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: public
      friendlyName: test
      description: This dataset is public
      location: EU
      defaultTableExpirationMs: 3600000
      labels:
        env: default
      accesses:
        - role: OWNER
          userByEmail: ${bqowner.email}
        - role: READER
          domain: hashicorp.com
  dataset:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: private
      friendlyName: test
      description: This dataset is private
      location: EU
      defaultTableExpirationMs: 3600000
      labels:
        env: default
      accesses:
        - role: OWNER
          userByEmail: ${bqowner.email}
        - role: READER
          domain: hashicorp.com
        - dataset:
            dataset:
              projectId: ${public.project}
              datasetId: ${public.datasetId}
            targetTypes:
              - VIEWS
  bqowner:
    type: gcp:serviceaccount:Account
    properties:
      accountId: bqowner

The accesses array can include a dataset block that authorizes another dataset to reference this one. The targetTypes property limits the authorization to specific object types (here, VIEWS). Because the entry is declared on the private dataset and names the public dataset, views defined in the public dataset can query tables in the private dataset without readers needing direct access to the private data.
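To exercise the authorization, you would typically place a table in the private dataset and a view over it in the public dataset. Below is a minimal TypeScript sketch reusing the dataset and _public resources from the TypeScript example; the table name and schema are assumptions.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// A table holding the sensitive rows, inside the private dataset.
const privateTable = new gcp.bigquery.Table("private_table", {
    datasetId: dataset.datasetId,
    tableId: "events",
    deletionProtection: false,
    schema: JSON.stringify([
        { name: "id", type: "STRING", mode: "REQUIRED" },
        { name: "occurred_at", type: "TIMESTAMP", mode: "NULLABLE" },
    ]),
});

// A view in the public dataset that selects from the private table; the
// dataset access entry above is what authorizes this cross-dataset query.
const publicView = new gcp.bigquery.Table("public_view", {
    datasetId: _public.datasetId,
    tableId: "events_view",
    deletionProtection: false,
    view: {
        query: pulumi.interpolate`SELECT id, occurred_at FROM \`${dataset.project}.${dataset.datasetId}.${privateTable.tableId}\``,
        useLegacySql: false,
    },
});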

Reference external AWS Glue catalogs

Multi-cloud analytics workflows can query AWS Glue tables directly from BigQuery by creating a dataset that references the external catalog.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const dataset = new gcp.bigquery.Dataset("dataset", {
    datasetId: "example_dataset",
    friendlyName: "test",
    description: "This is a test description",
    location: "aws-us-east-1",
    externalDatasetReference: {
        externalSource: "aws-glue://arn:aws:glue:us-east-1:999999999999:database/database",
        connection: "projects/project/locations/aws-us-east-1/connections/connection",
    },
});
import pulumi
import pulumi_gcp as gcp

dataset = gcp.bigquery.Dataset("dataset",
    dataset_id="example_dataset",
    friendly_name="test",
    description="This is a test description",
    location="aws-us-east-1",
    external_dataset_reference={
        "external_source": "aws-glue://arn:aws:glue:us-east-1:999999999999:database/database",
        "connection": "projects/project/locations/aws-us-east-1/connections/connection",
    })
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
			DatasetId:    pulumi.String("example_dataset"),
			FriendlyName: pulumi.String("test"),
			Description:  pulumi.String("This is a test description"),
			Location:     pulumi.String("aws-us-east-1"),
			ExternalDatasetReference: &bigquery.DatasetExternalDatasetReferenceArgs{
				ExternalSource: pulumi.String("aws-glue://arn:aws:glue:us-east-1:999999999999:database/database"),
				Connection:     pulumi.String("projects/project/locations/aws-us-east-1/connections/connection"),
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    var dataset = new Gcp.BigQuery.Dataset("dataset", new()
    {
        DatasetId = "example_dataset",
        FriendlyName = "test",
        Description = "This is a test description",
        Location = "aws-us-east-1",
        ExternalDatasetReference = new Gcp.BigQuery.Inputs.DatasetExternalDatasetReferenceArgs
        {
            ExternalSource = "aws-glue://arn:aws:glue:us-east-1:999999999999:database/database",
            Connection = "projects/project/locations/aws-us-east-1/connections/connection",
        },
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetExternalDatasetReferenceArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var dataset = new Dataset("dataset", DatasetArgs.builder()
            .datasetId("example_dataset")
            .friendlyName("test")
            .description("This is a test description")
            .location("aws-us-east-1")
            .externalDatasetReference(DatasetExternalDatasetReferenceArgs.builder()
                .externalSource("aws-glue://arn:aws:glue:us-east-1:999999999999:database/database")
                .connection("projects/project/locations/aws-us-east-1/connections/connection")
                .build())
            .build());

    }
}
resources:
  dataset:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: example_dataset
      friendlyName: test
      description: This is a test description
      location: aws-us-east-1
      externalDatasetReference:
        externalSource: aws-glue://arn:aws:glue:us-east-1:999999999999:database/database
        connection: projects/project/locations/aws-us-east-1/connections/connection

The externalDatasetReference property points to an AWS Glue database via its ARN. The connection property references a BigQuery connection resource that handles authentication to AWS. The location must match the AWS region (here, aws-us-east-1). This enables querying AWS data without replication.
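The connection path in the example refers to a pre-existing BigQuery connection. Here is a minimal TypeScript sketch of how such a connection might be provisioned, assuming a hypothetical AWS IAM role ARN that BigQuery is allowed to assume.

import * as gcp from "@pulumi/gcp";

// A BigQuery connection that authenticates to AWS by assuming an IAM role.
const awsConnection = new gcp.bigquery.Connection("aws_connection", {
    connectionId: "connection",
    location: "aws-us-east-1",
    friendlyName: "AWS Glue catalog",
    description: "Connection used by the external dataset reference",
    aws: {
        accessRole: {
            // Hypothetical role ARN; it must grant access to the Glue database and its data.
            iamRoleId: "arn:aws:iam::999999999999:role/bigquery-omni-connection",
        },
    },
});

The dataset's connection property can then reference awsConnection.name, which should resolve to the projects/…/locations/…/connections/… path shown in the example.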

Beyond these examples

These snippets focus on specific dataset-level features: access control and encryption, cross-dataset authorization, and external catalog integration. They’re intentionally minimal rather than full data warehouse configurations.

The examples may reference pre-existing infrastructure such as service accounts for access control, Cloud KMS keys for encryption, and BigQuery connections to external systems like AWS. They focus on configuring the dataset rather than provisioning the surrounding infrastructure.

To keep things focused, common dataset patterns are omitted, including:

  • Table expiration vs partition expiration (defaultPartitionExpirationMs)
  • Case sensitivity and collation settings (isCaseInsensitive, defaultCollation)
  • Time travel configuration (maxTimeTravelHours)
  • Storage billing models (storageBillingModel)
  • Dataset deletion behavior (deleteContentsOnDestroy)

These omissions are intentional: the goal is to illustrate how each dataset feature is wired, not provide drop-in data warehouse modules. See the BigQuery Dataset resource reference for all available configuration options.
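For orientation only, here is a hedged TypeScript sketch of how several of those omitted settings are wired on a dataset; the values are illustrative assumptions, not recommendations.

import * as gcp from "@pulumi/gcp";

const tunedDataset = new gcp.bigquery.Dataset("tuned", {
    datasetId: "tuned_dataset",
    location: "US",
    defaultPartitionExpirationMs: 7776000000, // drop partitions after ~90 days
    isCaseInsensitive: true,                  // case-insensitive dataset and table names
    defaultCollation: "und:ci",               // case-insensitive string comparison by default
    maxTimeTravelHours: "72",                 // 3-day time travel window
    storageBillingModel: "PHYSICAL",          // bill on physical rather than logical bytes
    deleteContentsOnDestroy: true,            // let pulumi destroy remove tables too
});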

Let's Create and Configure BigQuery Datasets

Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.


Frequently Asked Questions

Resource Lifecycle & Immutability
Why does destroying my dataset fail?
Destroying a dataset fails if it contains tables unless you set deleteContentsOnDestroy to true, which automatically deletes all tables in the dataset.
What properties can't I change after creating a dataset?
The datasetId, project, location, and externalDatasetReference properties are immutable. Changing any of these forces creation of a new resource.
Why didn't my default settings apply to existing tables?
Changes to defaultTableExpirationMs, defaultPartitionExpirationMs, and defaultCollation only affect newly-created tables, not existing ones.
Access Control & Security
How do I grant access to users and domains?
Use the accesses array with entries containing role and either userByEmail or domain. For example, {role: "READER", domain: "example.com"}.
How do I authorize views from another dataset to access this one?
Add an access entry on the dataset being queried whose dataset block names the dataset containing the views (nested projectId and datasetId) and sets targetTypes: ["VIEWS"].
How do I authorize a routine to access this dataset?
Add an access entry with routine containing projectId, datasetId, and routineId of the authorized routine.
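A minimal TypeScript sketch, assuming a hypothetical routine in another dataset of the same project:

import * as gcp from "@pulumi/gcp";

const datasetWithAuthorizedRoutine = new gcp.bigquery.Dataset("dataset", {
    datasetId: "example_dataset",
    location: "EU",
    accesses: [{
        routine: {
            projectId: "my-project",       // project that owns the routine (assumption)
            datasetId: "shared_routines",  // dataset that contains the routine (assumption)
            routineId: "my_udf",           // the routine being authorized (assumption)
        },
    }],
});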
How do I use customer-managed encryption keys?
Set defaultEncryptionConfiguration with kmsKeyName pointing to your Cloud KMS crypto key ID. This applies to all newly-created tables in the dataset.
Table Expiration & Defaults
How do table expiration settings work?
Explicit table-level expirationTime takes precedence, followed by defaultPartitionExpirationMs for partitioned tables, then defaultTableExpirationMs for all tables. The minimum expiration is 3600000ms (1 hour). Changes only affect new tables.
What's the time travel window for datasets?
Use maxTimeTravelHours to set a window between 48 and 168 hours (2 to 7 days) for querying historical data.
Labels & Metadata
Why should I use effectiveLabels instead of labels?
The labels field is non-authoritative and only manages labels in your configuration. Use effectiveLabels to see all labels on the resource, including those set by other clients and services.
External Integrations
How do I connect a dataset to AWS Glue?
Set externalDatasetReference with externalSource pointing to your AWS Glue database ARN and connection pointing to your BigQuery connection resource. The location must match your AWS region (e.g., aws-us-east-1).
Can I make dataset and table names case-insensitive?
Yes, set isCaseInsensitive to true. This defaults to false (case-sensitive). Note that this setting doesn’t affect routine references.

Using a different cloud?