Create and Configure BigQuery Datasets

The gcp:bigquery/dataset:Dataset resource, part of the Pulumi Google Cloud provider, provisions BigQuery datasets that serve as containers for tables, views, and routines. This guide focuses on four capabilities: access control configuration, customer-managed encryption, cross-dataset authorization, and external data source integration.

A dataset doesn’t exist in isolation. It references service accounts for access control, Cloud KMS keys for encryption, and BigQuery connections for external data sources. The examples are intentionally small and show how to configure dataset-level features. Combine them with your own IAM setup and table definitions.

Create a dataset with access controls and metadata

Most deployments start with a dataset that has basic metadata and access controls to organize tables.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Service account that is granted the data-owner role on the dataset below.
const bqowner = new gcp.serviceaccount.Account("bqowner", {accountId: "bqowner"});

// Access entries, declared separately so the dataset definition stays compact.
const ownerAccess = {
    role: "roles/bigquery.dataOwner",
    userByEmail: bqowner.email,
};
const domainReaderAccess = {
    role: "READER",
    domain: "hashicorp.com",
};

// Dataset in the EU location with a default table expiration of 3,600,000 ms,
// an `env` label, and the two access entries defined above.
const dataset = new gcp.bigquery.Dataset("dataset", {
    datasetId: "example_dataset",
    friendlyName: "test",
    description: "This is a test description",
    location: "EU",
    defaultTableExpirationMs: 3600000,
    labels: {env: "default"},
    accesses: [ownerAccess, domainReaderAccess],
});
import pulumi
import pulumi_gcp as gcp

# Service account that is granted the data-owner role on the dataset below.
bqowner = gcp.serviceaccount.Account("bqowner", account_id="bqowner")

# Access entries, declared separately so the dataset call stays compact.
owner_access = {
    "role": "roles/bigquery.dataOwner",
    "user_by_email": bqowner.email,
}
domain_reader_access = {
    "role": "READER",
    "domain": "hashicorp.com",
}

# Dataset in the EU location with a default table expiration of 3,600,000 ms,
# an `env` label, and the two access entries defined above.
dataset = gcp.bigquery.Dataset(
    "dataset",
    dataset_id="example_dataset",
    friendly_name="test",
    description="This is a test description",
    location="EU",
    default_table_expiration_ms=3600000,
    labels={"env": "default"},
    accesses=[owner_access, domain_reader_access],
)
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/serviceaccount"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers a service account and a BigQuery dataset that grants that
// account the data-owner role and grants READER to the hashicorp.com domain.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		bqowner, err := serviceaccount.NewAccount(ctx, "bqowner", &serviceaccount.AccountArgs{
			AccountId: pulumi.String("bqowner"),
		})
		if err != nil {
			return err
		}

		// Access entries, declared up front to keep the dataset arguments short.
		ownerAccess := &bigquery.DatasetAccessTypeArgs{
			Role:        pulumi.String("roles/bigquery.dataOwner"),
			UserByEmail: bqowner.Email,
		}
		domainReaderAccess := &bigquery.DatasetAccessTypeArgs{
			Role:   pulumi.String("READER"),
			Domain: pulumi.String("hashicorp.com"),
		}

		datasetArgs := &bigquery.DatasetArgs{
			DatasetId:                pulumi.String("example_dataset"),
			FriendlyName:             pulumi.String("test"),
			Description:              pulumi.String("This is a test description"),
			Location:                 pulumi.String("EU"),
			DefaultTableExpirationMs: pulumi.Int(3600000),
			Labels: pulumi.StringMap{
				"env": pulumi.String("default"),
			},
			Accesses: bigquery.DatasetAccessTypeArray{ownerAccess, domainReaderAccess},
		}

		// The dataset handle is not used afterwards; only the error matters.
		_, err = bigquery.NewDataset(ctx, "dataset", datasetArgs)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() =>
{
    // Service account that is granted the data-owner role on the dataset below.
    var bqowner = new Gcp.ServiceAccount.Account("bqowner", new()
    {
        AccountId = "bqowner",
    });

    // Access entries, declared separately so the dataset definition stays compact.
    var ownerAccess = new Gcp.BigQuery.Inputs.DatasetAccessArgs
    {
        Role = "roles/bigquery.dataOwner",
        UserByEmail = bqowner.Email,
    };
    var domainReaderAccess = new Gcp.BigQuery.Inputs.DatasetAccessArgs
    {
        Role = "READER",
        Domain = "hashicorp.com",
    };

    // Dataset in the EU location with a default table expiration of 3,600,000 ms.
    var dataset = new Gcp.BigQuery.Dataset("dataset", new()
    {
        DatasetId = "example_dataset",
        FriendlyName = "test",
        Description = "This is a test description",
        Location = "EU",
        DefaultTableExpirationMs = 3600000,
        Labels =
        {
            { "env", "default" },
        },
        Accesses = new[] { ownerAccess, domainReaderAccess },
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.serviceaccount.Account;
import com.pulumi.gcp.serviceaccount.AccountArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that registers a service account and a BigQuery dataset
 * granting that account the data-owner role and READER to a domain.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources of this stack. */
    public static void stack(Context ctx) {
        var bqowner = new Account("bqowner", AccountArgs.builder()
            .accountId("bqowner")
            .build());

        // Access entries, built separately so the dataset builder chain stays short.
        var ownerAccess = DatasetAccessArgs.builder()
            .role("roles/bigquery.dataOwner")
            .userByEmail(bqowner.email())
            .build();
        var domainReaderAccess = DatasetAccessArgs.builder()
            .role("READER")
            .domain("hashicorp.com")
            .build();

        var dataset = new Dataset("dataset", DatasetArgs.builder()
            .datasetId("example_dataset")
            .friendlyName("test")
            .description("This is a test description")
            .location("EU")
            .defaultTableExpirationMs(3600000)
            .labels(Map.of("env", "default"))
            .accesses(ownerAccess, domainReaderAccess)
            .build());

    }
}
resources:
  # Dataset in the EU location with an `env` label and two access entries.
  dataset:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: example_dataset
      friendlyName: test
      description: This is a test description
      location: EU
      # Milliseconds, written as a plain integer to match the other language examples.
      defaultTableExpirationMs: 3600000
      labels:
        env: default
      accesses:
        # Data-owner role for the service account declared below.
        - role: roles/bigquery.dataOwner
          userByEmail: ${bqowner.email}
        # Read access for every user in the domain.
        - role: READER
          domain: hashicorp.com
  # Service account referenced by the first access entry above.
  bqowner:
    type: gcp:serviceaccount:Account
    properties:
      accountId: bqowner

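The datasetId property sets a unique identifier within your project. The accesses array defines who can interact with the dataset: userByEmail grants access to an individual user or service account (here, the bqowner service account), while domain grants access to all users in an organization. The role field accepts both IAM role names such as roles/bigquery.dataOwner and basic roles such as READER. The defaultTableExpirationMs property sets the default lifetime, in milliseconds, of tables created in this dataset unless overridden at the table level.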

Encrypt datasets with customer-managed keys

Regulated workloads often require customer-managed encryption keys to meet compliance requirements.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Key ring and crypto key that back the dataset's default encryption.
const keyRing = new gcp.kms.KeyRing("key_ring", {name: "example-keyring", location: "us"});
const cryptoKey = new gcp.kms.CryptoKey("crypto_key", {name: "example-key", keyRing: keyRing.id});

// Default encryption settings pointing at the crypto key created above.
const encryption = {
    kmsKeyName: cryptoKey.id,
};

// Dataset in the US location that uses the key as its default encryption key.
const dataset = new gcp.bigquery.Dataset("dataset", {
    datasetId: "example_dataset",
    friendlyName: "test",
    description: "This is a test description",
    location: "US",
    defaultTableExpirationMs: 3600000,
    defaultEncryptionConfiguration: encryption,
});
import pulumi
import pulumi_gcp as gcp

# Key ring and crypto key that back the dataset's default encryption.
key_ring = gcp.kms.KeyRing("key_ring", name="example-keyring", location="us")
crypto_key = gcp.kms.CryptoKey("crypto_key", name="example-key", key_ring=key_ring.id)

# Default encryption settings pointing at the crypto key created above.
encryption = {
    "kms_key_name": crypto_key.id,
}

# Dataset in the US location that uses the key as its default encryption key.
dataset = gcp.bigquery.Dataset(
    "dataset",
    dataset_id="example_dataset",
    friendly_name="test",
    description="This is a test description",
    location="US",
    default_table_expiration_ms=3600000,
    default_encryption_configuration=encryption,
)
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/kms"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers a KMS key ring, a crypto key in that ring, and a BigQuery
// dataset whose default encryption configuration references the crypto key.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		keyRing, err := kms.NewKeyRing(ctx, "key_ring", &kms.KeyRingArgs{
			Name:     pulumi.String("example-keyring"),
			Location: pulumi.String("us"),
		})
		if err != nil {
			return err
		}

		cryptoKey, err := kms.NewCryptoKey(ctx, "crypto_key", &kms.CryptoKeyArgs{
			Name:    pulumi.String("example-key"),
			KeyRing: keyRing.ID(),
		})
		if err != nil {
			return err
		}

		// Default encryption settings pointing at the crypto key created above.
		encryption := &bigquery.DatasetDefaultEncryptionConfigurationArgs{
			KmsKeyName: cryptoKey.ID(),
		}

		datasetArgs := &bigquery.DatasetArgs{
			DatasetId:                      pulumi.String("example_dataset"),
			FriendlyName:                   pulumi.String("test"),
			Description:                    pulumi.String("This is a test description"),
			Location:                       pulumi.String("US"),
			DefaultTableExpirationMs:       pulumi.Int(3600000),
			DefaultEncryptionConfiguration: encryption,
		}

		// The dataset handle is not used afterwards; only the error matters.
		_, err = bigquery.NewDataset(ctx, "dataset", datasetArgs)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() =>
{
    // Key ring and crypto key that back the dataset's default encryption.
    var keyRing = new Gcp.Kms.KeyRing("key_ring", new()
    {
        Name = "example-keyring",
        Location = "us",
    });

    var cryptoKey = new Gcp.Kms.CryptoKey("crypto_key", new()
    {
        Name = "example-key",
        KeyRing = keyRing.Id,
    });

    // Default encryption settings pointing at the crypto key created above.
    var encryption = new Gcp.BigQuery.Inputs.DatasetDefaultEncryptionConfigurationArgs
    {
        KmsKeyName = cryptoKey.Id,
    };

    // Dataset in the US location that uses the key as its default encryption key.
    var dataset = new Gcp.BigQuery.Dataset("dataset", new()
    {
        DatasetId = "example_dataset",
        FriendlyName = "test",
        Description = "This is a test description",
        Location = "US",
        DefaultTableExpirationMs = 3600000,
        DefaultEncryptionConfiguration = encryption,
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.kms.KeyRing;
import com.pulumi.gcp.kms.KeyRingArgs;
import com.pulumi.gcp.kms.CryptoKey;
import com.pulumi.gcp.kms.CryptoKeyArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetDefaultEncryptionConfigurationArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that registers a KMS key ring, a crypto key in that ring,
 * and a BigQuery dataset whose default encryption configuration references
 * the crypto key.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources of this stack. */
    public static void stack(Context ctx) {
        // Logical names use the same "key_ring" / "crypto_key" spelling as the
        // TypeScript, Python, Go, C# and YAML versions of this example.
        var keyRing = new KeyRing("key_ring", KeyRingArgs.builder()
            .name("example-keyring")
            .location("us")
            .build());

        var cryptoKey = new CryptoKey("crypto_key", CryptoKeyArgs.builder()
            .name("example-key")
            .keyRing(keyRing.id())
            .build());

        // Dataset in the US location that uses the key as its default encryption key.
        var dataset = new Dataset("dataset", DatasetArgs.builder()
            .datasetId("example_dataset")
            .friendlyName("test")
            .description("This is a test description")
            .location("US")
            .defaultTableExpirationMs(3600000)
            .defaultEncryptionConfiguration(DatasetDefaultEncryptionConfigurationArgs.builder()
                .kmsKeyName(cryptoKey.id())
                .build())
            .build());

    }
}
resources:
  # Dataset in the US location whose default encryption key is the crypto key below.
  dataset:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: example_dataset
      friendlyName: test
      description: This is a test description
      location: US
      # Milliseconds, written as a plain integer to match the other language examples.
      defaultTableExpirationMs: 3600000
      defaultEncryptionConfiguration:
        kmsKeyName: ${cryptoKey.id}
  # Crypto key created inside the key ring declared below.
  cryptoKey:
    type: gcp:kms:CryptoKey
    name: crypto_key
    properties:
      name: example-key
      keyRing: ${keyRing.id}
  # Key ring that holds the crypto key.
  keyRing:
    type: gcp:kms:KeyRing
    name: key_ring
    properties:
      name: example-keyring
      location: us

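The defaultEncryptionConfiguration property applies your KMS key to all newly-created tables in the dataset. BigQuery uses this key for encryption at rest unless a table-level key overrides it. The BigQuery service account for your project needs permission to use the KMS key, typically the Cloud KMS CryptoKey Encrypter/Decrypter role (roles/cloudkms.cryptoKeyEncrypterDecrypter) on the key; this example does not grant that permission.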

Grant cross-dataset access for authorized views

Organizations share specific views from one dataset while keeping underlying tables private through authorized datasets.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Service account that is granted OWNER on both datasets.
const bqowner = new gcp.serviceaccount.Account("bqowner", {accountId: "bqowner"});

// Access entries and labels that both datasets have in common.
const ownerAccess = {
    role: "OWNER",
    userByEmail: bqowner.email,
};
const domainReaderAccess = {
    role: "READER",
    domain: "hashicorp.com",
};
const commonLabels = {env: "default"};

// Dataset with datasetId "public"; the "private" dataset below references it.
const _public = new gcp.bigquery.Dataset("public", {
    datasetId: "public",
    friendlyName: "test",
    description: "This dataset is public",
    location: "EU",
    defaultTableExpirationMs: 3600000,
    labels: commonLabels,
    accesses: [ownerAccess, domainReaderAccess],
});

// Access entry that references the "public" dataset with target type VIEWS.
const publicViewsAccess = {
    dataset: {
        dataset: {
            projectId: _public.project,
            datasetId: _public.datasetId,
        },
        targetTypes: ["VIEWS"],
    },
};

// Dataset with datasetId "private": the common entries plus the dataset entry.
const dataset = new gcp.bigquery.Dataset("dataset", {
    datasetId: "private",
    friendlyName: "test",
    description: "This dataset is private",
    location: "EU",
    defaultTableExpirationMs: 3600000,
    labels: commonLabels,
    accesses: [ownerAccess, domainReaderAccess, publicViewsAccess],
});
import pulumi
import pulumi_gcp as gcp

# Service account that is granted OWNER on both datasets.
bqowner = gcp.serviceaccount.Account("bqowner", account_id="bqowner")

# Access entries and labels that both datasets have in common.
owner_access = {
    "role": "OWNER",
    "user_by_email": bqowner.email,
}
domain_reader_access = {
    "role": "READER",
    "domain": "hashicorp.com",
}
common_labels = {"env": "default"}

# Dataset with dataset_id "public"; the "private" dataset below references it.
public = gcp.bigquery.Dataset(
    "public",
    dataset_id="public",
    friendly_name="test",
    description="This dataset is public",
    location="EU",
    default_table_expiration_ms=3600000,
    labels=common_labels,
    accesses=[owner_access, domain_reader_access],
)

# Access entry that references the "public" dataset with target type VIEWS.
public_views_access = {
    "dataset": {
        "dataset": {
            "project_id": public.project,
            "dataset_id": public.dataset_id,
        },
        "target_types": ["VIEWS"],
    },
}

# Dataset with dataset_id "private": the common entries plus the dataset entry.
dataset = gcp.bigquery.Dataset(
    "dataset",
    dataset_id="private",
    friendly_name="test",
    description="This dataset is private",
    location="EU",
    default_table_expiration_ms=3600000,
    labels=common_labels,
    accesses=[owner_access, domain_reader_access, public_views_access],
)
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/serviceaccount"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers a service account and two BigQuery datasets. The dataset
// with ID "private" carries an access entry that references the dataset with
// ID "public" and target type VIEWS.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		bqowner, err := serviceaccount.NewAccount(ctx, "bqowner", &serviceaccount.AccountArgs{
			AccountId: pulumi.String("bqowner"),
		})
		if err != nil {
			return err
		}

		// Access entries and labels that both datasets have in common.
		ownerAccess := &bigquery.DatasetAccessTypeArgs{
			Role:        pulumi.String("OWNER"),
			UserByEmail: bqowner.Email,
		}
		domainReaderAccess := &bigquery.DatasetAccessTypeArgs{
			Role:   pulumi.String("READER"),
			Domain: pulumi.String("hashicorp.com"),
		}
		commonLabels := pulumi.StringMap{
			"env": pulumi.String("default"),
		}

		public, err := bigquery.NewDataset(ctx, "public", &bigquery.DatasetArgs{
			DatasetId:                pulumi.String("public"),
			FriendlyName:             pulumi.String("test"),
			Description:              pulumi.String("This dataset is public"),
			Location:                 pulumi.String("EU"),
			DefaultTableExpirationMs: pulumi.Int(3600000),
			Labels:                   commonLabels,
			Accesses:                 bigquery.DatasetAccessTypeArray{ownerAccess, domainReaderAccess},
		})
		if err != nil {
			return err
		}

		// Access entry that references the "public" dataset with target type VIEWS.
		publicViewsAccess := &bigquery.DatasetAccessTypeArgs{
			Dataset: &bigquery.DatasetAccessDatasetArgs{
				Dataset: &bigquery.DatasetAccessDatasetDatasetArgs{
					ProjectId: public.Project,
					DatasetId: public.DatasetId,
				},
				TargetTypes: pulumi.StringArray{
					pulumi.String("VIEWS"),
				},
			},
		}

		// The dataset handle is not used afterwards; only the error matters.
		_, err = bigquery.NewDataset(ctx, "dataset", &bigquery.DatasetArgs{
			DatasetId:                pulumi.String("private"),
			FriendlyName:             pulumi.String("test"),
			Description:              pulumi.String("This dataset is private"),
			Location:                 pulumi.String("EU"),
			DefaultTableExpirationMs: pulumi.Int(3600000),
			Labels:                   commonLabels,
			Accesses: bigquery.DatasetAccessTypeArray{
				ownerAccess,
				domainReaderAccess,
				publicViewsAccess,
			},
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() =>
{
    // Service account that is granted OWNER on both datasets.
    var bqowner = new Gcp.ServiceAccount.Account("bqowner", new()
    {
        AccountId = "bqowner",
    });

    // Access entries that both datasets have in common.
    var ownerAccess = new Gcp.BigQuery.Inputs.DatasetAccessArgs
    {
        Role = "OWNER",
        UserByEmail = bqowner.Email,
    };
    var domainReaderAccess = new Gcp.BigQuery.Inputs.DatasetAccessArgs
    {
        Role = "READER",
        Domain = "hashicorp.com",
    };

    // Dataset with DatasetId "public"; the "private" dataset below references it.
    var @public = new Gcp.BigQuery.Dataset("public", new()
    {
        DatasetId = "public",
        FriendlyName = "test",
        Description = "This dataset is public",
        Location = "EU",
        DefaultTableExpirationMs = 3600000,
        Labels =
        {
            { "env", "default" },
        },
        Accesses = new[] { ownerAccess, domainReaderAccess },
    });

    // Access entry that references the "public" dataset with target type VIEWS.
    var publicViewsAccess = new Gcp.BigQuery.Inputs.DatasetAccessArgs
    {
        Dataset = new Gcp.BigQuery.Inputs.DatasetAccessDatasetArgs
        {
            Dataset = new Gcp.BigQuery.Inputs.DatasetAccessDatasetDatasetArgs
            {
                ProjectId = @public.Project,
                DatasetId = @public.DatasetId,
            },
            TargetTypes = new[]
            {
                "VIEWS",
            },
        },
    };

    // Dataset with DatasetId "private": the common entries plus the dataset entry.
    var dataset = new Gcp.BigQuery.Dataset("dataset", new()
    {
        DatasetId = "private",
        FriendlyName = "test",
        Description = "This dataset is private",
        Location = "EU",
        DefaultTableExpirationMs = 3600000,
        Labels =
        {
            { "env", "default" },
        },
        Accesses = new[] { ownerAccess, domainReaderAccess, publicViewsAccess },
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.serviceaccount.Account;
import com.pulumi.gcp.serviceaccount.AccountArgs;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessDatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetAccessDatasetDatasetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that registers a service account and two BigQuery datasets.
 * The dataset with ID "private" carries an access entry that references the
 * dataset with ID "public" and target type VIEWS.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources of this stack. */
    public static void stack(Context ctx) {
        var bqowner = new Account("bqowner", AccountArgs.builder()
            .accountId("bqowner")
            .build());

        // Access entries that both datasets have in common.
        var ownerAccess = DatasetAccessArgs.builder()
            .role("OWNER")
            .userByEmail(bqowner.email())
            .build();
        var domainReaderAccess = DatasetAccessArgs.builder()
            .role("READER")
            .domain("hashicorp.com")
            .build();

        var public_ = new Dataset("public", DatasetArgs.builder()
            .datasetId("public")
            .friendlyName("test")
            .description("This dataset is public")
            .location("EU")
            .defaultTableExpirationMs(3600000)
            .labels(Map.of("env", "default"))
            .accesses(ownerAccess, domainReaderAccess)
            .build());

        // Access entry that references the "public" dataset with target type VIEWS.
        var publicViewsAccess = DatasetAccessArgs.builder()
            .dataset(DatasetAccessDatasetArgs.builder()
                .dataset(DatasetAccessDatasetDatasetArgs.builder()
                    .projectId(public_.project())
                    .datasetId(public_.datasetId())
                    .build())
                .targetTypes("VIEWS")
                .build())
            .build();

        var dataset = new Dataset("dataset", DatasetArgs.builder()
            .datasetId("private")
            .friendlyName("test")
            .description("This dataset is private")
            .location("EU")
            .defaultTableExpirationMs(3600000)
            .labels(Map.of("env", "default"))
            .accesses(ownerAccess, domainReaderAccess, publicViewsAccess)
            .build());

    }
}
resources:
  # Dataset with datasetId "public"; the "private" dataset below references it.
  public:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: public
      friendlyName: test
      description: This dataset is public
      location: EU
      # Milliseconds, written as a plain integer to match the other language examples.
      defaultTableExpirationMs: 3600000
      labels:
        env: default
      accesses:
        - role: OWNER
          userByEmail: ${bqowner.email}
        - role: READER
          domain: hashicorp.com
  # Dataset with datasetId "private".
  dataset:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: private
      friendlyName: test
      description: This dataset is private
      location: EU
      # Milliseconds, written as a plain integer to match the other language examples.
      defaultTableExpirationMs: 3600000
      labels:
        env: default
      accesses:
        - role: OWNER
          userByEmail: ${bqowner.email}
        - role: READER
          domain: hashicorp.com
        # Entry that references the "public" dataset with target type VIEWS.
        - dataset:
            dataset:
              projectId: ${public.project}
              datasetId: ${public.datasetId}
            targetTypes:
              - VIEWS
  # Service account granted OWNER on both datasets.
  bqowner:
    type: gcp:serviceaccount:Account
    properties:
      accountId: bqowner

The dataset access type in the accesses array grants views in the public dataset permission to query tables in the private dataset. The targetTypes property restricts authorization to views only, not direct table access. This enables selective data sharing without exposing raw data.

Reference external data sources from AWS Glue

Multi-cloud analytics require BigQuery to query data catalogs managed outside Google Cloud.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// External source and the BigQuery connection path the dataset refers to.
const glueReference = {
    externalSource: "aws-glue://arn:aws:glue:us-east-1:999999999999:database/database",
    connection: "projects/project/locations/aws-us-east-1/connections/connection",
};

// Dataset in the aws-us-east-1 location that carries the external reference.
const dataset = new gcp.bigquery.Dataset("dataset", {
    datasetId: "example_dataset",
    friendlyName: "test",
    description: "This is a test description",
    location: "aws-us-east-1",
    externalDatasetReference: glueReference,
});
import pulumi
import pulumi_gcp as gcp

# External source and the BigQuery connection path the dataset refers to.
glue_reference = {
    "external_source": "aws-glue://arn:aws:glue:us-east-1:999999999999:database/database",
    "connection": "projects/project/locations/aws-us-east-1/connections/connection",
}

# Dataset in the aws-us-east-1 location that carries the external reference.
dataset = gcp.bigquery.Dataset(
    "dataset",
    dataset_id="example_dataset",
    friendly_name="test",
    description="This is a test description",
    location="aws-us-east-1",
    external_dataset_reference=glue_reference,
)
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers a BigQuery dataset in the aws-us-east-1 location whose
// external dataset reference names an AWS Glue source and a BigQuery connection.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// External source and the BigQuery connection path the dataset refers to.
		glueReference := &bigquery.DatasetExternalDatasetReferenceArgs{
			ExternalSource: pulumi.String("aws-glue://arn:aws:glue:us-east-1:999999999999:database/database"),
			Connection:     pulumi.String("projects/project/locations/aws-us-east-1/connections/connection"),
		}

		datasetArgs := &bigquery.DatasetArgs{
			DatasetId:                pulumi.String("example_dataset"),
			FriendlyName:             pulumi.String("test"),
			Description:              pulumi.String("This is a test description"),
			Location:                 pulumi.String("aws-us-east-1"),
			ExternalDatasetReference: glueReference,
		}

		// The dataset handle is not used afterwards; only the error matters.
		_, err := bigquery.NewDataset(ctx, "dataset", datasetArgs)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() =>
{
    // External source and the BigQuery connection path the dataset refers to.
    var glueReference = new Gcp.BigQuery.Inputs.DatasetExternalDatasetReferenceArgs
    {
        ExternalSource = "aws-glue://arn:aws:glue:us-east-1:999999999999:database/database",
        Connection = "projects/project/locations/aws-us-east-1/connections/connection",
    };

    // Dataset in the aws-us-east-1 location that carries the external reference.
    var dataset = new Gcp.BigQuery.Dataset("dataset", new()
    {
        DatasetId = "example_dataset",
        FriendlyName = "test",
        Description = "This is a test description",
        Location = "aws-us-east-1",
        ExternalDatasetReference = glueReference,
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.inputs.DatasetExternalDatasetReferenceArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that registers a BigQuery dataset in the aws-us-east-1
 * location whose external dataset reference names an AWS Glue source and a
 * BigQuery connection.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources of this stack. */
    public static void stack(Context ctx) {
        // External source and the BigQuery connection path the dataset refers to.
        var glueReference = DatasetExternalDatasetReferenceArgs.builder()
            .externalSource("aws-glue://arn:aws:glue:us-east-1:999999999999:database/database")
            .connection("projects/project/locations/aws-us-east-1/connections/connection")
            .build();

        var dataset = new Dataset("dataset", DatasetArgs.builder()
            .datasetId("example_dataset")
            .friendlyName("test")
            .description("This is a test description")
            .location("aws-us-east-1")
            .externalDatasetReference(glueReference)
            .build());

    }
}
resources:
  # Dataset in the aws-us-east-1 location that carries an external dataset reference.
  dataset:
    type: "gcp:bigquery:Dataset"
    properties:
      datasetId: "example_dataset"
      friendlyName: "test"
      description: "This is a test description"
      location: "aws-us-east-1"
      externalDatasetReference:
        # External source and the BigQuery connection path the dataset refers to.
        externalSource: "aws-glue://arn:aws:glue:us-east-1:999999999999:database/database"
        connection: "projects/project/locations/aws-us-east-1/connections/connection"

The externalDatasetReference property connects BigQuery to an AWS Glue database through a pre-configured BigQuery connection. The externalSource property points to the AWS Glue database ARN. Queries run through BigQuery but read metadata from AWS Glue without data movement.

Beyond These Examples

These snippets focus on specific dataset-level features: access controls (IAM roles, domain-based, cross-dataset), customer-managed encryption (CMEK), and external data source integration (AWS Glue). They’re intentionally minimal rather than full analytics environments.

The examples may reference pre-existing infrastructure such as service accounts for access control, Cloud KMS keys and permissions, BigQuery connections to external systems, and AWS Glue databases (for multi-cloud scenarios). They focus on configuring the dataset rather than provisioning everything around it.

To keep things focused, common dataset patterns are omitted, including:

  • Partition expiration policies (defaultPartitionExpirationMs)
  • Case sensitivity settings (isCaseInsensitive)
  • Time travel windows (maxTimeTravelHours)
  • Storage billing models (storageBillingModel)
  • Collation specifications (defaultCollation)
  • Authorized routines for function sharing

These omissions are intentional: the goal is to illustrate how each dataset feature is wired, not provide drop-in analytics modules. See the BigQuery Dataset resource reference for all available configuration options.

Frequently Asked Questions

Resource Lifecycle & Immutability
Why does deleting my dataset fail even though I destroyed the resource?
By default, dataset destruction fails if tables exist. Set deleteContentsOnDestroy to true to automatically delete all tables when destroying the dataset.
What properties can't be changed after dataset creation?
Four properties are immutable: datasetId, project, location, and externalDatasetReference. Changing any of these forces creation of a new resource.
What are the naming constraints for dataset IDs?
The datasetId must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_), with a maximum length of 1,024 characters.
Access Control & Security
How do I grant access to a service account or domain?
Add entries to the accesses array with role and either userByEmail (for an individual user or service account email) or domain (for domain-wide access).
How do I let views in another dataset query my tables?
On the dataset that holds the tables, add an accesses entry with a nested dataset object containing the projectId and datasetId of the dataset that holds the views, plus targetTypes set to ['VIEWS'].
How do I grant a routine access to my dataset?
Add an accesses entry with a nested routine object specifying the routine’s projectId, datasetId, and routineId.
How do I enable customer-managed encryption for my dataset?
Set defaultEncryptionConfiguration.kmsKeyName to your Cloud KMS crypto key ID. All newly-created tables will inherit this encryption key.
Table Defaults & Expiration
What's the minimum table expiration time I can set?
The defaultTableExpirationMs property requires a minimum value of 3600000 milliseconds (1 hour).
What's the difference between defaultTableExpirationMs and defaultPartitionExpirationMs?
For partitioned tables, defaultPartitionExpirationMs takes precedence over defaultTableExpirationMs. Only one of these properties applies to any new partitioned table.
Will changing defaultCollation update my existing tables?
No, changes to defaultCollation only affect tables created afterwards. Existing tables retain their original collation settings.
Labels & Metadata
Why aren't labels removed when I delete them from my Pulumi config?
The labels field is non-authoritative and only manages labels in your configuration. It won’t remove labels added by other clients or services. Use effectiveLabels to view all labels on the resource.
Advanced Features
What are the time travel limits for BigQuery datasets?
The maxTimeTravelHours property accepts values from 48 to 168 hours (2 to 7 days).
How do I connect my dataset to an external AWS Glue database?
Set externalDatasetReference.externalSource to your AWS Glue ARN (e.g., aws-glue://arn:aws:glue:us-east-1:999999999999:database/database) and externalDatasetReference.connection to your BigQuery connection resource path.

Ready to get started?

Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.

Create free account