Configure GCP BigQuery Data Policies

The gcp:bigquery/datapolicyv2DataPolicy:Datapolicyv2DataPolicy resource, part of the Pulumi GCP provider, defines BigQuery data policies that control access to sensitive columns through raw access controls, masking rules, or custom transformations. This guide focuses on three capabilities: raw data access policies, predefined and custom masking, and fine-grained principal access.

Data policies can reference BigQuery routines (which live in datasets) for custom masking and IAM principals for access grants. The examples are intentionally small. Combine them with your own datasets, routines, and IAM configuration.

Create a raw data access policy

Organizations controlling access to sensitive BigQuery columns start by defining data policies that govern who can see unmasked data.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Raw data access policy: no masking is configured, only the policy type,
// its ID, and the location it lives in.
const basicDataPolicy = new gcp.bigquery.Datapolicyv2DataPolicy(
    "basic_data_policy",
    {
        dataPolicyId: "basic_data_policy",
        dataPolicyType: "RAW_DATA_ACCESS_POLICY",
        location: "us-central1",
    },
);
import pulumi
import pulumi_gcp as gcp

# Raw data access policy: no masking is configured, only the policy type,
# its ID, and the location it lives in.
basic_data_policy = gcp.bigquery.Datapolicyv2DataPolicy(
    "basic_data_policy",
    data_policy_id="basic_data_policy",
    data_policy_type="RAW_DATA_ACCESS_POLICY",
    location="us-central1",
)
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers a single raw data access policy with the Pulumi engine.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// No masking is configured: only the policy type, ID, and location.
		args := &bigquery.Datapolicyv2DataPolicyArgs{
			DataPolicyId:   pulumi.String("basic_data_policy"),
			DataPolicyType: pulumi.String("RAW_DATA_ACCESS_POLICY"),
			Location:       pulumi.String("us-central1"),
		}

		// The resource handle is not used afterwards, so only the error is kept.
		_, err := bigquery.NewDatapolicyv2DataPolicy(ctx, "basic_data_policy", args)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Program entry point: declares a single raw data access policy.
return await Deployment.RunAsync(() =>
{
    // No masking is configured: only the policy type, ID, and location.
    var basicDataPolicy = new Gcp.BigQuery.Datapolicyv2DataPolicy(
        "basic_data_policy",
        new()
        {
            DataPolicyId = "basic_data_policy",
            DataPolicyType = "RAW_DATA_ACCESS_POLICY",
            Location = "us-central1",
        });
});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Datapolicyv2DataPolicy;
import com.pulumi.gcp.bigquery.Datapolicyv2DataPolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that declares a single raw data access policy.
 */
public class App {
    /**
     * Hands the stack definition to the Pulumi runtime.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the resources of this stack.
     *
     * @param ctx the Pulumi deployment context
     */
    public static void stack(Context ctx) {
        // Logical name uses the same "basic_data_policy" spelling as the other
        // language variants of this example, so every variant declares the
        // same resource name.
        var basicDataPolicy = new Datapolicyv2DataPolicy("basic_data_policy", Datapolicyv2DataPolicyArgs.builder()
            .location("us-central1")
            .dataPolicyType("RAW_DATA_ACCESS_POLICY")
            .dataPolicyId("basic_data_policy")
            .build());

    }
}
# Declares a single raw data access policy.
resources:
  basicDataPolicy:
    type: gcp:bigquery:Datapolicyv2DataPolicy
    # Explicit resource name, independent of the map key above.
    name: basic_data_policy
    properties:
      location: us-central1
      # No masking is configured for this policy type.
      dataPolicyType: RAW_DATA_ACCESS_POLICY
      dataPolicyId: basic_data_policy

The dataPolicyType property set to RAW_DATA_ACCESS_POLICY establishes baseline access control without masking. The dataPolicyId provides a human-readable identifier within the project and location. This policy type controls who can query the underlying column values directly.

Apply predefined masking with SHA256

When columns contain sensitive identifiers like email addresses or account numbers, predefined masking expressions provide standard transformations without custom code.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Masking policy that uses the predefined SHA256 expression instead of a
// custom routine.
const predefinedMaskingDataPolicy = new gcp.bigquery.Datapolicyv2DataPolicy(
    "predefined_masking_data_policy",
    {
        dataPolicyId: "predefined_masking_data_policy",
        location: "us-central1",
        dataPolicyType: "DATA_MASKING_POLICY",
        dataMaskingPolicy: { predefinedExpression: "SHA256" },
    },
);
import pulumi
import pulumi_gcp as gcp

# Masking policy that uses the predefined SHA256 expression instead of a
# custom routine.
predefined_masking_data_policy = gcp.bigquery.Datapolicyv2DataPolicy(
    "predefined_masking_data_policy",
    data_policy_id="predefined_masking_data_policy",
    location="us-central1",
    data_policy_type="DATA_MASKING_POLICY",
    data_masking_policy={"predefined_expression": "SHA256"},
)
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers a data masking policy that uses the predefined SHA256
// expression.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Predefined masking: no custom routine is referenced.
		masking := &bigquery.Datapolicyv2DataPolicyDataMaskingPolicyArgs{
			PredefinedExpression: pulumi.String("SHA256"),
		}

		// The resource handle is not used afterwards, so only the error is kept.
		_, err := bigquery.NewDatapolicyv2DataPolicy(ctx, "predefined_masking_data_policy", &bigquery.Datapolicyv2DataPolicyArgs{
			DataPolicyId:      pulumi.String("predefined_masking_data_policy"),
			Location:          pulumi.String("us-central1"),
			DataPolicyType:    pulumi.String("DATA_MASKING_POLICY"),
			DataMaskingPolicy: masking,
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Program entry point: declares a masking policy that uses the predefined
// SHA256 expression.
return await Deployment.RunAsync(() =>
{
    var predefinedMaskingDataPolicy = new Gcp.BigQuery.Datapolicyv2DataPolicy(
        "predefined_masking_data_policy",
        new()
        {
            DataPolicyId = "predefined_masking_data_policy",
            Location = "us-central1",
            DataPolicyType = "DATA_MASKING_POLICY",
            // Predefined masking: no custom routine is referenced.
            DataMaskingPolicy = new Gcp.BigQuery.Inputs.Datapolicyv2DataPolicyDataMaskingPolicyArgs
            {
                PredefinedExpression = "SHA256",
            },
        });
});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Datapolicyv2DataPolicy;
import com.pulumi.gcp.bigquery.Datapolicyv2DataPolicyArgs;
import com.pulumi.gcp.bigquery.inputs.Datapolicyv2DataPolicyDataMaskingPolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that declares a data masking policy using the predefined
 * SHA256 expression.
 */
public class App {
    /**
     * Hands the stack definition to the Pulumi runtime.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the resources of this stack.
     *
     * @param ctx the Pulumi deployment context
     */
    public static void stack(Context ctx) {
        // Logical name uses the same "predefined_masking_data_policy" spelling
        // as the other language variants of this example, so every variant
        // declares the same resource name.
        var predefinedMaskingDataPolicy = new Datapolicyv2DataPolicy("predefined_masking_data_policy", Datapolicyv2DataPolicyArgs.builder()
            .location("us-central1")
            .dataPolicyType("DATA_MASKING_POLICY")
            // Predefined masking: no custom routine is referenced.
            .dataMaskingPolicy(Datapolicyv2DataPolicyDataMaskingPolicyArgs.builder()
                .predefinedExpression("SHA256")
                .build())
            .dataPolicyId("predefined_masking_data_policy")
            .build());

    }
}
# Declares a data masking policy that uses a predefined expression.
resources:
  predefinedMaskingDataPolicy:
    type: gcp:bigquery:Datapolicyv2DataPolicy
    # Explicit resource name, independent of the map key above.
    name: predefined_masking_data_policy
    properties:
      location: us-central1
      dataPolicyType: DATA_MASKING_POLICY
      # Predefined masking: no custom routine is referenced.
      dataMaskingPolicy:
        predefinedExpression: SHA256
      dataPolicyId: predefined_masking_data_policy

The dataPolicyType switches to DATA_MASKING_POLICY, and the dataMaskingPolicy block specifies a predefinedExpression. SHA256 hashes column values at query time, so users subject to the policy see a hash instead of the original value. Because identical inputs always produce the same hash, equality joins and grouping on the masked column still work.

Mask data with a custom SQL routine

Complex masking requirements often need custom logic, such as redacting the digits of a social security number while preserving its format.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// JSON type descriptor shared by the routine's argument and return type.
const stringType = "{\"typeKind\" :  \"STRING\"}";

// Dataset that hosts the masking routine.
const test = new gcp.bigquery.Dataset("test", {
    location: "us-central1",
    datasetId: "dataset_id",
});

// SQL scalar function flagged for data masking; it replaces every digit of
// its `ssn` argument with 'X'.
const customMaskingRoutine = new gcp.bigquery.Routine("custom_masking_routine", {
    routineId: "custom_masking_routine",
    datasetId: test.datasetId,
    dataGovernanceType: "DATA_MASKING",
    routineType: "SCALAR_FUNCTION",
    language: "SQL",
    arguments: [{ name: "ssn", dataType: stringType }],
    returnType: stringType,
    definitionBody: "SAFE.REGEXP_REPLACE(ssn, '[0-9]', 'X')",
});

// Masking policy that delegates to the routine above via its resource ID.
const routineDataPolicy = new gcp.bigquery.Datapolicyv2DataPolicy("routine_data_policy", {
    dataPolicyId: "routine_data_policy",
    location: "us-central1",
    dataPolicyType: "DATA_MASKING_POLICY",
    dataMaskingPolicy: { routine: customMaskingRoutine.id },
});
import pulumi
import pulumi_gcp as gcp

# JSON type descriptor shared by the routine's argument and return type.
string_type = "{\"typeKind\" :  \"STRING\"}"

# Dataset that hosts the masking routine.
test = gcp.bigquery.Dataset(
    "test",
    location="us-central1",
    dataset_id="dataset_id",
)

# SQL scalar function flagged for data masking; it replaces every digit of
# its `ssn` argument with 'X'.
custom_masking_routine = gcp.bigquery.Routine(
    "custom_masking_routine",
    routine_id="custom_masking_routine",
    dataset_id=test.dataset_id,
    data_governance_type="DATA_MASKING",
    routine_type="SCALAR_FUNCTION",
    language="SQL",
    arguments=[{"name": "ssn", "data_type": string_type}],
    return_type=string_type,
    definition_body="SAFE.REGEXP_REPLACE(ssn, '[0-9]', 'X')",
)

# Masking policy that delegates to the routine above via its resource ID.
routine_data_policy = gcp.bigquery.Datapolicyv2DataPolicy(
    "routine_data_policy",
    data_policy_id="routine_data_policy",
    location="us-central1",
    data_policy_type="DATA_MASKING_POLICY",
    data_masking_policy={"routine": custom_masking_routine.id},
)
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main declares a dataset, a SQL masking routine inside it, and a data
// masking policy that points at that routine.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// JSON type descriptor shared by the routine's argument and return type.
		const stringType = "{\"typeKind\" :  \"STRING\"}"

		// Dataset that hosts the masking routine.
		test, err := bigquery.NewDataset(ctx, "test", &bigquery.DatasetArgs{
			Location:  pulumi.String("us-central1"),
			DatasetId: pulumi.String("dataset_id"),
		})
		if err != nil {
			return err
		}

		// SQL scalar function flagged for data masking; it replaces every
		// digit of its ssn argument with 'X'.
		customMaskingRoutine, err := bigquery.NewRoutine(ctx, "custom_masking_routine", &bigquery.RoutineArgs{
			RoutineId:          pulumi.String("custom_masking_routine"),
			DatasetId:          test.DatasetId,
			DataGovernanceType: pulumi.String("DATA_MASKING"),
			RoutineType:        pulumi.String("SCALAR_FUNCTION"),
			Language:           pulumi.String("SQL"),
			Arguments: bigquery.RoutineArgumentArray{
				&bigquery.RoutineArgumentArgs{
					Name:     pulumi.String("ssn"),
					DataType: pulumi.String(stringType),
				},
			},
			ReturnType:     pulumi.String(stringType),
			DefinitionBody: pulumi.String("SAFE.REGEXP_REPLACE(ssn, '[0-9]', 'X')"),
		})
		if err != nil {
			return err
		}

		// Masking policy that delegates to the routine via its resource ID.
		// This is the last resource, so its error is the function's result.
		_, err = bigquery.NewDatapolicyv2DataPolicy(ctx, "routine_data_policy", &bigquery.Datapolicyv2DataPolicyArgs{
			DataPolicyId:   pulumi.String("routine_data_policy"),
			Location:       pulumi.String("us-central1"),
			DataPolicyType: pulumi.String("DATA_MASKING_POLICY"),
			DataMaskingPolicy: &bigquery.Datapolicyv2DataPolicyDataMaskingPolicyArgs{
				Routine: customMaskingRoutine.ID(),
			},
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Program entry point: declares a dataset, a SQL masking routine inside it,
// and a data masking policy that points at that routine.
return await Deployment.RunAsync(() =>
{
    // JSON type descriptor shared by the routine's argument and return type.
    const string stringType = "{\"typeKind\" :  \"STRING\"}";

    // Dataset that hosts the masking routine.
    var test = new Gcp.BigQuery.Dataset("test", new()
    {
        Location = "us-central1",
        DatasetId = "dataset_id",
    });

    // SQL scalar function flagged for data masking; it replaces every digit
    // of its ssn argument with 'X'.
    var customMaskingRoutine = new Gcp.BigQuery.Routine("custom_masking_routine", new()
    {
        RoutineId = "custom_masking_routine",
        DatasetId = test.DatasetId,
        DataGovernanceType = "DATA_MASKING",
        RoutineType = "SCALAR_FUNCTION",
        Language = "SQL",
        Arguments = new[]
        {
            new Gcp.BigQuery.Inputs.RoutineArgumentArgs
            {
                Name = "ssn",
                DataType = stringType,
            },
        },
        ReturnType = stringType,
        DefinitionBody = "SAFE.REGEXP_REPLACE(ssn, '[0-9]', 'X')",
    });

    // Masking policy that delegates to the routine above via its resource ID.
    var routineDataPolicy = new Gcp.BigQuery.Datapolicyv2DataPolicy("routine_data_policy", new()
    {
        DataPolicyId = "routine_data_policy",
        Location = "us-central1",
        DataPolicyType = "DATA_MASKING_POLICY",
        DataMaskingPolicy = new Gcp.BigQuery.Inputs.Datapolicyv2DataPolicyDataMaskingPolicyArgs
        {
            Routine = customMaskingRoutine.Id,
        },
    });
});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.Routine;
import com.pulumi.gcp.bigquery.RoutineArgs;
import com.pulumi.gcp.bigquery.inputs.RoutineArgumentArgs;
import com.pulumi.gcp.bigquery.Datapolicyv2DataPolicy;
import com.pulumi.gcp.bigquery.Datapolicyv2DataPolicyArgs;
import com.pulumi.gcp.bigquery.inputs.Datapolicyv2DataPolicyDataMaskingPolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that declares a dataset, a SQL masking routine inside it,
 * and a data masking policy that points at that routine.
 */
public class App {
    /**
     * Hands the stack definition to the Pulumi runtime.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the resources of this stack.
     *
     * @param ctx the Pulumi deployment context
     */
    public static void stack(Context ctx) {
        // Dataset that hosts the masking routine.
        var test = new Dataset("test", DatasetArgs.builder()
            .datasetId("dataset_id")
            .location("us-central1")
            .build());

        // SQL scalar function flagged for data masking; it replaces every
        // digit of its ssn argument with 'X'. The logical name uses the same
        // "custom_masking_routine" spelling as the other language variants.
        var customMaskingRoutine = new Routine("custom_masking_routine", RoutineArgs.builder()
            .datasetId(test.datasetId())
            .routineId("custom_masking_routine")
            .routineType("SCALAR_FUNCTION")
            .language("SQL")
            .dataGovernanceType("DATA_MASKING")
            .definitionBody("SAFE.REGEXP_REPLACE(ssn, '[0-9]', 'X')")
            .returnType("{\"typeKind\" :  \"STRING\"}")
            .arguments(RoutineArgumentArgs.builder()
                .name("ssn")
                .dataType("{\"typeKind\" :  \"STRING\"}")
                .build())
            .build());

        // Masking policy that delegates to the routine above via its resource
        // ID. The logical name uses the same "routine_data_policy" spelling as
        // the other language variants.
        var routineDataPolicy = new Datapolicyv2DataPolicy("routine_data_policy", Datapolicyv2DataPolicyArgs.builder()
            .location("us-central1")
            .dataPolicyId("routine_data_policy")
            .dataPolicyType("DATA_MASKING_POLICY")
            .dataMaskingPolicy(Datapolicyv2DataPolicyDataMaskingPolicyArgs.builder()
                .routine(customMaskingRoutine.id())
                .build())
            .build());

    }
}
# Declares a dataset, a SQL masking routine inside it, and a data masking
# policy that points at that routine.
resources:
  routineDataPolicy:
    type: gcp:bigquery:Datapolicyv2DataPolicy
    name: routine_data_policy
    properties:
      location: us-central1
      dataPolicyId: routine_data_policy
      dataPolicyType: DATA_MASKING_POLICY
      dataMaskingPolicy:
        # References the customMaskingRoutine resource declared further down.
        routine: ${customMaskingRoutine.id}
  # Dataset that hosts the masking routine.
  test:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: dataset_id
      location: us-central1
  customMaskingRoutine:
    type: gcp:bigquery:Routine
    name: custom_masking_routine
    properties:
      datasetId: ${test.datasetId}
      routineId: custom_masking_routine
      routineType: SCALAR_FUNCTION
      language: SQL
      # Flags the routine for use as a masking function.
      dataGovernanceType: DATA_MASKING
      # Replaces every digit of the ssn argument with 'X'.
      definitionBody: SAFE.REGEXP_REPLACE(ssn, '[0-9]', 'X')
      # Type descriptors are passed as JSON strings.
      returnType: '{"typeKind" :  "STRING"}'
      arguments:
        - name: ssn
          dataType: '{"typeKind" :  "STRING"}'

The dataMaskingPolicy.routine property references a BigQuery routine that executes at query time. The routine must have dataGovernanceType set to DATA_MASKING. In this example, SAFE.REGEXP_REPLACE replaces every digit with X and leaves all other characters in place, so the SSN format stays intact (for example, 123-45-6789 becomes XXX-XX-XXXX). The routine receives the column value as input and returns the masked result.

Grant fine-grained access to specific principals

Data policies can specify exactly which users, groups, or service accounts can access governed data, enabling column-level access control without table-level IAM permissions.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Raw data access policy that names its grantees explicitly.
const dataPolicyWithGrantees = new gcp.bigquery.Datapolicyv2DataPolicy(
    "data_policy_with_grantees",
    {
        dataPolicyId: "data_policy_with_grantees",
        dataPolicyType: "RAW_DATA_ACCESS_POLICY",
        location: "us-central1",
        // One principal identifier per entry.
        grantees: [
            "principal://goog/subject/jane@example.com",
        ],
    },
);
import pulumi
import pulumi_gcp as gcp

# Raw data access policy that names its grantees explicitly.
data_policy_with_grantees = gcp.bigquery.Datapolicyv2DataPolicy(
    "data_policy_with_grantees",
    data_policy_id="data_policy_with_grantees",
    data_policy_type="RAW_DATA_ACCESS_POLICY",
    location="us-central1",
    # One principal identifier per entry.
    grantees=[
        "principal://goog/subject/jane@example.com",
    ],
)
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers a raw data access policy that names its grantees explicitly.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// One principal identifier per entry.
		grantees := pulumi.StringArray{
			pulumi.String("principal://goog/subject/jane@example.com"),
		}

		// The resource handle is not used afterwards, so only the error is kept.
		_, err := bigquery.NewDatapolicyv2DataPolicy(ctx, "data_policy_with_grantees", &bigquery.Datapolicyv2DataPolicyArgs{
			DataPolicyId:   pulumi.String("data_policy_with_grantees"),
			DataPolicyType: pulumi.String("RAW_DATA_ACCESS_POLICY"),
			Location:       pulumi.String("us-central1"),
			Grantees:       grantees,
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Program entry point: declares a raw data access policy that names its
// grantees explicitly.
return await Deployment.RunAsync(() =>
{
    var dataPolicyWithGrantees = new Gcp.BigQuery.Datapolicyv2DataPolicy(
        "data_policy_with_grantees",
        new()
        {
            DataPolicyId = "data_policy_with_grantees",
            DataPolicyType = "RAW_DATA_ACCESS_POLICY",
            Location = "us-central1",
            // One principal identifier per entry.
            Grantees = new[] { "principal://goog/subject/jane@example.com" },
        });
});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Datapolicyv2DataPolicy;
import com.pulumi.gcp.bigquery.Datapolicyv2DataPolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that declares a raw data access policy with an explicit
 * list of grantees.
 */
public class App {
    /**
     * Hands the stack definition to the Pulumi runtime.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the resources of this stack.
     *
     * @param ctx the Pulumi deployment context
     */
    public static void stack(Context ctx) {
        // Logical name uses the same "data_policy_with_grantees" spelling as
        // the other language variants of this example, so every variant
        // declares the same resource name.
        var dataPolicyWithGrantees = new Datapolicyv2DataPolicy("data_policy_with_grantees", Datapolicyv2DataPolicyArgs.builder()
            .location("us-central1")
            .dataPolicyType("RAW_DATA_ACCESS_POLICY")
            // One principal identifier per argument.
            .grantees("principal://goog/subject/jane@example.com")
            .dataPolicyId("data_policy_with_grantees")
            .build());

    }
}
# Declares a raw data access policy that names its grantees explicitly.
resources:
  dataPolicyWithGrantees:
    type: gcp:bigquery:Datapolicyv2DataPolicy
    # Explicit resource name, independent of the map key above.
    name: data_policy_with_grantees
    properties:
      location: us-central1
      dataPolicyType: RAW_DATA_ACCESS_POLICY
      # One principal identifier per list entry.
      grantees:
        - principal://goog/subject/jane@example.com
      dataPolicyId: data_policy_with_grantees

The grantees property accepts an array of IAM V2 principal identifiers. Principals listed here can access the data governed by this policy. The syntax principal://goog/subject/jane@example.com identifies individual users; you can also specify groups, service accounts, or Cloud Identity domains.

Beyond these examples

These snippets focus on specific data policy features: raw data access policies, predefined and custom masking, and fine-grained principal access. They’re intentionally minimal rather than full data governance solutions.

The custom routine example creates its own dataset and routine; in your own programs these would typically already exist. The grantee example assumes the IAM principals it names (users, groups, service accounts) already exist. All examples focus on configuring the data policy rather than provisioning the surrounding infrastructure.

To keep things focused, common data policy patterns are omitted, including:

  • Policy tag integration (policyTag property for V1 policies)
  • Column-level security policies (COLUMN_LEVEL_SECURITY_POLICY type)
  • Etag-based update conflict handling
  • Cross-project or cross-location policy references

These omissions are intentional: the goal is to illustrate how each data policy feature is wired, not provide drop-in governance modules. See the BigQuery Data Policy resource reference for all available configuration options.

Let's configure GCP BigQuery Data Policies

Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.

Try Pulumi Cloud for FREE

Frequently Asked Questions

Policy Types & Configuration
What types of data policies can I create?
You can create three types: DATA_MASKING_POLICY for masking sensitive data, RAW_DATA_ACCESS_POLICY for controlling raw data access, and COLUMN_LEVEL_SECURITY_POLICY for column-level security.
What properties are immutable after creating a data policy?
Both location and project are immutable and cannot be changed after the policy is created.
Data Masking
How do I create a data masking policy with predefined masking?
Set dataPolicyType to DATA_MASKING_POLICY and configure dataMaskingPolicy with a predefinedExpression like SHA256.
How do I use a custom masking routine instead of predefined expressions?
Create a BigQuery routine with dataGovernanceType set to DATA_MASKING, then reference its ID in dataMaskingPolicy.routine.
Access Control & Versions
How do I grant fine-grained access to specific users?
Add IAM principals to the grantees array using IAM V2 principal syntax, such as principal://goog/subject/jane@example.com.
What's the difference between V1 and V2 data policies?
V2 policies support the grantees field for fine-grained access control. V1 policies use policyTag instead and don’t populate the grantees field.
Can I use grantees with V1 data policies?
No, the grantees field is only supported in V2 data policies. V1 policies with policyTag set don’t populate this field.
Resource Management
What is the etag field used for?
The etag is required for update operations and must match the server’s current etag. It’s automatically populated in responses from create, get, and update calls.
What import formats are supported?
You can import using three formats: the full resource path projects/{{project}}/locations/{{location}}/dataPolicies/{{data_policy_id}}, the short form {{project}}/{{location}}/{{data_policy_id}}, or just {{location}}/{{data_policy_id}}.

Using a different cloud?

Explore analytics guides for other cloud providers: