Configure GCP BigQuery Data Policies

The gcp:bigquery/datapolicyv2DataPolicy:Datapolicyv2DataPolicy resource, part of the Pulumi GCP provider, defines BigQuery data policies that control access to sensitive column data through masking or access restrictions. This guide focuses on three capabilities: raw data access policies, predefined and custom masking, and fine-grained principal grants.

Data policies reference BigQuery datasets for custom routines and IAM principals for access grants. The examples are intentionally small. Combine them with your own datasets, routines, and IAM configuration.

Create a raw data access policy

Organizations controlling access to unmasked data start by defining which principals can bypass masking rules.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Raw data access policy: governs who can see unmasked column values.
// No grantees are listed, so the policy exists but grants no access.
const basicDataPolicy = new gcp.bigquery.Datapolicyv2DataPolicy("basic_data_policy", {
    location: "us-central1",
    dataPolicyType: "RAW_DATA_ACCESS_POLICY",
    // Human-readable identifier within the project and location.
    dataPolicyId: "basic_data_policy",
});
import pulumi
import pulumi_gcp as gcp

# Raw data access policy: governs who can see unmasked column values.
# No grantees are listed, so the policy exists but grants no access.
basic_data_policy = gcp.bigquery.Datapolicyv2DataPolicy("basic_data_policy",
    location="us-central1",
    data_policy_type="RAW_DATA_ACCESS_POLICY",
    data_policy_id="basic_data_policy")
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that declares a single raw data access policy.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// No grantees are set, so the policy exists but grants no access.
		policyArgs := &bigquery.Datapolicyv2DataPolicyArgs{
			DataPolicyId:   pulumi.String("basic_data_policy"),
			DataPolicyType: pulumi.String("RAW_DATA_ACCESS_POLICY"),
			Location:       pulumi.String("us-central1"),
		}
		// The resource handle is not needed; only the creation error is propagated.
		_, err := bigquery.NewDatapolicyv2DataPolicy(ctx, "basic_data_policy", policyArgs)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Pulumi program declaring a raw data access policy, which governs who can
// see unmasked column values. No grantees are listed, so it grants no access.
return await Deployment.RunAsync(() => 
{
    var basicDataPolicy = new Gcp.BigQuery.Datapolicyv2DataPolicy("basic_data_policy", new()
    {
        Location = "us-central1",
        DataPolicyType = "RAW_DATA_ACCESS_POLICY",
        // Human-readable identifier within the project and location.
        DataPolicyId = "basic_data_policy",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Datapolicyv2DataPolicy;
import com.pulumi.gcp.bigquery.Datapolicyv2DataPolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/** Pulumi program that declares a single raw data access policy. */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the raw data access policy. No grantees are set, so the policy
     * exists but grants no access.
     */
    public static void stack(Context ctx) {
        var basicDataPolicy = new Datapolicyv2DataPolicy("basicDataPolicy", Datapolicyv2DataPolicyArgs.builder()
            .location("us-central1")
            .dataPolicyType("RAW_DATA_ACCESS_POLICY")
            .dataPolicyId("basic_data_policy")
            .build());

    }
}
resources:
  # Raw data access policy: governs who can see unmasked column values.
  # No grantees are listed, so the policy exists but grants no access.
  basicDataPolicy:
    type: gcp:bigquery:Datapolicyv2DataPolicy
    name: basic_data_policy
    properties:
      location: us-central1
      dataPolicyType: RAW_DATA_ACCESS_POLICY
      # Human-readable identifier within the project and location.
      dataPolicyId: basic_data_policy

The dataPolicyType of RAW_DATA_ACCESS_POLICY creates a policy that governs who can see unmasked column values. The dataPolicyId provides a human-readable identifier within the project and location. Without grantees specified, the policy exists but grants no access.

Apply predefined masking with SHA256

When columns contain sensitive identifiers, predefined masking expressions provide standard transformations.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Data masking policy that uses a predefined masking expression.
const predefinedMaskingDataPolicy = new gcp.bigquery.Datapolicyv2DataPolicy("predefined_masking_data_policy", {
    location: "us-central1",
    dataPolicyType: "DATA_MASKING_POLICY",
    dataMaskingPolicy: {
        // SHA256 hashes column values: unreadable, but still unique for joins.
        predefinedExpression: "SHA256",
    },
    dataPolicyId: "predefined_masking_data_policy",
});
import pulumi
import pulumi_gcp as gcp

# Data masking policy that uses a predefined masking expression.
# SHA256 hashes column values: unreadable, but still unique for joins.
predefined_masking_data_policy = gcp.bigquery.Datapolicyv2DataPolicy("predefined_masking_data_policy",
    location="us-central1",
    data_policy_type="DATA_MASKING_POLICY",
    data_masking_policy={
        "predefined_expression": "SHA256",
    },
    data_policy_id="predefined_masking_data_policy")
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that declares a data masking policy backed by
// the predefined SHA256 expression.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// SHA256 hashes column values: unreadable, but still unique for joins.
		masking := &bigquery.Datapolicyv2DataPolicyDataMaskingPolicyArgs{
			PredefinedExpression: pulumi.String("SHA256"),
		}
		policyArgs := &bigquery.Datapolicyv2DataPolicyArgs{
			DataPolicyId:      pulumi.String("predefined_masking_data_policy"),
			DataPolicyType:    pulumi.String("DATA_MASKING_POLICY"),
			DataMaskingPolicy: masking,
			Location:          pulumi.String("us-central1"),
		}
		// The resource handle is not needed; only the creation error is propagated.
		_, err := bigquery.NewDatapolicyv2DataPolicy(ctx, "predefined_masking_data_policy", policyArgs)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Pulumi program declaring a data masking policy that uses a predefined
// masking expression.
return await Deployment.RunAsync(() => 
{
    var predefinedMaskingDataPolicy = new Gcp.BigQuery.Datapolicyv2DataPolicy("predefined_masking_data_policy", new()
    {
        Location = "us-central1",
        DataPolicyType = "DATA_MASKING_POLICY",
        DataMaskingPolicy = new Gcp.BigQuery.Inputs.Datapolicyv2DataPolicyDataMaskingPolicyArgs
        {
            // SHA256 hashes column values: unreadable, but still unique for joins.
            PredefinedExpression = "SHA256",
        },
        DataPolicyId = "predefined_masking_data_policy",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Datapolicyv2DataPolicy;
import com.pulumi.gcp.bigquery.Datapolicyv2DataPolicyArgs;
import com.pulumi.gcp.bigquery.inputs.Datapolicyv2DataPolicyDataMaskingPolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/** Pulumi program that declares a data masking policy with a predefined expression. */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the masking policy. SHA256 hashes column values, making them
     * unreadable while preserving uniqueness for joins.
     */
    public static void stack(Context ctx) {
        var predefinedMaskingDataPolicy = new Datapolicyv2DataPolicy("predefinedMaskingDataPolicy", Datapolicyv2DataPolicyArgs.builder()
            .location("us-central1")
            .dataPolicyType("DATA_MASKING_POLICY")
            .dataMaskingPolicy(Datapolicyv2DataPolicyDataMaskingPolicyArgs.builder()
                .predefinedExpression("SHA256")
                .build())
            .dataPolicyId("predefined_masking_data_policy")
            .build());

    }
}
resources:
  # Data masking policy that uses a predefined masking expression.
  predefinedMaskingDataPolicy:
    type: gcp:bigquery:Datapolicyv2DataPolicy
    name: predefined_masking_data_policy
    properties:
      location: us-central1
      dataPolicyType: DATA_MASKING_POLICY
      dataMaskingPolicy:
        # SHA256 hashes column values: unreadable, but still unique for joins.
        predefinedExpression: SHA256
      dataPolicyId: predefined_masking_data_policy

The dataMaskingPolicy block enables masking, and predefinedExpression specifies the transformation. SHA256 hashes column values, making them unreadable while preserving uniqueness for joins. BigQuery applies this masking automatically when queries access the protected column.

Mask data with a custom SQL routine

Complex masking requirements need custom logic that preserves format while obscuring values.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Dataset that hosts the custom masking routine.
const test = new gcp.bigquery.Dataset("test", {
    datasetId: "dataset_id",
    location: "us-central1",
});
// Scalar SQL function flagged for data masking: takes a STRING argument
// named ssn and returns it with every digit replaced by 'X'.
const customMaskingRoutine = new gcp.bigquery.Routine("custom_masking_routine", {
    datasetId: test.datasetId,
    routineId: "custom_masking_routine",
    routineType: "SCALAR_FUNCTION",
    language: "SQL",
    dataGovernanceType: "DATA_MASKING",
    definitionBody: "SAFE.REGEXP_REPLACE(ssn, '[0-9]', 'X')",
    returnType: "{\"typeKind\" :  \"STRING\"}",
    arguments: [{
        name: "ssn",
        dataType: "{\"typeKind\" :  \"STRING\"}",
    }],
});
// Data masking policy that delegates masking to the routine above.
const routineDataPolicy = new gcp.bigquery.Datapolicyv2DataPolicy("routine_data_policy", {
    location: "us-central1",
    dataPolicyId: "routine_data_policy",
    dataPolicyType: "DATA_MASKING_POLICY",
    dataMaskingPolicy: {
        routine: customMaskingRoutine.id,
    },
});
import pulumi
import pulumi_gcp as gcp

# Dataset that hosts the custom masking routine.
test = gcp.bigquery.Dataset("test",
    dataset_id="dataset_id",
    location="us-central1")
# Scalar SQL function flagged for data masking: takes a STRING argument
# named ssn and returns it with every digit replaced by 'X'.
custom_masking_routine = gcp.bigquery.Routine("custom_masking_routine",
    dataset_id=test.dataset_id,
    routine_id="custom_masking_routine",
    routine_type="SCALAR_FUNCTION",
    language="SQL",
    data_governance_type="DATA_MASKING",
    definition_body="SAFE.REGEXP_REPLACE(ssn, '[0-9]', 'X')",
    return_type="{\"typeKind\" :  \"STRING\"}",
    arguments=[{
        "name": "ssn",
        "data_type": "{\"typeKind\" :  \"STRING\"}",
    }])
# Data masking policy that delegates masking to the routine above.
routine_data_policy = gcp.bigquery.Datapolicyv2DataPolicy("routine_data_policy",
    location="us-central1",
    data_policy_id="routine_data_policy",
    data_policy_type="DATA_MASKING_POLICY",
    data_masking_policy={
        "routine": custom_masking_routine.id,
    })
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that declares a dataset, a SQL masking routine
// inside that dataset, and a data masking policy that points at the routine.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// JSON type descriptor shared by the routine's argument and return value.
		const stringType = "{\"typeKind\" :  \"STRING\"}"

		// Dataset that hosts the custom masking routine.
		dataset, err := bigquery.NewDataset(ctx, "test", &bigquery.DatasetArgs{
			Location:  pulumi.String("us-central1"),
			DatasetId: pulumi.String("dataset_id"),
		})
		if err != nil {
			return err
		}

		// Scalar SQL function flagged for data masking; it replaces every
		// digit of its ssn argument with 'X'.
		ssnArg := &bigquery.RoutineArgumentArgs{
			Name:     pulumi.String("ssn"),
			DataType: pulumi.String(stringType),
		}
		maskingRoutine, err := bigquery.NewRoutine(ctx, "custom_masking_routine", &bigquery.RoutineArgs{
			DatasetId:          dataset.DatasetId,
			RoutineId:          pulumi.String("custom_masking_routine"),
			RoutineType:        pulumi.String("SCALAR_FUNCTION"),
			Language:           pulumi.String("SQL"),
			DataGovernanceType: pulumi.String("DATA_MASKING"),
			DefinitionBody:     pulumi.String("SAFE.REGEXP_REPLACE(ssn, '[0-9]', 'X')"),
			ReturnType:         pulumi.String(stringType),
			Arguments:          bigquery.RoutineArgumentArray{ssnArg},
		})
		if err != nil {
			return err
		}

		// Data masking policy that delegates masking to the routine above.
		_, err = bigquery.NewDatapolicyv2DataPolicy(ctx, "routine_data_policy", &bigquery.Datapolicyv2DataPolicyArgs{
			Location:       pulumi.String("us-central1"),
			DataPolicyId:   pulumi.String("routine_data_policy"),
			DataPolicyType: pulumi.String("DATA_MASKING_POLICY"),
			DataMaskingPolicy: &bigquery.Datapolicyv2DataPolicyDataMaskingPolicyArgs{
				Routine: maskingRoutine.ID(),
			},
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Pulumi program declaring a dataset, a SQL masking routine inside it, and a
// data masking policy that points at the routine.
return await Deployment.RunAsync(() => 
{
    // Dataset that hosts the custom masking routine.
    var test = new Gcp.BigQuery.Dataset("test", new()
    {
        DatasetId = "dataset_id",
        Location = "us-central1",
    });

    // Scalar SQL function flagged for data masking: takes a STRING argument
    // named ssn and returns it with every digit replaced by 'X'.
    var customMaskingRoutine = new Gcp.BigQuery.Routine("custom_masking_routine", new()
    {
        DatasetId = test.DatasetId,
        RoutineId = "custom_masking_routine",
        RoutineType = "SCALAR_FUNCTION",
        Language = "SQL",
        DataGovernanceType = "DATA_MASKING",
        DefinitionBody = "SAFE.REGEXP_REPLACE(ssn, '[0-9]', 'X')",
        ReturnType = "{\"typeKind\" :  \"STRING\"}",
        Arguments = new[]
        {
            new Gcp.BigQuery.Inputs.RoutineArgumentArgs
            {
                Name = "ssn",
                DataType = "{\"typeKind\" :  \"STRING\"}",
            },
        },
    });

    // Data masking policy that delegates masking to the routine above.
    var routineDataPolicy = new Gcp.BigQuery.Datapolicyv2DataPolicy("routine_data_policy", new()
    {
        Location = "us-central1",
        DataPolicyId = "routine_data_policy",
        DataPolicyType = "DATA_MASKING_POLICY",
        DataMaskingPolicy = new Gcp.BigQuery.Inputs.Datapolicyv2DataPolicyDataMaskingPolicyArgs
        {
            Routine = customMaskingRoutine.Id,
        },
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Dataset;
import com.pulumi.gcp.bigquery.DatasetArgs;
import com.pulumi.gcp.bigquery.Routine;
import com.pulumi.gcp.bigquery.RoutineArgs;
import com.pulumi.gcp.bigquery.inputs.RoutineArgumentArgs;
import com.pulumi.gcp.bigquery.Datapolicyv2DataPolicy;
import com.pulumi.gcp.bigquery.Datapolicyv2DataPolicyArgs;
import com.pulumi.gcp.bigquery.inputs.Datapolicyv2DataPolicyDataMaskingPolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that declares a dataset, a SQL masking routine inside it,
 * and a data masking policy that points at the routine.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the dataset, the masking routine, and the policy that uses it. */
    public static void stack(Context ctx) {
        // Dataset that hosts the custom masking routine.
        var test = new Dataset("test", DatasetArgs.builder()
            .datasetId("dataset_id")
            .location("us-central1")
            .build());

        // Scalar SQL function flagged for data masking: takes a STRING argument
        // named ssn and returns it with every digit replaced by 'X'.
        var customMaskingRoutine = new Routine("customMaskingRoutine", RoutineArgs.builder()
            .datasetId(test.datasetId())
            .routineId("custom_masking_routine")
            .routineType("SCALAR_FUNCTION")
            .language("SQL")
            .dataGovernanceType("DATA_MASKING")
            .definitionBody("SAFE.REGEXP_REPLACE(ssn, '[0-9]', 'X')")
            .returnType("{\"typeKind\" :  \"STRING\"}")
            .arguments(RoutineArgumentArgs.builder()
                .name("ssn")
                .dataType("{\"typeKind\" :  \"STRING\"}")
                .build())
            .build());

        // Data masking policy that delegates masking to the routine above.
        var routineDataPolicy = new Datapolicyv2DataPolicy("routineDataPolicy", Datapolicyv2DataPolicyArgs.builder()
            .location("us-central1")
            .dataPolicyId("routine_data_policy")
            .dataPolicyType("DATA_MASKING_POLICY")
            .dataMaskingPolicy(Datapolicyv2DataPolicyDataMaskingPolicyArgs.builder()
                .routine(customMaskingRoutine.id())
                .build())
            .build());

    }
}
resources:
  # Data masking policy that delegates masking to customMaskingRoutine.
  routineDataPolicy:
    type: gcp:bigquery:Datapolicyv2DataPolicy
    name: routine_data_policy
    properties:
      location: us-central1
      dataPolicyId: routine_data_policy
      dataPolicyType: DATA_MASKING_POLICY
      dataMaskingPolicy:
        routine: ${customMaskingRoutine.id}
  # Dataset that hosts the custom masking routine.
  test:
    type: gcp:bigquery:Dataset
    properties:
      datasetId: dataset_id
      location: us-central1
  # Scalar SQL function flagged for data masking: takes a STRING argument
  # named ssn and returns it with every digit replaced by 'X'.
  customMaskingRoutine:
    type: gcp:bigquery:Routine
    name: custom_masking_routine
    properties:
      datasetId: ${test.datasetId}
      routineId: custom_masking_routine
      routineType: SCALAR_FUNCTION
      language: SQL
      dataGovernanceType: DATA_MASKING
      definitionBody: SAFE.REGEXP_REPLACE(ssn, '[0-9]', 'X')
      returnType: '{"typeKind" :  "STRING"}'
      arguments:
        - name: ssn
          dataType: '{"typeKind" :  "STRING"}'

The routine property points to a BigQuery routine with dataGovernanceType set to DATA_MASKING. The routine's definitionBody contains the masking logic (here, replacing each digit with 'X'). BigQuery invokes this routine when queries access the protected column, passing the raw value and returning the masked result.

Grant fine-grained access to specific principals

After defining a policy, you can grant specific users access without broad dataset permissions.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Raw data access policy with an explicit grantee list.
const dataPolicyWithGrantees = new gcp.bigquery.Datapolicyv2DataPolicy("data_policy_with_grantees", {
    location: "us-central1",
    dataPolicyType: "RAW_DATA_ACCESS_POLICY",
    // Principals listed here can access data governed by this policy,
    // bypassing any masking rules.
    grantees: ["principal://goog/subject/jane@example.com"],
    dataPolicyId: "data_policy_with_grantees",
});
import pulumi
import pulumi_gcp as gcp

# Raw data access policy with an explicit grantee list. Principals in
# grantees can access data governed by this policy, bypassing any masking rules.
data_policy_with_grantees = gcp.bigquery.Datapolicyv2DataPolicy("data_policy_with_grantees",
    location="us-central1",
    data_policy_type="RAW_DATA_ACCESS_POLICY",
    grantees=["principal://goog/subject/jane@example.com"],
    data_policy_id="data_policy_with_grantees")
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/bigquery"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that declares a raw data access policy with an
// explicit grantee list.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Principals listed here can access data governed by this policy,
		// bypassing any masking rules.
		grantees := pulumi.StringArray{
			pulumi.String("principal://goog/subject/jane@example.com"),
		}
		policyArgs := &bigquery.Datapolicyv2DataPolicyArgs{
			DataPolicyId:   pulumi.String("data_policy_with_grantees"),
			DataPolicyType: pulumi.String("RAW_DATA_ACCESS_POLICY"),
			Grantees:       grantees,
			Location:       pulumi.String("us-central1"),
		}
		// The resource handle is not needed; only the creation error is propagated.
		_, err := bigquery.NewDatapolicyv2DataPolicy(ctx, "data_policy_with_grantees", policyArgs)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Pulumi program declaring a raw data access policy with an explicit
// grantee list.
return await Deployment.RunAsync(() => 
{
    var dataPolicyWithGrantees = new Gcp.BigQuery.Datapolicyv2DataPolicy("data_policy_with_grantees", new()
    {
        Location = "us-central1",
        DataPolicyType = "RAW_DATA_ACCESS_POLICY",
        // Principals listed here can access data governed by this policy,
        // bypassing any masking rules.
        Grantees = new[]
        {
            "principal://goog/subject/jane@example.com",
        },
        DataPolicyId = "data_policy_with_grantees",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.bigquery.Datapolicyv2DataPolicy;
import com.pulumi.gcp.bigquery.Datapolicyv2DataPolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/** Pulumi program that declares a raw data access policy with a grantee list. */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the policy. Principals passed to grantees can access data
     * governed by this policy, bypassing any masking rules.
     */
    public static void stack(Context ctx) {
        var dataPolicyWithGrantees = new Datapolicyv2DataPolicy("dataPolicyWithGrantees", Datapolicyv2DataPolicyArgs.builder()
            .location("us-central1")
            .dataPolicyType("RAW_DATA_ACCESS_POLICY")
            .grantees("principal://goog/subject/jane@example.com")
            .dataPolicyId("data_policy_with_grantees")
            .build());

    }
}
resources:
  # Raw data access policy with an explicit grantee list.
  dataPolicyWithGrantees:
    type: gcp:bigquery:Datapolicyv2DataPolicy
    name: data_policy_with_grantees
    properties:
      location: us-central1
      dataPolicyType: RAW_DATA_ACCESS_POLICY
      # Principals listed here can access data governed by this policy,
      # bypassing any masking rules.
      grantees:
        - principal://goog/subject/jane@example.com
      dataPolicyId: data_policy_with_grantees

The grantees property lists IAM principals using IAM V2 principal syntax (for example, principal://goog/subject/jane@example.com). These principals can access data governed by this policy, bypassing any masking rules. This enables fine-grained access control at the column level.

Beyond these examples

These snippets focus on specific data policy features: raw data access and masking policies, predefined and custom masking routines, and fine-grained principal access. They’re intentionally minimal rather than full data governance solutions.

The examples may reference pre-existing infrastructure such as BigQuery datasets for custom routines, and IAM principals (users, service accounts, groups). They focus on configuring the policy rather than provisioning the surrounding infrastructure.

To keep things focused, common data policy patterns are omitted, including:

  • Policy tag integration (policyTag property for V1 policies)
  • Column-level security policies (COLUMN_LEVEL_SECURITY_POLICY type)
  • Policy versioning (version property)
  • Etag-based update conflict handling

These omissions are intentional: the goal is to illustrate how each data policy feature is wired, not provide drop-in governance modules. See the BigQuery Data Policy resource reference for all available configuration options.

Let's configure GCP BigQuery Data Policies

Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.

Try Pulumi Cloud for FREE

Frequently Asked Questions

Policy Types & Configuration
What types of data policies can I create?
You can create three types: DATA_MASKING_POLICY (masks sensitive data), RAW_DATA_ACCESS_POLICY (controls raw data access), and COLUMN_LEVEL_SECURITY_POLICY (column-level security).
What's the difference between V1 and V2 data policies?
V2 policies support the grantees field for fine-grained access control, while V1 policies use policyTag instead. The grantees field is not populated in V1 policies.
Data Masking
How do I mask data using predefined expressions?
Set dataPolicyType to DATA_MASKING_POLICY and configure dataMaskingPolicy with a predefinedExpression like SHA256.
Can I create custom masking logic?
Yes, create a BigQuery routine with dataGovernanceType set to DATA_MASKING, then reference it in dataMaskingPolicy.routine.
Access Control & Grantees
How do I grant access to specific users or groups?
Use the grantees field with IAM V2 principal syntax, such as principal://goog/subject/jane@example.com. This field is only supported in V2 data policies.
What principal types are supported for grantees?
Only users, groups, serviceaccounts, and cloudidentity principal types are supported. Use IAM V2 principal syntax for all grantees.
Updates & Immutability
Why is my data policy update failing?
Updates require the etag field to match the server’s current etag. Retrieve the current etag from the resource state before attempting updates.
Can I change the location or project after creating a data policy?
No, both location and project are immutable. Changing either requires recreating the resource.

Using a different cloud?

Explore analytics guides for other cloud providers: