Create GCP Data Loss Prevention Stored Info Types

The gcp:dataloss/preventionStoredInfoType:PreventionStoredInfoType resource, part of the Pulumi GCP provider, defines custom info types for Cloud DLP: regex patterns, word lists, or large dictionaries stored in Cloud Storage. This guide focuses on four capabilities: regex-based pattern matching, exact-match word lists, large dictionaries from Cloud Storage, and custom identifier control.

Stored info types belong to a GCP project or organization and may reference Cloud Storage buckets for large dictionaries. The examples are intentionally small. Combine them with your own DLP inspection jobs and data scanning workflows.

Match patterns with regular expressions

Data loss prevention often starts by defining custom patterns that identify sensitive information specific to your organization, such as internal patient IDs or employee codes.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Regex detection rule: the expression to match and the capture groups to extract.
const basicRegex = {
    pattern: "patient",
    groupIndexes: [2],
};

// Stored info type scoped to a project that matches on the regex rule above.
const basic = new gcp.dataloss.PreventionStoredInfoType("basic", {
    parent: "projects/my-project-name",
    displayName: "Displayname",
    description: "Description",
    regex: basicRegex,
});
import pulumi
import pulumi_gcp as gcp

# Regex detection rule: the expression to match and the capture groups to extract.
basic_regex = gcp.dataloss.PreventionStoredInfoTypeRegexArgs(
    pattern="patient",
    group_indexes=[2],
)

# Stored info type scoped to a project that matches on the regex rule above.
basic = gcp.dataloss.PreventionStoredInfoType(
    "basic",
    parent="projects/my-project-name",
    display_name="Displayname",
    description="Description",
    regex=basic_regex,
)
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataloss"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that declares one regex-based stored info type.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Regex detection rule: the expression to match and the capture groups to extract.
		regex := &dataloss.PreventionStoredInfoTypeRegexArgs{
			Pattern:      pulumi.String("patient"),
			GroupIndexes: pulumi.IntArray{pulumi.Int(2)},
		}
		args := &dataloss.PreventionStoredInfoTypeArgs{
			Parent:      pulumi.String("projects/my-project-name"),
			Description: pulumi.String("Description"),
			DisplayName: pulumi.String("Displayname"),
			Regex:       regex,
		}
		// The resource handle is unused; only the creation error matters here.
		_, err := dataloss.NewPreventionStoredInfoType(ctx, "basic", args)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() =>
{
    // Regex detection rule: the expression to match and the capture groups to extract.
    var basicRegex = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeRegexArgs
    {
        Pattern = "patient",
        GroupIndexes = new[] { 2 },
    };

    // Stored info type scoped to a project that matches on the regex rule above.
    var basic = new Gcp.DataLoss.PreventionStoredInfoType("basic", new Gcp.DataLoss.PreventionStoredInfoTypeArgs
    {
        Parent = "projects/my-project-name",
        DisplayName = "Displayname",
        Description = "Description",
        Regex = basicRegex,
    });
});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataloss.PreventionStoredInfoType;
import com.pulumi.gcp.dataloss.PreventionStoredInfoTypeArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeRegexArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/** Pulumi program that declares one regex-based stored info type. */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources of this stack. */
    public static void stack(Context ctx) {
        // Regex detection rule: the expression to match and the capture groups to extract.
        final var regexRule = PreventionStoredInfoTypeRegexArgs.builder()
            .pattern("patient")
            .groupIndexes(2)
            .build();

        final var infoTypeArgs = PreventionStoredInfoTypeArgs.builder()
            .parent("projects/my-project-name")
            .displayName("Displayname")
            .description("Description")
            .regex(regexRule)
            .build();

        var basic = new PreventionStoredInfoType("basic", infoTypeArgs);
    }
}
resources:
  # Stored info type scoped to a project that matches on a regular expression.
  basic:
    type: gcp:dataloss:PreventionStoredInfoType
    properties:
      parent: projects/my-project-name
      displayName: Displayname
      description: Description
      regex:
        pattern: patient
        groupIndexes: [2]

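The regex property defines the pattern to match. The pattern field contains the regular expression itself, while groupIndexes specifies which capture groups to extract. Note that the sample pattern "patient" contains no capture groups, so the index 2 is only a placeholder; in your own configuration, point groupIndexes at groups that exist in your pattern. The parent property scopes the info type to a project or organization.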

Define exact-match word lists

When you have a known set of sensitive terms, exact-match dictionaries provide faster detection than regex patterns.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Terms that the stored info type matches exactly.
const sensitiveWords = ["word", "word2"];

// Stored info type scoped to a project, backed by the inline word list above.
const dictionary = new gcp.dataloss.PreventionStoredInfoType("dictionary", {
    parent: "projects/my-project-name",
    displayName: "Displayname",
    description: "Description",
    dictionary: {
        wordList: { words: sensitiveWords },
    },
});
import pulumi
import pulumi_gcp as gcp

# Terms that the stored info type matches exactly.
sensitive_words = ["word", "word2"]

# Stored info type scoped to a project, backed by the inline word list above.
dictionary = gcp.dataloss.PreventionStoredInfoType(
    "dictionary",
    parent="projects/my-project-name",
    display_name="Displayname",
    description="Description",
    dictionary=gcp.dataloss.PreventionStoredInfoTypeDictionaryArgs(
        word_list=gcp.dataloss.PreventionStoredInfoTypeDictionaryWordListArgs(
            words=sensitive_words,
        ),
    ),
)
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataloss"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that declares one word-list stored info type.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Terms that the stored info type matches exactly.
		words := pulumi.StringArray{
			pulumi.String("word"),
			pulumi.String("word2"),
		}
		args := &dataloss.PreventionStoredInfoTypeArgs{
			Parent:      pulumi.String("projects/my-project-name"),
			Description: pulumi.String("Description"),
			DisplayName: pulumi.String("Displayname"),
			Dictionary: &dataloss.PreventionStoredInfoTypeDictionaryArgs{
				WordList: &dataloss.PreventionStoredInfoTypeDictionaryWordListArgs{
					Words: words,
				},
			},
		}
		// The resource handle is unused; only the creation error matters here.
		_, err := dataloss.NewPreventionStoredInfoType(ctx, "dictionary", args)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() =>
{
    // Terms that the stored info type matches exactly.
    var wordList = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeDictionaryWordListArgs
    {
        Words = new[] { "word", "word2" },
    };

    // Stored info type scoped to a project, backed by the inline word list above.
    var dictionary = new Gcp.DataLoss.PreventionStoredInfoType("dictionary", new Gcp.DataLoss.PreventionStoredInfoTypeArgs
    {
        Parent = "projects/my-project-name",
        DisplayName = "Displayname",
        Description = "Description",
        Dictionary = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeDictionaryArgs
        {
            WordList = wordList,
        },
    });
});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataloss.PreventionStoredInfoType;
import com.pulumi.gcp.dataloss.PreventionStoredInfoTypeArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeDictionaryArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeDictionaryWordListArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/** Pulumi program that declares one word-list stored info type. */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources of this stack. */
    public static void stack(Context ctx) {
        // Terms that the stored info type matches exactly.
        final var wordList = PreventionStoredInfoTypeDictionaryWordListArgs.builder()
            .words("word", "word2")
            .build();

        final var infoTypeArgs = PreventionStoredInfoTypeArgs.builder()
            .parent("projects/my-project-name")
            .displayName("Displayname")
            .description("Description")
            .dictionary(PreventionStoredInfoTypeDictionaryArgs.builder()
                .wordList(wordList)
                .build())
            .build();

        var dictionary = new PreventionStoredInfoType("dictionary", infoTypeArgs);
    }
}
resources:
  # Stored info type scoped to a project, backed by an inline exact-match word list.
  dictionary:
    type: gcp:dataloss:PreventionStoredInfoType
    properties:
      parent: projects/my-project-name
      displayName: Displayname
      description: Description
      dictionary:
        wordList:
          words: [word, word2]

The dictionary property defines exact-match detection. The wordList contains an array of terms to match. DLP scans for these exact strings without regex overhead, making it efficient for known vocabularies like project codenames or internal identifiers.

Load large dictionaries from Cloud Storage

Organizations with thousands of sensitive terms need to store dictionaries in Cloud Storage rather than embedding them in configuration.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Bucket that holds the word list object and is referenced by the output path.
const bucket = new gcp.storage.Bucket("bucket", {
    name: "tf-test-bucket",
    location: "US",
    forceDestroy: true,
});

// Bucket object whose content comes from the local file ./test-fixtures/words.txt.
const object = new gcp.storage.BucketObject("object", {
    name: "tf-test-object",
    bucket: bucket.name,
    source: new pulumi.asset.FileAsset("./test-fixtures/words.txt"),
});

// gs:// locations are built from outputs, so they resolve once the bucket and object exist.
const sourceUrl = pulumi.interpolate`gs://${bucket.name}/${object.name}`;
const artifactPath = pulumi.interpolate`gs://${bucket.name}/output/dictionary.txt`;

const large = new gcp.dataloss.PreventionStoredInfoType("large", {
    parent: "projects/my-project-name",
    displayName: "Displayname",
    description: "Description",
    largeCustomDictionary: {
        cloudStorageFileSet: { url: sourceUrl },
        outputPath: { path: artifactPath },
    },
});
import pulumi
import pulumi_gcp as gcp

# Bucket that holds the word list object and is referenced by the output path.
bucket = gcp.storage.Bucket("bucket",
    name="tf-test-bucket",
    location="US",
    force_destroy=True)

# Bucket object whose content comes from the local file ./test-fixtures/words.txt.
# The variable is called words_object (not object) so it does not shadow the
# Python builtin; the Pulumi resource name "object" is unchanged.
words_object = gcp.storage.BucketObject("object",
    name="tf-test-object",
    bucket=bucket.name,
    source=pulumi.FileAsset("./test-fixtures/words.txt"))

# The gs:// locations are assembled with Output.concat, which joins plain strings
# and outputs into a single output string.
large = gcp.dataloss.PreventionStoredInfoType("large",
    parent="projects/my-project-name",
    description="Description",
    display_name="Displayname",
    large_custom_dictionary={
        "cloud_storage_file_set": {
            "url": pulumi.Output.concat("gs://", bucket.name, "/", words_object.name),
        },
        "output_path": {
            "path": pulumi.Output.concat("gs://", bucket.name, "/output/dictionary.txt"),
        },
    })
package main

import (
	"fmt"

	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataloss"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/storage"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that declares a bucket, a word list object in it,
// and a stored info type backed by that object as a large custom dictionary.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		bucket, err := storage.NewBucket(ctx, "bucket", &storage.BucketArgs{
			Name:         pulumi.String("tf-test-bucket"),
			Location:     pulumi.String("US"),
			ForceDestroy: pulumi.Bool(true),
		})
		if err != nil {
			return err
		}

		// Bucket object whose content comes from the local file ./test-fixtures/words.txt.
		object, err := storage.NewBucketObject(ctx, "object", &storage.BucketObjectArgs{
			Name:   pulumi.String("tf-test-object"),
			Bucket: bucket.Name,
			Source: pulumi.NewFileAsset("./test-fixtures/words.txt"),
		})
		if err != nil {
			return err
		}

		// gs:// locations are derived from outputs, so they resolve once the
		// bucket and object names are known.
		sourceURL := pulumi.All(bucket.Name, object.Name).ApplyT(func(parts []interface{}) (string, error) {
			return fmt.Sprintf("gs://%v/%v", parts[0].(string), parts[1].(string)), nil
		}).(pulumi.StringOutput)
		artifactPath := bucket.Name.ApplyT(func(bucketName string) (string, error) {
			return fmt.Sprintf("gs://%v/output/dictionary.txt", bucketName), nil
		}).(pulumi.StringOutput)

		// The resource handle is unused; only the creation error matters here.
		_, err = dataloss.NewPreventionStoredInfoType(ctx, "large", &dataloss.PreventionStoredInfoTypeArgs{
			Parent:      pulumi.String("projects/my-project-name"),
			Description: pulumi.String("Description"),
			DisplayName: pulumi.String("Displayname"),
			LargeCustomDictionary: &dataloss.PreventionStoredInfoTypeLargeCustomDictionaryArgs{
				CloudStorageFileSet: &dataloss.PreventionStoredInfoTypeLargeCustomDictionaryCloudStorageFileSetArgs{
					Url: sourceURL,
				},
				OutputPath: &dataloss.PreventionStoredInfoTypeLargeCustomDictionaryOutputPathArgs{
					Path: artifactPath,
				},
			},
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() =>
{
    // Bucket that holds the word list object and is referenced by the output path.
    var bucket = new Gcp.Storage.Bucket("bucket", new Gcp.Storage.BucketArgs
    {
        Name = "tf-test-bucket",
        Location = "US",
        ForceDestroy = true,
    });

    // Bucket object whose content comes from the local file ./test-fixtures/words.txt.
    var @object = new Gcp.Storage.BucketObject("object", new Gcp.Storage.BucketObjectArgs
    {
        Name = "tf-test-object",
        Bucket = bucket.Name,
        Source = new FileAsset("./test-fixtures/words.txt"),
    });

    // gs:// locations are built from outputs, so they resolve once the bucket and object exist.
    var sourceUrl = Output.Format($"gs://{bucket.Name}/{@object.Name}");
    var artifactPath = Output.Format($"gs://{bucket.Name}/output/dictionary.txt");

    var large = new Gcp.DataLoss.PreventionStoredInfoType("large", new Gcp.DataLoss.PreventionStoredInfoTypeArgs
    {
        Parent = "projects/my-project-name",
        DisplayName = "Displayname",
        Description = "Description",
        LargeCustomDictionary = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeLargeCustomDictionaryArgs
        {
            CloudStorageFileSet = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeLargeCustomDictionaryCloudStorageFileSetArgs
            {
                Url = sourceUrl,
            },
            OutputPath = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeLargeCustomDictionaryOutputPathArgs
            {
                Path = artifactPath,
            },
        },
    });
});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.storage.Bucket;
import com.pulumi.gcp.storage.BucketArgs;
import com.pulumi.gcp.storage.BucketObject;
import com.pulumi.gcp.storage.BucketObjectArgs;
import com.pulumi.gcp.dataloss.PreventionStoredInfoType;
import com.pulumi.gcp.dataloss.PreventionStoredInfoTypeArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeLargeCustomDictionaryArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeLargeCustomDictionaryCloudStorageFileSetArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeLargeCustomDictionaryOutputPathArgs;
import com.pulumi.asset.FileAsset;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that declares a bucket, a word list object in it, and a stored
 * info type backed by that object as a large custom dictionary.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources of this stack. */
    public static void stack(Context ctx) {
        var bucket = new Bucket("bucket", BucketArgs.builder()
            .name("tf-test-bucket")
            .location("US")
            .forceDestroy(true)
            .build());

        // Bucket object whose content comes from the local file ./test-fixtures/words.txt.
        var object = new BucketObject("object", BucketObjectArgs.builder()
            .name("tf-test-object")
            .bucket(bucket.name())
            .source(new FileAsset("./test-fixtures/words.txt"))
            .build());

        // gs:// locations are derived from outputs, so they resolve once the
        // bucket and object names are known.
        final Output<String> sourceUrl = Output.tuple(bucket.name(), object.name())
            .applyValue(pair -> String.format("gs://%s/%s", pair.t1, pair.t2));
        final Output<String> artifactPath = bucket.name()
            .applyValue(bucketName -> String.format("gs://%s/output/dictionary.txt", bucketName));

        final var dictionaryConfig = PreventionStoredInfoTypeLargeCustomDictionaryArgs.builder()
            .cloudStorageFileSet(PreventionStoredInfoTypeLargeCustomDictionaryCloudStorageFileSetArgs.builder()
                .url(sourceUrl)
                .build())
            .outputPath(PreventionStoredInfoTypeLargeCustomDictionaryOutputPathArgs.builder()
                .path(artifactPath)
                .build())
            .build();

        var large = new PreventionStoredInfoType("large", PreventionStoredInfoTypeArgs.builder()
            .parent("projects/my-project-name")
            .displayName("Displayname")
            .description("Description")
            .largeCustomDictionary(dictionaryConfig)
            .build());
    }
}
resources:
  # Bucket that holds the word list object and is referenced by the output path.
  bucket:
    type: gcp:storage:Bucket
    properties:
      name: tf-test-bucket
      location: US
      forceDestroy: true
  # Bucket object whose content comes from the local file ./test-fixtures/words.txt.
  object:
    type: gcp:storage:BucketObject
    properties:
      name: tf-test-object
      bucket: ${bucket.name}
      source:
        fn::FileAsset: ./test-fixtures/words.txt
  # Stored info type backed by the bucket object above as a large custom dictionary.
  large:
    type: gcp:dataloss:PreventionStoredInfoType
    properties:
      parent: projects/my-project-name
      displayName: Displayname
      description: Description
      largeCustomDictionary:
        cloudStorageFileSet:
          url: gs://${bucket.name}/${object.name}
        outputPath:
          path: gs://${bucket.name}/output/dictionary.txt

The largeCustomDictionary property points to a Cloud Storage file containing your word list. The cloudStorageFileSet specifies the input file location, while outputPath defines the Cloud Storage location where DLP stores the dictionary artifacts it generates from that file. This approach scales to millions of terms without configuration size limits.

Control stored info type identifiers

By default, DLP generates random IDs for stored info types. Custom IDs make it easier to reference types across configurations.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Regex detection rule: the expression to match and the capture groups to extract.
const patientRegex = {
    pattern: "patient",
    groupIndexes: [2],
};

// Same regex-based info type as the basic example, with an explicit identifier
// supplied through storedInfoTypeId.
const withStoredInfoTypeId = new gcp.dataloss.PreventionStoredInfoType("with_stored_info_type_id", {
    parent: "projects/my-project-name",
    storedInfoTypeId: "id-",
    displayName: "Displayname",
    description: "Description",
    regex: patientRegex,
});
import pulumi
import pulumi_gcp as gcp

# Regex detection rule: the expression to match and the capture groups to extract.
patient_regex = gcp.dataloss.PreventionStoredInfoTypeRegexArgs(
    pattern="patient",
    group_indexes=[2],
)

# Same regex-based info type as the basic example, with an explicit identifier
# supplied through stored_info_type_id.
with_stored_info_type_id = gcp.dataloss.PreventionStoredInfoType(
    "with_stored_info_type_id",
    parent="projects/my-project-name",
    stored_info_type_id="id-",
    display_name="Displayname",
    description="Description",
    regex=patient_regex,
)
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataloss"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that declares one regex-based stored info type
// with an explicit stored info type ID.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Regex detection rule: the expression to match and the capture groups to extract.
		regex := &dataloss.PreventionStoredInfoTypeRegexArgs{
			Pattern:      pulumi.String("patient"),
			GroupIndexes: pulumi.IntArray{pulumi.Int(2)},
		}
		args := &dataloss.PreventionStoredInfoTypeArgs{
			Parent:           pulumi.String("projects/my-project-name"),
			Description:      pulumi.String("Description"),
			DisplayName:      pulumi.String("Displayname"),
			StoredInfoTypeId: pulumi.String("id-"),
			Regex:            regex,
		}
		// The resource handle is unused; only the creation error matters here.
		_, err := dataloss.NewPreventionStoredInfoType(ctx, "with_stored_info_type_id", args)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() =>
{
    // Regex detection rule: the expression to match and the capture groups to extract.
    var patientRegex = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeRegexArgs
    {
        Pattern = "patient",
        GroupIndexes = new[] { 2 },
    };

    // Same regex-based info type as the basic example, with an explicit identifier
    // supplied through StoredInfoTypeId.
    var withStoredInfoTypeId = new Gcp.DataLoss.PreventionStoredInfoType("with_stored_info_type_id", new Gcp.DataLoss.PreventionStoredInfoTypeArgs
    {
        Parent = "projects/my-project-name",
        StoredInfoTypeId = "id-",
        DisplayName = "Displayname",
        Description = "Description",
        Regex = patientRegex,
    });
});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataloss.PreventionStoredInfoType;
import com.pulumi.gcp.dataloss.PreventionStoredInfoTypeArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeRegexArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that declares one regex-based stored info type with an
 * explicit stored info type ID.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources of this stack. */
    public static void stack(Context ctx) {
        // The logical resource name is "with_stored_info_type_id", matching the
        // TypeScript, Python, Go, C# and YAML versions of this example, so that
        // every language declares the same resource.
        var withStoredInfoTypeId = new PreventionStoredInfoType("with_stored_info_type_id", PreventionStoredInfoTypeArgs.builder()
            .parent("projects/my-project-name")
            .description("Description")
            .displayName("Displayname")
            // Explicit identifier for the stored info type.
            .storedInfoTypeId("id-")
            // Regex detection rule: the expression to match and the capture groups to extract.
            .regex(PreventionStoredInfoTypeRegexArgs.builder()
                .pattern("patient")
                .groupIndexes(2)
                .build())
            .build());

    }
}
resources:
  # Same regex-based info type as the basic example, with an explicit identifier
  # supplied through storedInfoTypeId.
  withStoredInfoTypeId:
    type: gcp:dataloss:PreventionStoredInfoType
    name: with_stored_info_type_id
    properties:
      parent: projects/my-project-name
      storedInfoTypeId: id-
      displayName: Displayname
      description: Description
      regex:
        pattern: patient
        groupIndexes: [2]

The storedInfoTypeId property sets a custom identifier instead of accepting a generated one. This extends the basic regex configuration by adding predictable naming, making it easier to reference the info type in inspection templates and job configurations.

Beyond these examples

These snippets focus on specific stored info type features: regex patterns and word list dictionaries, Cloud Storage integration for large dictionaries, and custom identifier management. They’re intentionally minimal rather than full DLP scanning solutions.

The examples may reference pre-existing infrastructure such as GCP projects with the DLP API enabled. The large dictionary example creates its own Cloud Storage bucket but expects a local word list file at ./test-fixtures/words.txt to upload. They focus on defining custom info types rather than provisioning the surrounding DLP infrastructure.

To keep things focused, common stored info type patterns are omitted, including:

  • Dictionary updates and versioning
  • Organization-level vs project-level scoping
  • Integration with DLP inspection jobs
  • Performance tuning for large dictionaries

These omissions are intentional: the goal is to illustrate how each detection method is wired, not provide drop-in DLP modules. See the PreventionStoredInfoType resource reference for all available configuration options.

Let's create GCP Data Loss Prevention Stored Info Types

Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.

Try Pulumi Cloud for FREE

Frequently Asked Questions

Immutability & Updates
What properties can't I change after creating a stored info type?
Both parent and storedInfoTypeId are immutable. Changing either forces resource replacement.
Detection Methods
What are the three ways to define detection rules for a stored info type?

You have three options:

  1. Regex - Use regex with a pattern for matching (e.g., “patient”)
  2. Dictionary - Use dictionary.wordList.words for small word lists
  3. Large Custom Dictionary - Use largeCustomDictionary with a Cloud Storage file for large word lists
When should I use largeCustomDictionary instead of dictionary?
Use largeCustomDictionary when your word list is too large to manage inline. It requires a Cloud Storage bucket with your word list file and an output path for results.
How do I configure a large custom dictionary from Cloud Storage?
Set largeCustomDictionary.cloudStorageFileSet.url to your GCS file (e.g., gs://bucket-name/words.txt) and largeCustomDictionary.outputPath.path for the output location.
Configuration & Setup
What formats can I use for the parent parameter?
Four formats are supported: projects/{{project}}, projects/{{project}}/locations/{{location}}, organizations/{{organization_id}}, or organizations/{{organization_id}}/locations/{{location}}.
Do I need to specify a storedInfoTypeId?
No. The field is optional: leave it unset (or empty) to let the system auto-generate an ID. If provided, it must match the pattern [a-zA-Z\d-_]+ with a maximum length of 100 characters.
What characters are allowed in storedInfoTypeId?
The ID can contain uppercase and lowercase letters, numbers, hyphens, and underscores (matching [a-zA-Z\d-_]+), with a maximum length of 100 characters.

Using a different cloud?

Explore security guides for other cloud providers: