Create GCP Data Loss Prevention Stored Info Types

The gcp:dataloss/preventionStoredInfoType:PreventionStoredInfoType resource, part of the Pulumi GCP provider, defines custom info types for Cloud DLP: regex patterns, word lists, or large dictionaries that identify sensitive data in your organization. This guide focuses on three capabilities: regex-based pattern matching, small and large dictionary configurations, and custom identifier assignment.

Stored info types belong to a GCP project or organization and may reference Cloud Storage buckets for large dictionaries. The examples are intentionally small. Combine them with inspection templates and job triggers to build complete DLP workflows.

Match patterns with regular expressions

Data loss prevention often starts by defining custom patterns that identify sensitive information specific to your organization, such as internal patient IDs or employee codes.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const basic = new gcp.dataloss.PreventionStoredInfoType("basic", {
    parent: "projects/my-project-name",
    description: "Description",
    displayName: "Displayname",
    regex: {
        pattern: "patient",
        groupIndexes: [2],
    },
});

import pulumi
import pulumi_gcp as gcp

basic = gcp.dataloss.PreventionStoredInfoType("basic",
    parent="projects/my-project-name",
    description="Description",
    display_name="Displayname",
    regex={
        "pattern": "patient",
        "group_indexes": [2],
    })

package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataloss"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that registers a regex-based stored info type
// named "basic" under the "projects/my-project-name" parent.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Detector definition: the regular expression and the group
		// indexes to extract from each match.
		regex := &dataloss.PreventionStoredInfoTypeRegexArgs{
			Pattern:      pulumi.String("patient"),
			GroupIndexes: pulumi.IntArray{pulumi.Int(2)},
		}
		args := &dataloss.PreventionStoredInfoTypeArgs{
			Parent:      pulumi.String("projects/my-project-name"),
			Description: pulumi.String("Description"),
			DisplayName: pulumi.String("Displayname"),
			Regex:       regex,
		}
		// The resource handle is not used afterwards; only the error matters.
		_, err := dataloss.NewPreventionStoredInfoType(ctx, "basic", args)
		return err
	})
}

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    var basic = new Gcp.DataLoss.PreventionStoredInfoType("basic", new()
    {
        Parent = "projects/my-project-name",
        Description = "Description",
        DisplayName = "Displayname",
        Regex = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeRegexArgs
        {
            Pattern = "patient",
            GroupIndexes = new[]
            {
                2,
            },
        },
    });

});

package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataloss.PreventionStoredInfoType;
import com.pulumi.gcp.dataloss.PreventionStoredInfoTypeArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeRegexArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that registers a regex-based stored info type named "basic".
 */
public class App {
    /** Entry point: hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the stack's single resource.
     *
     * @param ctx Pulumi context supplied by the runtime (not used directly here)
     */
    public static void stack(Context ctx) {
        // Detector definition: the regular expression and the group index to extract.
        var regex = PreventionStoredInfoTypeRegexArgs.builder()
            .pattern("patient")
            .groupIndexes(2)
            .build();

        var basic = new PreventionStoredInfoType("basic", PreventionStoredInfoTypeArgs.builder()
            .parent("projects/my-project-name")
            .description("Description")
            .displayName("Displayname")
            .regex(regex)
            .build());
    }
}

resources:
  basic:
    type: gcp:dataloss:PreventionStoredInfoType
    properties:
      parent: projects/my-project-name
      description: Description
      displayName: Displayname
      regex:
        pattern: patient
        groupIndexes:
          - 2

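The regex property defines the pattern to match. The pattern field contains the regular expression itself, while groupIndexes specifies which capture groups to extract. The sample pattern ("patient") defines no capture groups, so treat the groupIndexes value shown here as a placeholder and reference groups that actually exist in your own pattern. The parent property sets the scope (project or organization) where this info type is available.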

Define exact-match word lists

When you have a known set of sensitive terms, exact-match dictionaries provide faster detection than regex patterns.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const dictionary = new gcp.dataloss.PreventionStoredInfoType("dictionary", {
    parent: "projects/my-project-name",
    description: "Description",
    displayName: "Displayname",
    dictionary: {
        wordList: {
            words: [
                "word",
                "word2",
            ],
        },
    },
});

import pulumi
import pulumi_gcp as gcp

dictionary = gcp.dataloss.PreventionStoredInfoType("dictionary",
    parent="projects/my-project-name",
    description="Description",
    display_name="Displayname",
    dictionary={
        "word_list": {
            "words": [
                "word",
                "word2",
            ],
        },
    })

package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataloss"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that registers a word-list (dictionary) stored
// info type named "dictionary" under the "projects/my-project-name" parent.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Terms the dictionary should match.
		words := pulumi.StringArray{
			pulumi.String("word"),
			pulumi.String("word2"),
		}
		_, err := dataloss.NewPreventionStoredInfoType(ctx, "dictionary", &dataloss.PreventionStoredInfoTypeArgs{
			Parent:      pulumi.String("projects/my-project-name"),
			Description: pulumi.String("Description"),
			DisplayName: pulumi.String("Displayname"),
			Dictionary: &dataloss.PreventionStoredInfoTypeDictionaryArgs{
				WordList: &dataloss.PreventionStoredInfoTypeDictionaryWordListArgs{
					Words: words,
				},
			},
		})
		// The resource handle is not used afterwards; only the error matters.
		return err
	})
}

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    var dictionary = new Gcp.DataLoss.PreventionStoredInfoType("dictionary", new()
    {
        Parent = "projects/my-project-name",
        Description = "Description",
        DisplayName = "Displayname",
        Dictionary = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeDictionaryArgs
        {
            WordList = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeDictionaryWordListArgs
            {
                Words = new[]
                {
                    "word",
                    "word2",
                },
            },
        },
    });

});

package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataloss.PreventionStoredInfoType;
import com.pulumi.gcp.dataloss.PreventionStoredInfoTypeArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeDictionaryArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeDictionaryWordListArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that registers a word-list (dictionary) stored info type
 * named "dictionary".
 */
public class App {
    /** Entry point: hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the stack's single resource.
     *
     * @param ctx Pulumi context supplied by the runtime (not used directly here)
     */
    public static void stack(Context ctx) {
        // Terms the dictionary should match.
        var wordList = PreventionStoredInfoTypeDictionaryWordListArgs.builder()
            .words("word", "word2")
            .build();

        var dictionary = new PreventionStoredInfoType("dictionary", PreventionStoredInfoTypeArgs.builder()
            .parent("projects/my-project-name")
            .description("Description")
            .displayName("Displayname")
            .dictionary(PreventionStoredInfoTypeDictionaryArgs.builder()
                .wordList(wordList)
                .build())
            .build());
    }
}

resources:
  dictionary:
    type: gcp:dataloss:PreventionStoredInfoType
    properties:
      parent: projects/my-project-name
      description: Description
      displayName: Displayname
      dictionary:
        wordList:
          words:
            - word
            - word2

The dictionary property defines a word list for exact matching. The wordList contains an array of terms to detect. This approach works well for small, static lists (project codenames, internal identifiers) but becomes impractical for thousands of terms.

Load large dictionaries from Cloud Storage

Organizations with thousands of sensitive terms need to store dictionaries in Cloud Storage rather than embedding them in configuration.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const bucket = new gcp.storage.Bucket("bucket", {
    name: "tf-test-bucket",
    location: "US",
    forceDestroy: true,
});
const object = new gcp.storage.BucketObject("object", {
    name: "tf-test-object",
    bucket: bucket.name,
    source: new pulumi.asset.FileAsset("./test-fixtures/words.txt"),
});
const large = new gcp.dataloss.PreventionStoredInfoType("large", {
    parent: "projects/my-project-name",
    description: "Description",
    displayName: "Displayname",
    largeCustomDictionary: {
        cloudStorageFileSet: {
            url: pulumi.interpolate`gs://${bucket.name}/${object.name}`,
        },
        outputPath: {
            path: pulumi.interpolate`gs://${bucket.name}/output/dictionary.txt`,
        },
    },
});

import pulumi
import pulumi_gcp as gcp

# Bucket that holds the source word list and the dictionary output path.
bucket = gcp.storage.Bucket("bucket",
    name="tf-test-bucket",
    location="US",
    force_destroy=True)

# Upload the local word list that feeds the large custom dictionary.
words_object = gcp.storage.BucketObject("object",
    name="tf-test-object",
    bucket=bucket.name,
    source=pulumi.FileAsset("./test-fixtures/words.txt"))

# The gs:// URLs depend on resource outputs, so they are built with apply().
source_url = pulumi.Output.all(
    bucketName=bucket.name,
    objectName=words_object.name,
).apply(lambda resolved: f"gs://{resolved['bucketName']}/{resolved['objectName']}")
output_path = bucket.name.apply(lambda name: f"gs://{name}/output/dictionary.txt")

# Stored info type backed by the uploaded word list.
large = gcp.dataloss.PreventionStoredInfoType("large",
    parent="projects/my-project-name",
    description="Description",
    display_name="Displayname",
    large_custom_dictionary={
        "cloud_storage_file_set": {
            "url": source_url,
        },
        "output_path": {
            "path": output_path,
        },
    })

package main

import (
	"fmt"

	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataloss"
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/storage"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that creates a Cloud Storage bucket, uploads a
// local word list into it, and registers a large-custom-dictionary stored
// info type named "large" that reads from that object.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		bucket, err := storage.NewBucket(ctx, "bucket", &storage.BucketArgs{
			Name:         pulumi.String("tf-test-bucket"),
			Location:     pulumi.String("US"),
			ForceDestroy: pulumi.Bool(true),
		})
		if err != nil {
			return err
		}

		object, err := storage.NewBucketObject(ctx, "object", &storage.BucketObjectArgs{
			Name:   pulumi.String("tf-test-object"),
			Bucket: bucket.Name,
			Source: pulumi.NewFileAsset("./test-fixtures/words.txt"),
		})
		if err != nil {
			return err
		}

		// Both gs:// URLs depend on resource outputs, so they are derived
		// with ApplyT rather than plain string formatting.
		sourceURL := pulumi.All(bucket.Name, object.Name).ApplyT(func(parts []interface{}) (string, error) {
			return fmt.Sprintf("gs://%v/%v", parts[0].(string), parts[1].(string)), nil
		}).(pulumi.StringOutput)
		outputPath := bucket.Name.ApplyT(func(bucketName string) (string, error) {
			return fmt.Sprintf("gs://%v/output/dictionary.txt", bucketName), nil
		}).(pulumi.StringOutput)

		_, err = dataloss.NewPreventionStoredInfoType(ctx, "large", &dataloss.PreventionStoredInfoTypeArgs{
			Parent:      pulumi.String("projects/my-project-name"),
			Description: pulumi.String("Description"),
			DisplayName: pulumi.String("Displayname"),
			LargeCustomDictionary: &dataloss.PreventionStoredInfoTypeLargeCustomDictionaryArgs{
				CloudStorageFileSet: &dataloss.PreventionStoredInfoTypeLargeCustomDictionaryCloudStorageFileSetArgs{
					Url: sourceURL,
				},
				OutputPath: &dataloss.PreventionStoredInfoTypeLargeCustomDictionaryOutputPathArgs{
					Path: outputPath,
				},
			},
		})
		// The resource handle is not used afterwards; only the error matters.
		return err
	})
}

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    var bucket = new Gcp.Storage.Bucket("bucket", new()
    {
        Name = "tf-test-bucket",
        Location = "US",
        ForceDestroy = true,
    });

    var @object = new Gcp.Storage.BucketObject("object", new()
    {
        Name = "tf-test-object",
        Bucket = bucket.Name,
        Source = new FileAsset("./test-fixtures/words.txt"),
    });

    var large = new Gcp.DataLoss.PreventionStoredInfoType("large", new()
    {
        Parent = "projects/my-project-name",
        Description = "Description",
        DisplayName = "Displayname",
        LargeCustomDictionary = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeLargeCustomDictionaryArgs
        {
            CloudStorageFileSet = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeLargeCustomDictionaryCloudStorageFileSetArgs
            {
                Url = Output.Tuple(bucket.Name, @object.Name).Apply(values =>
                {
                    var bucketName = values.Item1;
                    var objectName = values.Item2;
                    return $"gs://{bucketName}/{objectName}";
                }),
            },
            OutputPath = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeLargeCustomDictionaryOutputPathArgs
            {
                Path = bucket.Name.Apply(name => $"gs://{name}/output/dictionary.txt"),
            },
        },
    });

});

package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.storage.Bucket;
import com.pulumi.gcp.storage.BucketArgs;
import com.pulumi.gcp.storage.BucketObject;
import com.pulumi.gcp.storage.BucketObjectArgs;
import com.pulumi.gcp.dataloss.PreventionStoredInfoType;
import com.pulumi.gcp.dataloss.PreventionStoredInfoTypeArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeLargeCustomDictionaryArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeLargeCustomDictionaryCloudStorageFileSetArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeLargeCustomDictionaryOutputPathArgs;
import com.pulumi.asset.FileAsset;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that creates a Cloud Storage bucket, uploads a local word
 * list into it, and registers a large-custom-dictionary stored info type
 * named "large" that reads from that object.
 */
public class App {
    /** Entry point: hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the bucket, the uploaded object, and the stored info type.
     *
     * @param ctx Pulumi context supplied by the runtime (not used directly here)
     */
    public static void stack(Context ctx) {
        var bucket = new Bucket("bucket", BucketArgs.builder()
            .name("tf-test-bucket")
            .location("US")
            .forceDestroy(true)
            .build());

        var object = new BucketObject("object", BucketObjectArgs.builder()
            .name("tf-test-object")
            .bucket(bucket.name())
            .source(new FileAsset("./test-fixtures/words.txt"))
            .build());

        // Both gs:// URLs depend on resource outputs, so they are derived with applyValue.
        var sourceUrl = Output.tuple(bucket.name(), object.name())
            .applyValue(names -> String.format("gs://%s/%s", names.t1, names.t2));
        var dictionaryPath = bucket.name()
            .applyValue(bucketName -> String.format("gs://%s/output/dictionary.txt", bucketName));

        var large = new PreventionStoredInfoType("large", PreventionStoredInfoTypeArgs.builder()
            .parent("projects/my-project-name")
            .description("Description")
            .displayName("Displayname")
            .largeCustomDictionary(PreventionStoredInfoTypeLargeCustomDictionaryArgs.builder()
                .cloudStorageFileSet(PreventionStoredInfoTypeLargeCustomDictionaryCloudStorageFileSetArgs.builder()
                    .url(sourceUrl)
                    .build())
                .outputPath(PreventionStoredInfoTypeLargeCustomDictionaryOutputPathArgs.builder()
                    .path(dictionaryPath)
                    .build())
                .build())
            .build());
    }
}

resources:
  large:
    type: gcp:dataloss:PreventionStoredInfoType
    properties:
      parent: projects/my-project-name
      description: Description
      displayName: Displayname
      largeCustomDictionary:
        cloudStorageFileSet:
          url: gs://${bucket.name}/${object.name}
        outputPath:
          path: gs://${bucket.name}/output/dictionary.txt
  bucket:
    type: gcp:storage:Bucket
    properties:
      name: tf-test-bucket
      location: US
      forceDestroy: true
  object:
    type: gcp:storage:BucketObject
    properties:
      name: tf-test-object
      bucket: ${bucket.name}
      source:
        fn::FileAsset: ./test-fixtures/words.txt

The largeCustomDictionary property points to a file in Cloud Storage. The cloudStorageFileSet specifies the input file location, while outputPath defines the Cloud Storage location where DLP stores the dictionary artifacts it generates from that input. This example creates the bucket and file; in production, you’d typically reference existing storage.

Control the stored info type identifier

By default, DLP generates random IDs for stored info types. Custom IDs make it easier to reference these types consistently across inspection templates and job triggers.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

const withStoredInfoTypeId = new gcp.dataloss.PreventionStoredInfoType("with_stored_info_type_id", {
    parent: "projects/my-project-name",
    description: "Description",
    displayName: "Displayname",
    storedInfoTypeId: "id-",
    regex: {
        pattern: "patient",
        groupIndexes: [2],
    },
});

import pulumi
import pulumi_gcp as gcp

with_stored_info_type_id = gcp.dataloss.PreventionStoredInfoType("with_stored_info_type_id",
    parent="projects/my-project-name",
    description="Description",
    display_name="Displayname",
    stored_info_type_id="id-",
    regex={
        "pattern": "patient",
        "group_indexes": [2],
    })

package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataloss"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that registers a regex-based stored info type
// named "with_stored_info_type_id" with an explicitly chosen StoredInfoTypeId.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Detector definition: the regular expression and the group
		// indexes to extract from each match.
		regex := &dataloss.PreventionStoredInfoTypeRegexArgs{
			Pattern:      pulumi.String("patient"),
			GroupIndexes: pulumi.IntArray{pulumi.Int(2)},
		}
		args := &dataloss.PreventionStoredInfoTypeArgs{
			Parent:           pulumi.String("projects/my-project-name"),
			Description:      pulumi.String("Description"),
			DisplayName:      pulumi.String("Displayname"),
			StoredInfoTypeId: pulumi.String("id-"),
			Regex:            regex,
		}
		// The resource handle is not used afterwards; only the error matters.
		_, err := dataloss.NewPreventionStoredInfoType(ctx, "with_stored_info_type_id", args)
		return err
	})
}

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    var withStoredInfoTypeId = new Gcp.DataLoss.PreventionStoredInfoType("with_stored_info_type_id", new()
    {
        Parent = "projects/my-project-name",
        Description = "Description",
        DisplayName = "Displayname",
        StoredInfoTypeId = "id-",
        Regex = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeRegexArgs
        {
            Pattern = "patient",
            GroupIndexes = new[]
            {
                2,
            },
        },
    });

});

package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataloss.PreventionStoredInfoType;
import com.pulumi.gcp.dataloss.PreventionStoredInfoTypeArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeRegexArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that registers a regex-based stored info type with an
 * explicitly chosen storedInfoTypeId.
 */
public class App {
    /** Entry point: hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the stack's single resource.
     *
     * @param ctx Pulumi context supplied by the runtime (not used directly here)
     */
    public static void stack(Context ctx) {
        // The logical name matches the other language variants of this example
        // ("with_stored_info_type_id") so every variant declares the same resource name.
        var withStoredInfoTypeId = new PreventionStoredInfoType("with_stored_info_type_id", PreventionStoredInfoTypeArgs.builder()
            .parent("projects/my-project-name")
            .description("Description")
            .displayName("Displayname")
            .storedInfoTypeId("id-")
            .regex(PreventionStoredInfoTypeRegexArgs.builder()
                .pattern("patient")
                .groupIndexes(2)
                .build())
            .build());
    }
}

resources:
  withStoredInfoTypeId:
    type: gcp:dataloss:PreventionStoredInfoType
    name: with_stored_info_type_id
    properties:
      parent: projects/my-project-name
      description: Description
      displayName: Displayname
      storedInfoTypeId: id-
      regex:
        pattern: patient
        groupIndexes:
          - 2

The storedInfoTypeId property sets a custom identifier instead of accepting a generated one. This extends the basic regex configuration by adding a predictable ID for cross-resource references. The ID must match the pattern [a-zA-Z\d-_]+ and can be up to 100 characters.

Beyond these examples

These snippets focus on specific stored info type features: regex patterns and word list dictionaries, Cloud Storage integration for large dictionaries, and custom identifier assignment. They’re intentionally minimal rather than complete DLP scanning solutions.

The examples assume pre-existing infrastructure such as a GCP project with the DLP API enabled. The large dictionary example creates its own Cloud Storage bucket and object, but it expects a local word list at ./test-fixtures/words.txt. They focus on defining info types rather than using them in inspection workflows.

To keep things focused, common stored info type patterns are omitted, including:

- Dictionary updates and versioning
- Organization-level vs project-level scoping
- Integration with inspection templates and job triggers
- Performance tuning (groupIndexes optimization)

These omissions are intentional: the goal is to illustrate how each detection method is wired, not provide drop-in DLP modules. See the PreventionStoredInfoType resource reference for all available configuration options.

Let's create GCP Data Loss Prevention Stored Info Types

Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.

Try Pulumi Cloud for FREE

Frequently Asked Questions

Configuration & Immutability

What happens if I change the parent or storedInfoTypeId after creation?

Both parent and storedInfoTypeId are immutable properties. Changing either forces resource replacement.

What are the naming requirements for storedInfoTypeId?

The storedInfoTypeId must match the pattern [a-zA-Z\d-_]+ with a maximum length of 100 characters. You can leave it empty to let GCP auto-generate one.

What formats are valid for the parent property?

The parent accepts four formats: projects/{{project}}, projects/{{project}}/locations/{{location}}, organizations/{{organization_id}}, or organizations/{{organization_id}}/locations/{{location}}.

Detection Methods

What are the different ways to define custom info types?

You have three options:

- Regex: use regex with a pattern and optional groupIndexes
- Dictionary: use dictionary.wordList.words for a simple word list
- Large custom dictionary: use largeCustomDictionary with a Cloud Storage file

When should I use a large custom dictionary instead of a regular dictionary?

Use largeCustomDictionary when your word list is too large to manage inline. It requires a Cloud Storage bucket, with cloudStorageFileSet.url pointing to your dictionary file and outputPath.path naming the location where DLP stores the generated dictionary artifacts.

How do I configure a regex-based stored info type?

Set the regex property with a pattern field. Optionally include groupIndexes to specify which regex groups to match.

Import & Management

What formats can I use to import an existing stored info type?

You can import using either {{parent}}/storedInfoTypes/{{name}} or {{parent}}/{{name}}.

Using a different cloud?

Explore security guides for other cloud providers:

AWS Guides | Azure Guides