The gcp:dataloss/preventionStoredInfoType:PreventionStoredInfoType resource, part of the Pulumi GCP provider, defines custom info types for Cloud DLP: regex patterns, word lists, or large dictionaries stored in Cloud Storage. This guide focuses on four capabilities: regex-based pattern matching, exact-match word lists, large dictionaries from Cloud Storage, and custom identifier control.
Stored info types belong to a GCP project or organization and may reference Cloud Storage buckets for large dictionaries. The examples are intentionally small. Combine them with your own DLP inspection jobs and data scanning workflows.
Match patterns with regular expressions
Data loss prevention often starts by defining custom patterns that identify sensitive information specific to your organization, such as internal patient IDs or employee codes.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Regex detector settings: the pattern to search for and the capture-group
// indexes to extract from each match.
const basicRegex = {
    pattern: "patient",
    groupIndexes: [2],
};

// Stored info type scoped to the project named in `parent`.
const basic = new gcp.dataloss.PreventionStoredInfoType("basic", {
    parent: "projects/my-project-name",
    description: "Description",
    displayName: "Displayname",
    regex: basicRegex,
});
import pulumi
import pulumi_gcp as gcp
# Regex detector settings: the pattern to search for and the capture-group
# indexes to extract from each match.
basic_regex = {
    "pattern": "patient",
    "group_indexes": [2],
}

# Stored info type scoped to the project named in `parent`.
basic = gcp.dataloss.PreventionStoredInfoType(
    "basic",
    parent="projects/my-project-name",
    description="Description",
    display_name="Displayname",
    regex=basic_regex,
)
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataloss"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main registers a single regex-based stored info type with the Pulumi engine.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Regex detector: the pattern plus the capture-group indexes to extract.
		regexArgs := &dataloss.PreventionStoredInfoTypeRegexArgs{
			Pattern:      pulumi.String("patient"),
			GroupIndexes: pulumi.IntArray{pulumi.Int(2)},
		}

		infoTypeArgs := &dataloss.PreventionStoredInfoTypeArgs{
			Parent:      pulumi.String("projects/my-project-name"),
			Description: pulumi.String("Description"),
			DisplayName: pulumi.String("Displayname"),
			Regex:       regexArgs,
		}

		// The resource handle is not used afterwards, so only the error is kept.
		_, err := dataloss.NewPreventionStoredInfoType(ctx, "basic", infoTypeArgs)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
    // Regex detector: the pattern plus the capture-group indexes to extract.
    var regexArgs = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeRegexArgs
    {
        Pattern = "patient",
        GroupIndexes = new[] { 2 },
    };

    // Stored info type scoped to the project named in Parent.
    var basic = new Gcp.DataLoss.PreventionStoredInfoType("basic", new Gcp.DataLoss.PreventionStoredInfoTypeArgs
    {
        Parent = "projects/my-project-name",
        Description = "Description",
        DisplayName = "Displayname",
        Regex = regexArgs,
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataloss.PreventionStoredInfoType;
import com.pulumi.gcp.dataloss.PreventionStoredInfoTypeArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeRegexArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/** Pulumi program that declares one regex-based stored info type. */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources that make up the stack. */
    public static void stack(Context ctx) {
        // Regex detector: the pattern plus the capture-group indexes to extract.
        final var regexArgs = PreventionStoredInfoTypeRegexArgs.builder()
            .pattern("patient")
            .groupIndexes(2)
            .build();

        final var infoTypeArgs = PreventionStoredInfoTypeArgs.builder()
            .parent("projects/my-project-name")
            .description("Description")
            .displayName("Displayname")
            .regex(regexArgs)
            .build();

        var basic = new PreventionStoredInfoType("basic", infoTypeArgs);
    }
}
# Pulumi YAML program: one regex-based stored info type.
# Nesting is significant: each resource sits under `resources`, its inputs
# under `properties`, and the detector settings under `regex`.
resources:
  basic:
    type: gcp:dataloss:PreventionStoredInfoType
    properties:
      parent: projects/my-project-name
      description: Description
      displayName: Displayname
      regex:
        pattern: patient
        # Capture-group indexes to extract from each match.
        groupIndexes:
          - 2
The regex property defines the pattern to match. The pattern field contains the regular expression itself, while groupIndexes specifies which capture groups to extract. The parent property scopes the info type to a project or organization.
Define exact-match word lists
When you have a known set of sensitive terms, exact-match dictionaries provide faster detection than regex patterns.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Inline list of exact-match terms for the dictionary detector.
const dictionaryWords = [
    "word",
    "word2",
];

// Stored info type that matches the listed words exactly.
const dictionary = new gcp.dataloss.PreventionStoredInfoType("dictionary", {
    parent: "projects/my-project-name",
    description: "Description",
    displayName: "Displayname",
    dictionary: {
        wordList: { words: dictionaryWords },
    },
});
import pulumi
import pulumi_gcp as gcp
# Inline list of exact-match terms for the dictionary detector.
dictionary_words = [
    "word",
    "word2",
]

# Stored info type that matches the listed words exactly.
dictionary = gcp.dataloss.PreventionStoredInfoType(
    "dictionary",
    parent="projects/my-project-name",
    description="Description",
    display_name="Displayname",
    dictionary={
        "word_list": {"words": dictionary_words},
    },
)
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataloss"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main registers a stored info type backed by an inline word list.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Exact-match terms for the dictionary detector.
		wordList := &dataloss.PreventionStoredInfoTypeDictionaryWordListArgs{
			Words: pulumi.StringArray{
				pulumi.String("word"),
				pulumi.String("word2"),
			},
		}

		infoTypeArgs := &dataloss.PreventionStoredInfoTypeArgs{
			Parent:      pulumi.String("projects/my-project-name"),
			Description: pulumi.String("Description"),
			DisplayName: pulumi.String("Displayname"),
			Dictionary: &dataloss.PreventionStoredInfoTypeDictionaryArgs{
				WordList: wordList,
			},
		}

		// The resource handle is not used afterwards, so only the error is kept.
		_, err := dataloss.NewPreventionStoredInfoType(ctx, "dictionary", infoTypeArgs)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
    // Exact-match terms for the dictionary detector.
    var wordList = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeDictionaryWordListArgs
    {
        Words = new[] { "word", "word2" },
    };

    // Stored info type that matches the listed words exactly.
    var dictionary = new Gcp.DataLoss.PreventionStoredInfoType("dictionary", new Gcp.DataLoss.PreventionStoredInfoTypeArgs
    {
        Parent = "projects/my-project-name",
        Description = "Description",
        DisplayName = "Displayname",
        Dictionary = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeDictionaryArgs
        {
            WordList = wordList,
        },
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataloss.PreventionStoredInfoType;
import com.pulumi.gcp.dataloss.PreventionStoredInfoTypeArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeDictionaryArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeDictionaryWordListArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/** Pulumi program that declares a stored info type backed by an inline word list. */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources that make up the stack. */
    public static void stack(Context ctx) {
        // Exact-match terms for the dictionary detector.
        final var wordList = PreventionStoredInfoTypeDictionaryWordListArgs.builder()
            .words(
                "word",
                "word2")
            .build();

        final var dictionaryArgs = PreventionStoredInfoTypeDictionaryArgs.builder()
            .wordList(wordList)
            .build();

        var dictionary = new PreventionStoredInfoType("dictionary", PreventionStoredInfoTypeArgs.builder()
            .parent("projects/my-project-name")
            .description("Description")
            .displayName("Displayname")
            .dictionary(dictionaryArgs)
            .build());
    }
}
# Pulumi YAML program: a stored info type backed by an inline word list.
# Nesting is significant: `words` belongs to `wordList`, which belongs to
# `dictionary`, which is one of the resource's `properties`.
resources:
  dictionary:
    type: gcp:dataloss:PreventionStoredInfoType
    properties:
      parent: projects/my-project-name
      description: Description
      displayName: Displayname
      dictionary:
        wordList:
          # Exact-match terms.
          words:
            - word
            - word2
The dictionary property defines exact-match detection. The wordList contains an array of terms to match. DLP scans for these exact strings without regex overhead, making it efficient for known vocabularies like project codenames or internal identifiers.
Load large dictionaries from Cloud Storage
Organizations with thousands of sensitive terms need to store dictionaries in Cloud Storage rather than embedding them in configuration.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Bucket that holds the uploaded word list and the dictionary output path.
const bucket = new gcp.storage.Bucket("bucket", {
    name: "tf-test-bucket",
    location: "US",
    forceDestroy: true,
});

// Upload the local word list file into the bucket.
const object = new gcp.storage.BucketObject("object", {
    name: "tf-test-object",
    bucket: bucket.name,
    source: new pulumi.asset.FileAsset("./test-fixtures/words.txt"),
});

// gs:// locations are built from resource outputs, so they resolve at deploy time.
const wordListUrl = pulumi.interpolate`gs://${bucket.name}/${object.name}`;
const outputLocation = pulumi.interpolate`gs://${bucket.name}/output/dictionary.txt`;

const large = new gcp.dataloss.PreventionStoredInfoType("large", {
    parent: "projects/my-project-name",
    description: "Description",
    displayName: "Displayname",
    largeCustomDictionary: {
        cloudStorageFileSet: { url: wordListUrl },
        outputPath: { path: outputLocation },
    },
});
import pulumi
import pulumi_gcp as gcp
# Bucket that holds the uploaded word list and the dictionary output path.
bucket = gcp.storage.Bucket(
    "bucket",
    name="tf-test-bucket",
    location="US",
    force_destroy=True,
)

# Upload the local word list file into the bucket.
object = gcp.storage.BucketObject(
    "object",
    name="tf-test-object",
    bucket=bucket.name,
    source=pulumi.FileAsset("./test-fixtures/words.txt"),
)

# gs:// locations are built from resource outputs, so they resolve at deploy time.
word_list_url = pulumi.Output.concat("gs://", bucket.name, "/", object.name)
output_location = pulumi.Output.concat("gs://", bucket.name, "/output/dictionary.txt")

large = gcp.dataloss.PreventionStoredInfoType(
    "large",
    parent="projects/my-project-name",
    description="Description",
    display_name="Displayname",
    large_custom_dictionary={
        "cloud_storage_file_set": {"url": word_list_url},
        "output_path": {"path": output_location},
    },
)
package main
import (
"fmt"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataloss"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/storage"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main provisions a bucket, uploads a word list into it, and registers a
// large-custom-dictionary stored info type that reads that word list.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		bucket, err := storage.NewBucket(ctx, "bucket", &storage.BucketArgs{
			Name:         pulumi.String("tf-test-bucket"),
			Location:     pulumi.String("US"),
			ForceDestroy: pulumi.Bool(true),
		})
		if err != nil {
			return err
		}

		// Upload the local word list file into the bucket.
		object, err := storage.NewBucketObject(ctx, "object", &storage.BucketObjectArgs{
			Name:   pulumi.String("tf-test-object"),
			Bucket: bucket.Name,
			Source: pulumi.NewFileAsset("./test-fixtures/words.txt"),
		})
		if err != nil {
			return err
		}

		// gs:// locations are built from resource outputs, so they resolve at deploy time.
		wordListURL := pulumi.All(bucket.Name, object.Name).ApplyT(func(parts []interface{}) (string, error) {
			return fmt.Sprintf("gs://%v/%v", parts[0].(string), parts[1].(string)), nil
		}).(pulumi.StringOutput)
		outputLocation := bucket.Name.ApplyT(func(bucketName string) (string, error) {
			return fmt.Sprintf("gs://%v/output/dictionary.txt", bucketName), nil
		}).(pulumi.StringOutput)

		_, err = dataloss.NewPreventionStoredInfoType(ctx, "large", &dataloss.PreventionStoredInfoTypeArgs{
			Parent:      pulumi.String("projects/my-project-name"),
			Description: pulumi.String("Description"),
			DisplayName: pulumi.String("Displayname"),
			LargeCustomDictionary: &dataloss.PreventionStoredInfoTypeLargeCustomDictionaryArgs{
				CloudStorageFileSet: &dataloss.PreventionStoredInfoTypeLargeCustomDictionaryCloudStorageFileSetArgs{
					Url: wordListURL,
				},
				OutputPath: &dataloss.PreventionStoredInfoTypeLargeCustomDictionaryOutputPathArgs{
					Path: outputLocation,
				},
			},
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
    // Bucket that holds the uploaded word list and the dictionary output path.
    var bucket = new Gcp.Storage.Bucket("bucket", new()
    {
        Name = "tf-test-bucket",
        Location = "US",
        ForceDestroy = true,
    });

    // Upload the local word list file into the bucket.
    var @object = new Gcp.Storage.BucketObject("object", new()
    {
        Name = "tf-test-object",
        Bucket = bucket.Name,
        Source = new FileAsset("./test-fixtures/words.txt"),
    });

    // gs:// locations are built from resource outputs, so they resolve at deploy time.
    var wordListUrl = Output.Tuple(bucket.Name, @object.Name)
        .Apply(parts => $"gs://{parts.Item1}/{parts.Item2}");
    var outputLocation = bucket.Name
        .Apply(bucketName => $"gs://{bucketName}/output/dictionary.txt");

    var large = new Gcp.DataLoss.PreventionStoredInfoType("large", new()
    {
        Parent = "projects/my-project-name",
        Description = "Description",
        DisplayName = "Displayname",
        LargeCustomDictionary = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeLargeCustomDictionaryArgs
        {
            CloudStorageFileSet = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeLargeCustomDictionaryCloudStorageFileSetArgs
            {
                Url = wordListUrl,
            },
            OutputPath = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeLargeCustomDictionaryOutputPathArgs
            {
                Path = outputLocation,
            },
        },
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.storage.Bucket;
import com.pulumi.gcp.storage.BucketArgs;
import com.pulumi.gcp.storage.BucketObject;
import com.pulumi.gcp.storage.BucketObjectArgs;
import com.pulumi.gcp.dataloss.PreventionStoredInfoType;
import com.pulumi.gcp.dataloss.PreventionStoredInfoTypeArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeLargeCustomDictionaryArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeLargeCustomDictionaryCloudStorageFileSetArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeLargeCustomDictionaryOutputPathArgs;
import com.pulumi.asset.FileAsset;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Pulumi program that provisions a bucket, uploads a word list into it, and
 * declares a large-custom-dictionary stored info type that reads that list.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources that make up the stack. */
    public static void stack(Context ctx) {
        var bucket = new Bucket("bucket", BucketArgs.builder()
            .name("tf-test-bucket")
            .location("US")
            .forceDestroy(true)
            .build());

        // Upload the local word list file into the bucket.
        var object = new BucketObject("object", BucketObjectArgs.builder()
            .name("tf-test-object")
            .bucket(bucket.name())
            .source(new FileAsset("./test-fixtures/words.txt"))
            .build());

        // gs:// locations are built from resource outputs, so they resolve at deploy time.
        final var wordListUrl = Output.tuple(bucket.name(), object.name())
            .applyValue(parts -> String.format("gs://%s/%s", parts.t1, parts.t2));
        final var outputLocation = bucket.name()
            .applyValue(bucketName -> String.format("gs://%s/output/dictionary.txt", bucketName));

        final var fileSet = PreventionStoredInfoTypeLargeCustomDictionaryCloudStorageFileSetArgs.builder()
            .url(wordListUrl)
            .build();
        final var outputPath = PreventionStoredInfoTypeLargeCustomDictionaryOutputPathArgs.builder()
            .path(outputLocation)
            .build();

        var large = new PreventionStoredInfoType("large", PreventionStoredInfoTypeArgs.builder()
            .parent("projects/my-project-name")
            .description("Description")
            .displayName("Displayname")
            .largeCustomDictionary(PreventionStoredInfoTypeLargeCustomDictionaryArgs.builder()
                .cloudStorageFileSet(fileSet)
                .outputPath(outputPath)
                .build())
            .build());
    }
}
# Pulumi YAML program: a large custom dictionary read from Cloud Storage.
# Nesting is significant: `large`, `bucket` and `object` are three separate
# resources under `resources`, each with its own `type` and `properties`.
resources:
  large:
    type: gcp:dataloss:PreventionStoredInfoType
    properties:
      parent: projects/my-project-name
      description: Description
      displayName: Displayname
      largeCustomDictionary:
        cloudStorageFileSet:
          # Input word list: the object uploaded below.
          url: gs://${bucket.name}/${object.name}
        outputPath:
          path: gs://${bucket.name}/output/dictionary.txt
  bucket:
    type: gcp:storage:Bucket
    properties:
      name: tf-test-bucket
      location: US
      forceDestroy: true
  object:
    type: gcp:storage:BucketObject
    properties:
      name: tf-test-object
      bucket: ${bucket.name}
      # Local word list file uploaded as the object's content.
      source:
        fn::FileAsset: ./test-fixtures/words.txt
The largeCustomDictionary property points to a Cloud Storage file containing your word list. The cloudStorageFileSet specifies the input file location, while outputPath defines the Cloud Storage location where DLP stores the dictionary artifacts it generates from that file. This approach scales to millions of terms without configuration size limits.
Control stored info type identifiers
By default, DLP generates random IDs for stored info types. Custom IDs make it easier to reference types across configurations.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Regex detector settings: the pattern to search for and the capture-group
// indexes to extract from each match.
const idRegex = {
    pattern: "patient",
    groupIndexes: [2],
};

// Same regex detector as the basic example, with an explicit stored info type ID.
const withStoredInfoTypeId = new gcp.dataloss.PreventionStoredInfoType("with_stored_info_type_id", {
    parent: "projects/my-project-name",
    description: "Description",
    displayName: "Displayname",
    storedInfoTypeId: "id-",
    regex: idRegex,
});
import pulumi
import pulumi_gcp as gcp
# Regex detector settings: the pattern to search for and the capture-group
# indexes to extract from each match.
id_regex = {
    "pattern": "patient",
    "group_indexes": [2],
}

# Same regex detector as the basic example, with an explicit stored info type ID.
with_stored_info_type_id = gcp.dataloss.PreventionStoredInfoType(
    "with_stored_info_type_id",
    parent="projects/my-project-name",
    description="Description",
    display_name="Displayname",
    stored_info_type_id="id-",
    regex=id_regex,
)
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataloss"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main registers a regex-based stored info type with an explicit ID.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Regex detector: the pattern plus the capture-group indexes to extract.
		regexArgs := &dataloss.PreventionStoredInfoTypeRegexArgs{
			Pattern:      pulumi.String("patient"),
			GroupIndexes: pulumi.IntArray{pulumi.Int(2)},
		}

		infoTypeArgs := &dataloss.PreventionStoredInfoTypeArgs{
			Parent:           pulumi.String("projects/my-project-name"),
			Description:      pulumi.String("Description"),
			DisplayName:      pulumi.String("Displayname"),
			StoredInfoTypeId: pulumi.String("id-"),
			Regex:            regexArgs,
		}

		// The resource handle is not used afterwards, so only the error is kept.
		_, err := dataloss.NewPreventionStoredInfoType(ctx, "with_stored_info_type_id", infoTypeArgs)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
    // Regex detector: the pattern plus the capture-group indexes to extract.
    var regexArgs = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeRegexArgs
    {
        Pattern = "patient",
        GroupIndexes = new[] { 2 },
    };

    // Same regex detector as the basic example, with an explicit stored info type ID.
    var withStoredInfoTypeId = new Gcp.DataLoss.PreventionStoredInfoType("with_stored_info_type_id", new Gcp.DataLoss.PreventionStoredInfoTypeArgs
    {
        Parent = "projects/my-project-name",
        Description = "Description",
        DisplayName = "Displayname",
        StoredInfoTypeId = "id-",
        Regex = regexArgs,
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataloss.PreventionStoredInfoType;
import com.pulumi.gcp.dataloss.PreventionStoredInfoTypeArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeRegexArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/** Pulumi program that declares a regex-based stored info type with an explicit ID. */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources that make up the stack. */
    public static void stack(Context ctx) {
        // Regex detector: the pattern plus the capture-group indexes to extract.
        final var regexArgs = PreventionStoredInfoTypeRegexArgs.builder()
            .pattern("patient")
            .groupIndexes(2)
            .build();

        // The logical name is "with_stored_info_type_id" so that it matches the
        // TypeScript, Python, Go, C# and YAML versions of this example.
        var withStoredInfoTypeId = new PreventionStoredInfoType("with_stored_info_type_id", PreventionStoredInfoTypeArgs.builder()
            .parent("projects/my-project-name")
            .description("Description")
            .displayName("Displayname")
            .storedInfoTypeId("id-")
            .regex(regexArgs)
            .build());
    }
}
# Pulumi YAML program: a regex-based stored info type with an explicit ID.
# Nesting is significant: `name` and `properties` are siblings of `type`
# under the resource key, and the detector settings sit under `regex`.
resources:
  withStoredInfoTypeId:
    type: gcp:dataloss:PreventionStoredInfoType
    name: with_stored_info_type_id
    properties:
      parent: projects/my-project-name
      description: Description
      displayName: Displayname
      storedInfoTypeId: id-
      regex:
        pattern: patient
        # Capture-group indexes to extract from each match.
        groupIndexes:
          - 2
The storedInfoTypeId property sets a custom identifier instead of accepting a generated one. This extends the basic regex configuration by adding predictable naming, making it easier to reference the info type in inspection templates and job configurations.
Beyond these examples
These snippets focus on specific stored info type features: regex patterns and word list dictionaries, Cloud Storage integration for large dictionaries, and custom identifier management. They’re intentionally minimal rather than full DLP scanning solutions.
The examples may reference pre-existing infrastructure such as a GCP project with the DLP API enabled. The large dictionary example creates its own Cloud Storage bucket, but it expects a local word list file (./test-fixtures/words.txt) to upload. They focus on defining custom info types rather than provisioning the surrounding DLP infrastructure.
To keep things focused, common stored info type patterns are omitted, including:
- Dictionary updates and versioning
- Organization-level vs project-level scoping
- Integration with DLP inspection jobs
- Performance tuning for large dictionaries
These omissions are intentional: the goal is to illustrate how each detection method is wired, not provide drop-in DLP modules. See the PreventionStoredInfoType resource reference for all available configuration options.
Let's create GCP Data Loss Prevention Stored Info Types
Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.
Try Pulumi Cloud for FREE
Frequently Asked Questions
Immutability & Updates
The parent and storedInfoTypeId properties are immutable. Changing either forces resource replacement.
Detection Methods
You have three options:
- Regex - Use the regex property with a pattern for matching (e.g., “patient”)
- Dictionary - Use dictionary.wordList.words for small word lists
- Large Custom Dictionary - Use largeCustomDictionary with a Cloud Storage file for large word lists
Use largeCustomDictionary when your word list is too large to manage inline. It requires a Cloud Storage bucket with your word list file and an output path for results.
Set largeCustomDictionary.cloudStorageFileSet.url to your GCS file (e.g., gs://bucket-name/words.txt) and largeCustomDictionary.outputPath.path for the output location.
Configuration & Setup
The parent must use one of these formats: projects/{{project}}, projects/{{project}}/locations/{{location}}, organizations/{{organization_id}}, or organizations/{{organization_id}}/locations/{{location}}.
The storedInfoTypeId must match the regular expression [a-zA-Z\d-_]+ (letters, digits, hyphens, and underscores), with a maximum length of 100 characters.