The gcp:dataloss/preventionStoredInfoType:PreventionStoredInfoType resource, part of the Pulumi GCP provider, defines custom info types for Cloud DLP: regex patterns, word lists, or large dictionaries that identify sensitive data in your organization. This guide focuses on three capabilities: regex-based pattern matching, small and large dictionary configurations, and custom identifier assignment.
Stored info types belong to a GCP project or organization and may reference Cloud Storage buckets for large dictionaries. The examples are intentionally small. Combine them with inspection templates and job triggers to build complete DLP workflows.
Match patterns with regular expressions
Data loss prevention often starts by defining custom patterns that identify sensitive information specific to your organization, such as internal patient IDs or employee codes.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Regex detector settings: the expression to match and the group index to report.
// NOTE(review): the pattern contains no capture groups, yet group index 2 is
// requested — confirm this is intended.
const patientRegex = {
    pattern: "patient",
    groupIndexes: [2],
};

// Stored info type created under the scope given by `parent`.
const basic = new gcp.dataloss.PreventionStoredInfoType("basic", {
    parent: "projects/my-project-name",
    description: "Description",
    displayName: "Displayname",
    regex: patientRegex,
});
import pulumi
import pulumi_gcp as gcp
# Regex detector settings: the expression to match and the group index to report.
# NOTE(review): the pattern contains no capture groups, yet group index 2 is
# requested — confirm this is intended.
patient_regex = {
    "pattern": "patient",
    "group_indexes": [2],
}

# Stored info type created under the scope given by `parent`.
basic = gcp.dataloss.PreventionStoredInfoType(
    "basic",
    parent="projects/my-project-name",
    description="Description",
    display_name="Displayname",
    regex=patient_regex,
)
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataloss"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main runs a Pulumi program that declares one regex-based stored info type.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Regex detector: the expression to match and the group index to report.
		regexArgs := &dataloss.PreventionStoredInfoTypeRegexArgs{
			Pattern:      pulumi.String("patient"),
			GroupIndexes: pulumi.IntArray{pulumi.Int(2)},
		}

		infoTypeArgs := &dataloss.PreventionStoredInfoTypeArgs{
			Parent:      pulumi.String("projects/my-project-name"),
			Description: pulumi.String("Description"),
			DisplayName: pulumi.String("Displayname"),
			Regex:       regexArgs,
		}

		// The resource handle is not used afterwards; only the error is propagated.
		_, err := dataloss.NewPreventionStoredInfoType(ctx, "basic", infoTypeArgs)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
    // Regex detector: the expression to match and the group index to report.
    var patientRegex = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeRegexArgs
    {
        Pattern = "patient",
        GroupIndexes = new[] { 2 },
    };

    // Stored info type created under the scope given by Parent.
    var basic = new Gcp.DataLoss.PreventionStoredInfoType("basic", new()
    {
        Parent = "projects/my-project-name",
        Description = "Description",
        DisplayName = "Displayname",
        Regex = patientRegex,
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataloss.PreventionStoredInfoType;
import com.pulumi.gcp.dataloss.PreventionStoredInfoTypeArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeRegexArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/** Pulumi program that declares one regex-based stored info type. */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources that make up this stack. */
    public static void stack(Context ctx) {
        // Regex detector: the expression to match and the group index to report.
        final var patientRegex = PreventionStoredInfoTypeRegexArgs.builder()
            .pattern("patient")
            .groupIndexes(2)
            .build();

        final var basicArgs = PreventionStoredInfoTypeArgs.builder()
            .parent("projects/my-project-name")
            .description("Description")
            .displayName("Displayname")
            .regex(patientRegex)
            .build();

        var basic = new PreventionStoredInfoType("basic", basicArgs);
    }
}
# Regex-based stored info type created under the scope given by `parent`.
resources:
  basic:
    type: gcp:dataloss:PreventionStoredInfoType
    properties:
      parent: projects/my-project-name
      description: Description
      displayName: Displayname
      # Regex detector: the expression to match and the group index to report.
      regex:
        pattern: patient
        groupIndexes:
          - 2
The regex property defines the pattern to match. The pattern field contains the regular expression itself, while groupIndexes specifies which capture groups to extract. The parent property sets the scope (project or organization) where this info type is available.
Define exact-match word lists
When you have a known set of sensitive terms, exact-match dictionaries provide faster detection than regex patterns.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Inline word list used for exact-match detection.
const sensitiveWords = ["word", "word2"];

// Stored info type backed by the inline word list above.
const dictionary = new gcp.dataloss.PreventionStoredInfoType("dictionary", {
    parent: "projects/my-project-name",
    description: "Description",
    displayName: "Displayname",
    dictionary: {
        wordList: { words: sensitiveWords },
    },
});
import pulumi
import pulumi_gcp as gcp
# Inline word list used for exact-match detection.
sensitive_words = ["word", "word2"]

# Stored info type backed by the inline word list above.
dictionary = gcp.dataloss.PreventionStoredInfoType(
    "dictionary",
    parent="projects/my-project-name",
    description="Description",
    display_name="Displayname",
    dictionary={
        "word_list": {"words": sensitive_words},
    },
)
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataloss"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main runs a Pulumi program that declares one word-list stored info type.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Inline word list used for exact-match detection.
		wordList := &dataloss.PreventionStoredInfoTypeDictionaryWordListArgs{
			Words: pulumi.StringArray{
				pulumi.String("word"),
				pulumi.String("word2"),
			},
		}

		infoTypeArgs := &dataloss.PreventionStoredInfoTypeArgs{
			Parent:      pulumi.String("projects/my-project-name"),
			Description: pulumi.String("Description"),
			DisplayName: pulumi.String("Displayname"),
			Dictionary: &dataloss.PreventionStoredInfoTypeDictionaryArgs{
				WordList: wordList,
			},
		}

		// The resource handle is not used afterwards; only the error is propagated.
		_, err := dataloss.NewPreventionStoredInfoType(ctx, "dictionary", infoTypeArgs)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
    // Inline word list used for exact-match detection.
    var wordList = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeDictionaryWordListArgs
    {
        Words = new[] { "word", "word2" },
    };

    // Stored info type backed by the inline word list above.
    var dictionary = new Gcp.DataLoss.PreventionStoredInfoType("dictionary", new()
    {
        Parent = "projects/my-project-name",
        Description = "Description",
        DisplayName = "Displayname",
        Dictionary = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeDictionaryArgs
        {
            WordList = wordList,
        },
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataloss.PreventionStoredInfoType;
import com.pulumi.gcp.dataloss.PreventionStoredInfoTypeArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeDictionaryArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeDictionaryWordListArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/** Pulumi program that declares one word-list stored info type. */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources that make up this stack. */
    public static void stack(Context ctx) {
        // Inline word list used for exact-match detection.
        final var wordList = PreventionStoredInfoTypeDictionaryWordListArgs.builder()
            .words("word", "word2")
            .build();

        final var dictionaryArgs = PreventionStoredInfoTypeDictionaryArgs.builder()
            .wordList(wordList)
            .build();

        var dictionary = new PreventionStoredInfoType("dictionary", PreventionStoredInfoTypeArgs.builder()
            .parent("projects/my-project-name")
            .description("Description")
            .displayName("Displayname")
            .dictionary(dictionaryArgs)
            .build());
    }
}
# Stored info type backed by an inline word list for exact-match detection.
resources:
  dictionary:
    type: gcp:dataloss:PreventionStoredInfoType
    properties:
      parent: projects/my-project-name
      description: Description
      displayName: Displayname
      dictionary:
        wordList:
          words:
            - word
            - word2
The dictionary property defines a word list for exact matching. The wordList contains an array of terms to detect. This approach works well for small, static lists (project codenames, internal identifiers) but becomes impractical for thousands of terms.
Load large dictionaries from Cloud Storage
Organizations with thousands of sensitive terms need to store dictionaries in Cloud Storage rather than embedding them in configuration.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Bucket that holds the dictionary source file.
const bucket = new gcp.storage.Bucket("bucket", {
    name: "tf-test-bucket",
    location: "US",
    forceDestroy: true,
});

// Dictionary source file uploaded from the local fixture.
const object = new gcp.storage.BucketObject("object", {
    name: "tf-test-object",
    bucket: bucket.name,
    source: new pulumi.asset.FileAsset("./test-fixtures/words.txt"),
});

// gs:// locations derived from the resources above.
const dictionarySourceUrl = pulumi.interpolate`gs://${bucket.name}/${object.name}`;
const dictionaryOutputPath = pulumi.interpolate`gs://${bucket.name}/output/dictionary.txt`;

// Stored info type that reads its terms from Cloud Storage.
const large = new gcp.dataloss.PreventionStoredInfoType("large", {
    parent: "projects/my-project-name",
    description: "Description",
    displayName: "Displayname",
    largeCustomDictionary: {
        cloudStorageFileSet: { url: dictionarySourceUrl },
        outputPath: { path: dictionaryOutputPath },
    },
});
import pulumi
import pulumi_gcp as gcp
# Bucket that holds the dictionary source file.
bucket = gcp.storage.Bucket(
    "bucket",
    name="tf-test-bucket",
    location="US",
    force_destroy=True,
)

# Dictionary source file uploaded from the local fixture.
# Bound to `words_object` rather than `object` so the Python builtin is not shadowed.
words_object = gcp.storage.BucketObject(
    "object",
    name="tf-test-object",
    bucket=bucket.name,
    source=pulumi.FileAsset("./test-fixtures/words.txt"),
)

# gs:// locations derived from the resources above; Output.concat joins
# plain strings and outputs into a single string output.
dictionary_source_url = pulumi.Output.concat("gs://", bucket.name, "/", words_object.name)
dictionary_output_path = pulumi.Output.concat("gs://", bucket.name, "/output/dictionary.txt")

# Stored info type that reads its terms from Cloud Storage.
large = gcp.dataloss.PreventionStoredInfoType(
    "large",
    parent="projects/my-project-name",
    description="Description",
    display_name="Displayname",
    large_custom_dictionary={
        "cloud_storage_file_set": {
            "url": dictionary_source_url,
        },
        "output_path": {
            "path": dictionary_output_path,
        },
    },
)
package main
import (
"fmt"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataloss"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/storage"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main runs a Pulumi program that declares a bucket, a dictionary source
// object, and a stored info type that reads its terms from that object.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Bucket that holds the dictionary source file.
		bucket, err := storage.NewBucket(ctx, "bucket", &storage.BucketArgs{
			Name:         pulumi.String("tf-test-bucket"),
			Location:     pulumi.String("US"),
			ForceDestroy: pulumi.Bool(true),
		})
		if err != nil {
			return err
		}

		// Dictionary source file uploaded from the local fixture.
		object, err := storage.NewBucketObject(ctx, "object", &storage.BucketObjectArgs{
			Name:   pulumi.String("tf-test-object"),
			Bucket: bucket.Name,
			Source: pulumi.NewFileAsset("./test-fixtures/words.txt"),
		})
		if err != nil {
			return err
		}

		// gs:// URL of the uploaded source file, built once both names resolve.
		sourceURL := pulumi.All(bucket.Name, object.Name).ApplyT(func(resolved []interface{}) (string, error) {
			return fmt.Sprintf("gs://%v/%v", resolved[0].(string), resolved[1].(string)), nil
		}).(pulumi.StringOutput)

		// gs:// path handed to the resource as its output path.
		outputPath := bucket.Name.ApplyT(func(bucketName string) (string, error) {
			return fmt.Sprintf("gs://%v/output/dictionary.txt", bucketName), nil
		}).(pulumi.StringOutput)

		// The resource handle is not used afterwards; only the error is propagated.
		_, err = dataloss.NewPreventionStoredInfoType(ctx, "large", &dataloss.PreventionStoredInfoTypeArgs{
			Parent:      pulumi.String("projects/my-project-name"),
			Description: pulumi.String("Description"),
			DisplayName: pulumi.String("Displayname"),
			LargeCustomDictionary: &dataloss.PreventionStoredInfoTypeLargeCustomDictionaryArgs{
				CloudStorageFileSet: &dataloss.PreventionStoredInfoTypeLargeCustomDictionaryCloudStorageFileSetArgs{
					Url: sourceURL,
				},
				OutputPath: &dataloss.PreventionStoredInfoTypeLargeCustomDictionaryOutputPathArgs{
					Path: outputPath,
				},
			},
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
    // Bucket that holds the dictionary source file.
    var bucket = new Gcp.Storage.Bucket("bucket", new()
    {
        Name = "tf-test-bucket",
        Location = "US",
        ForceDestroy = true,
    });

    // Dictionary source file uploaded from the local fixture.
    var @object = new Gcp.Storage.BucketObject("object", new()
    {
        Name = "tf-test-object",
        Bucket = bucket.Name,
        Source = new FileAsset("./test-fixtures/words.txt"),
    });

    // gs:// locations derived from the resources above.
    var sourceUrl = Output.Tuple(bucket.Name, @object.Name)
        .Apply(names => $"gs://{names.Item1}/{names.Item2}");
    var outputPath = bucket.Name
        .Apply(bucketName => $"gs://{bucketName}/output/dictionary.txt");

    // Stored info type that reads its terms from Cloud Storage.
    var large = new Gcp.DataLoss.PreventionStoredInfoType("large", new()
    {
        Parent = "projects/my-project-name",
        Description = "Description",
        DisplayName = "Displayname",
        LargeCustomDictionary = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeLargeCustomDictionaryArgs
        {
            CloudStorageFileSet = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeLargeCustomDictionaryCloudStorageFileSetArgs
            {
                Url = sourceUrl,
            },
            OutputPath = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeLargeCustomDictionaryOutputPathArgs
            {
                Path = outputPath,
            },
        },
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.storage.Bucket;
import com.pulumi.gcp.storage.BucketArgs;
import com.pulumi.gcp.storage.BucketObject;
import com.pulumi.gcp.storage.BucketObjectArgs;
import com.pulumi.gcp.dataloss.PreventionStoredInfoType;
import com.pulumi.gcp.dataloss.PreventionStoredInfoTypeArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeLargeCustomDictionaryArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeLargeCustomDictionaryCloudStorageFileSetArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeLargeCustomDictionaryOutputPathArgs;
import com.pulumi.asset.FileAsset;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Pulumi program that declares a bucket, a dictionary source object, and a
 * stored info type that reads its terms from that object.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources that make up this stack. */
    public static void stack(Context ctx) {
        // Bucket that holds the dictionary source file.
        var bucket = new Bucket("bucket", BucketArgs.builder()
            .name("tf-test-bucket")
            .location("US")
            .forceDestroy(true)
            .build());

        // Dictionary source file uploaded from the local fixture.
        var object = new BucketObject("object", BucketObjectArgs.builder()
            .name("tf-test-object")
            .bucket(bucket.name())
            .source(new FileAsset("./test-fixtures/words.txt"))
            .build());

        // gs:// locations derived from the resources above.
        final var sourceUrl = Output.tuple(bucket.name(), object.name())
            .applyValue(names -> String.format("gs://%s/%s", names.t1, names.t2));
        final var outputPath = bucket.name()
            .applyValue(bucketName -> String.format("gs://%s/output/dictionary.txt", bucketName));

        final var fileSet = PreventionStoredInfoTypeLargeCustomDictionaryCloudStorageFileSetArgs.builder()
            .url(sourceUrl)
            .build();
        final var output = PreventionStoredInfoTypeLargeCustomDictionaryOutputPathArgs.builder()
            .path(outputPath)
            .build();

        // Stored info type that reads its terms from Cloud Storage.
        var large = new PreventionStoredInfoType("large", PreventionStoredInfoTypeArgs.builder()
            .parent("projects/my-project-name")
            .description("Description")
            .displayName("Displayname")
            .largeCustomDictionary(PreventionStoredInfoTypeLargeCustomDictionaryArgs.builder()
                .cloudStorageFileSet(fileSet)
                .outputPath(output)
                .build())
            .build());
    }
}
resources:
  # Stored info type that reads its terms from Cloud Storage.
  large:
    type: gcp:dataloss:PreventionStoredInfoType
    properties:
      parent: projects/my-project-name
      description: Description
      displayName: Displayname
      largeCustomDictionary:
        cloudStorageFileSet:
          url: gs://${bucket.name}/${object.name}
        outputPath:
          path: gs://${bucket.name}/output/dictionary.txt
  # Bucket that holds the dictionary source file.
  bucket:
    type: gcp:storage:Bucket
    properties:
      name: tf-test-bucket
      location: US
      forceDestroy: true
  # Dictionary source file uploaded from the local fixture.
  object:
    type: gcp:storage:BucketObject
    properties:
      name: tf-test-object
      bucket: ${bucket.name}
      source:
        fn::FileAsset: ./test-fixtures/words.txt
The largeCustomDictionary property points to a file in Cloud Storage. The cloudStorageFileSet specifies the input file location, while outputPath defines where DLP writes processing results. This example creates the bucket and file; in production, you’d typically reference existing storage.
Control the stored info type identifier
By default, DLP generates random IDs for stored info types. Custom IDs make it easier to reference these types consistently across inspection templates and job triggers.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Regex detector settings: the expression to match and the group index to report.
const patientRegex = {
    pattern: "patient",
    groupIndexes: [2],
};

// Regex-based stored info type with an explicitly chosen identifier.
const withStoredInfoTypeId = new gcp.dataloss.PreventionStoredInfoType("with_stored_info_type_id", {
    parent: "projects/my-project-name",
    description: "Description",
    displayName: "Displayname",
    storedInfoTypeId: "id-",
    regex: patientRegex,
});
import pulumi
import pulumi_gcp as gcp
# Regex detector settings: the expression to match and the group index to report.
patient_regex = {
    "pattern": "patient",
    "group_indexes": [2],
}

# Regex-based stored info type with an explicitly chosen identifier.
with_stored_info_type_id = gcp.dataloss.PreventionStoredInfoType(
    "with_stored_info_type_id",
    parent="projects/my-project-name",
    description="Description",
    display_name="Displayname",
    stored_info_type_id="id-",
    regex=patient_regex,
)
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataloss"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main runs a Pulumi program that declares a regex-based stored info type
// with an explicitly chosen identifier.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Regex detector: the expression to match and the group index to report.
		regexArgs := &dataloss.PreventionStoredInfoTypeRegexArgs{
			Pattern:      pulumi.String("patient"),
			GroupIndexes: pulumi.IntArray{pulumi.Int(2)},
		}

		infoTypeArgs := &dataloss.PreventionStoredInfoTypeArgs{
			Parent:           pulumi.String("projects/my-project-name"),
			Description:      pulumi.String("Description"),
			DisplayName:      pulumi.String("Displayname"),
			StoredInfoTypeId: pulumi.String("id-"),
			Regex:            regexArgs,
		}

		// The resource handle is not used afterwards; only the error is propagated.
		_, err := dataloss.NewPreventionStoredInfoType(ctx, "with_stored_info_type_id", infoTypeArgs)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
    // Regex detector: the expression to match and the group index to report.
    var patientRegex = new Gcp.DataLoss.Inputs.PreventionStoredInfoTypeRegexArgs
    {
        Pattern = "patient",
        GroupIndexes = new[] { 2 },
    };

    // Regex-based stored info type with an explicitly chosen identifier.
    var withStoredInfoTypeId = new Gcp.DataLoss.PreventionStoredInfoType("with_stored_info_type_id", new()
    {
        Parent = "projects/my-project-name",
        Description = "Description",
        DisplayName = "Displayname",
        StoredInfoTypeId = "id-",
        Regex = patientRegex,
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataloss.PreventionStoredInfoType;
import com.pulumi.gcp.dataloss.PreventionStoredInfoTypeArgs;
import com.pulumi.gcp.dataloss.inputs.PreventionStoredInfoTypeRegexArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Pulumi program that declares a regex-based stored info type with an
 * explicitly chosen identifier.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the resources that make up this stack. */
    public static void stack(Context ctx) {
        // Regex detector: the expression to match and the group index to report.
        final var patientRegex = PreventionStoredInfoTypeRegexArgs.builder()
            .pattern("patient")
            .groupIndexes(2)
            .build();

        // The resource name is "with_stored_info_type_id" so that it matches the
        // name used by the TypeScript, Python, Go, C# and YAML versions of this
        // example.
        var withStoredInfoTypeId = new PreventionStoredInfoType("with_stored_info_type_id", PreventionStoredInfoTypeArgs.builder()
            .parent("projects/my-project-name")
            .description("Description")
            .displayName("Displayname")
            .storedInfoTypeId("id-")
            .regex(patientRegex)
            .build());
    }
}
# Regex-based stored info type with an explicitly chosen identifier.
resources:
  withStoredInfoTypeId:
    type: gcp:dataloss:PreventionStoredInfoType
    name: with_stored_info_type_id
    properties:
      parent: projects/my-project-name
      description: Description
      displayName: Displayname
      storedInfoTypeId: id-
      # Regex detector: the expression to match and the group index to report.
      regex:
        pattern: patient
        groupIndexes:
          - 2
The storedInfoTypeId property sets a custom identifier instead of accepting a generated one. This extends the basic regex configuration by adding a predictable ID for cross-resource references. The ID must match the pattern [a-zA-Z\d-_]+ and can be up to 100 characters.
Beyond these examples
These snippets focus on specific stored info type features: regex patterns and word list dictionaries, Cloud Storage integration for large dictionaries, and custom identifier assignment. They’re intentionally minimal rather than complete DLP scanning solutions.
The examples may reference pre-existing infrastructure such as a GCP project with DLP API enabled, and Cloud Storage buckets for the large dictionary example. They focus on defining info types rather than using them in inspection workflows.
To keep things focused, common stored info type patterns are omitted, including:
- Dictionary updates and versioning
- Organization-level vs project-level scoping
- Integration with inspection templates and job triggers
- Performance tuning (groupIndexes optimization)
These omissions are intentional: the goal is to illustrate how each detection method is wired, not provide drop-in DLP modules. See the PreventionStoredInfoType resource reference for all available configuration options.
Let's create GCP Data Loss Prevention Stored Info Types
Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.
Try Pulumi Cloud for FREE
Frequently Asked Questions
Configuration & Immutability
parent and storedInfoTypeId are immutable properties. Changing either forces resource replacement.
storedInfoTypeId must match the pattern [a-zA-Z\d-_]+ with a maximum length of 100 characters. You can leave it empty to let GCP auto-generate one.
parent accepts four formats: projects/{{project}}, projects/{{project}}/locations/{{location}}, organizations/{{organization_id}}, or organizations/{{organization_id}}/locations/{{location}}.
Detection Methods
You have three options:
- Regex - Use regex with a pattern and optional groupIndexes
- Dictionary - Use dictionary.wordList.words for a simple word list
- Large custom dictionary - Use largeCustomDictionary with a Cloud Storage file
Use largeCustomDictionary when your word list is too large to manage inline. It requires a Cloud Storage bucket with cloudStorageFileSet.url pointing to your dictionary file and an outputPath for results.
Use the regex property with a pattern field. Optionally include groupIndexes to specify which regex groups to match.
Import & Management
Stored info types can be imported using either of these ID formats: {{parent}}/storedInfoTypes/{{name}} or {{parent}}/{{name}}.