The gcp:discoveryengine/dataStore:DataStore resource, part of the Pulumi GCP provider, defines a Discovery Engine data store that serves as a container for websites and documents used in search and conversation applications. This guide focuses on four capabilities: basic data store setup, customer-managed encryption, document processing configuration, and advanced site search controls.
Data stores require a GCP project with the Discovery Engine API enabled and may reference Cloud KMS keys for encryption. The examples are intentionally small. Combine them with your own document ingestion pipelines and search applications.
Create a data store for search applications
Most deployments start with a basic data store that defines the collection’s purpose and content type.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Settings for a globally located, GENERIC-vertical search data store with
// NO_CONTENT content config. Advanced site search stays off and the default
// schema is not skipped.
const basicArgs: gcp.discoveryengine.DataStoreArgs = {
    dataStoreId: "data-store-id",
    displayName: "tf-test-structured-datastore",
    location: "global",
    industryVertical: "GENERIC",
    contentConfig: "NO_CONTENT",
    solutionTypes: ["SOLUTION_TYPE_SEARCH"],
    createAdvancedSiteSearch: false,
    skipDefaultSchemaCreation: false,
};

// Register the data store under the logical name "basic".
const basic = new gcp.discoveryengine.DataStore("basic", basicArgs);
import pulumi
import pulumi_gcp as gcp
# Data store registered under the logical name "basic": global location,
# GENERIC vertical, NO_CONTENT content config, search solution type.
basic = gcp.discoveryengine.DataStore(
    "basic",
    data_store_id="data-store-id",
    display_name="tf-test-structured-datastore",
    location="global",
    industry_vertical="GENERIC",
    content_config="NO_CONTENT",
    solution_types=["SOLUTION_TYPE_SEARCH"],
    # Advanced site search stays off and the default schema is not skipped.
    create_advanced_site_search=False,
    skip_default_schema_creation=False,
)
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/discoveryengine"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main runs a Pulumi program that registers a single Discovery Engine data
// store under the logical name "basic".
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Global location, GENERIC vertical, NO_CONTENT content config,
		// search solution type.
		storeArgs := &discoveryengine.DataStoreArgs{
			DataStoreId:               pulumi.String("data-store-id"),
			DisplayName:               pulumi.String("tf-test-structured-datastore"),
			Location:                  pulumi.String("global"),
			IndustryVertical:          pulumi.String("GENERIC"),
			ContentConfig:             pulumi.String("NO_CONTENT"),
			SolutionTypes:             pulumi.StringArray{pulumi.String("SOLUTION_TYPE_SEARCH")},
			CreateAdvancedSiteSearch:  pulumi.Bool(false),
			SkipDefaultSchemaCreation: pulumi.Bool(false),
		}

		// The resource handle is not needed; only the error is propagated.
		_, err := discoveryengine.NewDataStore(ctx, "basic", storeArgs)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
    // Global location, GENERIC vertical, NO_CONTENT content config,
    // search solution type; advanced site search off, default schema kept.
    var storeArgs = new Gcp.DiscoveryEngine.DataStoreArgs
    {
        DataStoreId = "data-store-id",
        DisplayName = "tf-test-structured-datastore",
        Location = "global",
        IndustryVertical = "GENERIC",
        ContentConfig = "NO_CONTENT",
        SolutionTypes = new[] { "SOLUTION_TYPE_SEARCH" },
        CreateAdvancedSiteSearch = false,
        SkipDefaultSchemaCreation = false,
    };

    // Register the data store under the logical name "basic".
    var basic = new Gcp.DiscoveryEngine.DataStore("basic", storeArgs);
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.discoveryengine.DataStore;
import com.pulumi.gcp.discoveryengine.DataStoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/** Pulumi program that registers a single Discovery Engine data store named "basic". */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the stack's resources. */
    public static void stack(Context ctx) {
        // Global location, GENERIC vertical, NO_CONTENT content config,
        // search solution type; advanced site search off, default schema kept.
        final var storeArgs = DataStoreArgs.builder()
            .dataStoreId("data-store-id")
            .displayName("tf-test-structured-datastore")
            .location("global")
            .industryVertical("GENERIC")
            .contentConfig("NO_CONTENT")
            .solutionTypes("SOLUTION_TYPE_SEARCH")
            .createAdvancedSiteSearch(false)
            .skipDefaultSchemaCreation(false)
            .build();

        var basic = new DataStore("basic", storeArgs);
    }
}
# Pulumi YAML program: one Discovery Engine data store named "basic".
resources:
  basic:
    type: gcp:discoveryengine:DataStore
    properties:
      location: global
      dataStoreId: data-store-id
      displayName: tf-test-structured-datastore
      industryVertical: GENERIC
      contentConfig: NO_CONTENT
      solutionTypes:
        - SOLUTION_TYPE_SEARCH
      # Advanced site search stays off and the default schema is not skipped.
      createAdvancedSiteSearch: false
      skipDefaultSchemaCreation: false
The dataStoreId provides a unique identifier within your project. The industryVertical determines which features are available (GENERIC, MEDIA, or HEALTHCARE_FHIR). The contentConfig specifies whether the store contains structured data (NO_CONTENT), unstructured documents (CONTENT_REQUIRED), or public websites (PUBLIC_WEBSITE). The solutionTypes array declares which Discovery Engine features you’ll use: search, chat, recommendations, or generative chat. The location property controls data residency and must be “global”, “us”, or “eu”.
Encrypt data stores with customer-managed keys
Organizations with compliance requirements can control encryption keys through Cloud KMS.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Settings for a data store in the "us" location that sets kmsKeyName.
// "kms-key" is the value used by this example.
const kmsStoreArgs: gcp.discoveryengine.DataStoreArgs = {
    dataStoreId: "data-store-id",
    displayName: "tf-test-structured-datastore",
    location: "us",
    kmsKeyName: "kms-key",
    industryVertical: "GENERIC",
    contentConfig: "NO_CONTENT",
    solutionTypes: ["SOLUTION_TYPE_SEARCH"],
    createAdvancedSiteSearch: false,
    skipDefaultSchemaCreation: false,
};

// Register the data store under the logical name "kms_key_name".
const kmsKeyName = new gcp.discoveryengine.DataStore("kms_key_name", kmsStoreArgs);
import pulumi
import pulumi_gcp as gcp
# Data store in the "us" location that sets kms_key_name; registered under
# the logical name "kms_key_name".
kms_key_name = gcp.discoveryengine.DataStore(
    "kms_key_name",
    data_store_id="data-store-id",
    display_name="tf-test-structured-datastore",
    location="us",
    kms_key_name="kms-key",
    industry_vertical="GENERIC",
    content_config="NO_CONTENT",
    solution_types=["SOLUTION_TYPE_SEARCH"],
    create_advanced_site_search=False,
    skip_default_schema_creation=False,
)
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/discoveryengine"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main runs a Pulumi program that registers one data store, located in "us"
// with KmsKeyName set, under the logical name "kms_key_name".
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		storeArgs := &discoveryengine.DataStoreArgs{
			DataStoreId:               pulumi.String("data-store-id"),
			DisplayName:               pulumi.String("tf-test-structured-datastore"),
			Location:                  pulumi.String("us"),
			KmsKeyName:                pulumi.String("kms-key"),
			IndustryVertical:          pulumi.String("GENERIC"),
			ContentConfig:             pulumi.String("NO_CONTENT"),
			SolutionTypes:             pulumi.StringArray{pulumi.String("SOLUTION_TYPE_SEARCH")},
			CreateAdvancedSiteSearch:  pulumi.Bool(false),
			SkipDefaultSchemaCreation: pulumi.Bool(false),
		}

		// The resource handle is not needed; only the error is propagated.
		_, err := discoveryengine.NewDataStore(ctx, "kms_key_name", storeArgs)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
    // Data store in the "us" location with KmsKeyName set.
    var storeArgs = new Gcp.DiscoveryEngine.DataStoreArgs
    {
        DataStoreId = "data-store-id",
        DisplayName = "tf-test-structured-datastore",
        Location = "us",
        KmsKeyName = "kms-key",
        IndustryVertical = "GENERIC",
        ContentConfig = "NO_CONTENT",
        SolutionTypes = new[] { "SOLUTION_TYPE_SEARCH" },
        CreateAdvancedSiteSearch = false,
        SkipDefaultSchemaCreation = false,
    };

    // Register the data store under the logical name "kms_key_name".
    var kmsKeyName = new Gcp.DiscoveryEngine.DataStore("kms_key_name", storeArgs);
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.discoveryengine.DataStore;
import com.pulumi.gcp.discoveryengine.DataStoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/** Pulumi program that registers one data store with kmsKeyName set. */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the stack's resources. */
    public static void stack(Context ctx) {
        // The logical name is "kms_key_name" so this program registers the same
        // resource name as the TypeScript, Python, Go, C# and YAML versions.
        var kmsKeyName = new DataStore("kms_key_name", DataStoreArgs.builder()
            .location("us")
            .dataStoreId("data-store-id")
            .displayName("tf-test-structured-datastore")
            .industryVertical("GENERIC")
            .contentConfig("NO_CONTENT")
            .solutionTypes("SOLUTION_TYPE_SEARCH")
            .kmsKeyName("kms-key")
            .createAdvancedSiteSearch(false)
            .skipDefaultSchemaCreation(false)
            .build());
    }
}
# Pulumi YAML program: one data store in the "us" location with kmsKeyName set.
resources:
  kmsKeyName:
    type: gcp:discoveryengine:DataStore
    # Logical resource name, matching the other language versions.
    name: kms_key_name
    properties:
      location: us
      dataStoreId: data-store-id
      displayName: tf-test-structured-datastore
      industryVertical: GENERIC
      contentConfig: NO_CONTENT
      solutionTypes:
        - SOLUTION_TYPE_SEARCH
      kmsKeyName: kms-key
      createAdvancedSiteSearch: false
      skipDefaultSchemaCreation: false
The kmsKeyName property references a Cloud KMS key resource; the “kms-key” value in the examples is a placeholder for your key’s full resource name. When set, Discovery Engine encrypts the data store using your customer-managed key instead of Google-managed keys. This satisfies CMEK compliance requirements. The location must be “us” or “eu” when using CMEK (not “global”).
Configure document parsing and OCR processing
When ingesting documents, you can control how Discovery Engine extracts text from different file types.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Parsing rules: an empty digitalParsingConfig as the default, plus one
// override for the "pdf" file type that uses ocrParsingConfig with
// useNativeText enabled.
const parsingRules = {
    defaultParsingConfig: {
        digitalParsingConfig: {},
    },
    parsingConfigOverrides: [
        {
            fileType: "pdf",
            ocrParsingConfig: { useNativeText: true },
        },
    ],
};

// Register the data store under the logical name "document_processing_config".
const documentProcessingConfig = new gcp.discoveryengine.DataStore("document_processing_config", {
    dataStoreId: "data-store-id",
    displayName: "tf-test-structured-datastore",
    location: "global",
    industryVertical: "GENERIC",
    contentConfig: "NO_CONTENT",
    solutionTypes: ["SOLUTION_TYPE_SEARCH"],
    createAdvancedSiteSearch: false,
    documentProcessingConfig: parsingRules,
});
import pulumi
import pulumi_gcp as gcp
# Parsing rules: an empty digital_parsing_config as the default, plus one
# override for the "pdf" file type that uses ocr_parsing_config with
# use_native_text enabled.
parsing_rules = {
    "default_parsing_config": {
        "digital_parsing_config": {},
    },
    "parsing_config_overrides": [
        {
            "file_type": "pdf",
            "ocr_parsing_config": {"use_native_text": True},
        },
    ],
}

# Register the data store under the logical name "document_processing_config".
document_processing_config = gcp.discoveryengine.DataStore(
    "document_processing_config",
    data_store_id="data-store-id",
    display_name="tf-test-structured-datastore",
    location="global",
    industry_vertical="GENERIC",
    content_config="NO_CONTENT",
    solution_types=["SOLUTION_TYPE_SEARCH"],
    create_advanced_site_search=False,
    document_processing_config=parsing_rules,
)
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/discoveryengine"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main runs a Pulumi program that registers one data store with a document
// processing configuration under the logical name "document_processing_config".
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Override for the "pdf" file type: OCR parsing config with
		// UseNativeText enabled.
		pdfOverride := &discoveryengine.DataStoreDocumentProcessingConfigParsingConfigOverrideArgs{
			FileType: pulumi.String("pdf"),
			OcrParsingConfig: &discoveryengine.DataStoreDocumentProcessingConfigParsingConfigOverrideOcrParsingConfigArgs{
				UseNativeText: pulumi.Bool(true),
			},
		}

		// Default parser is an empty digital parsing config.
		parsingRules := &discoveryengine.DataStoreDocumentProcessingConfigArgs{
			DefaultParsingConfig: &discoveryengine.DataStoreDocumentProcessingConfigDefaultParsingConfigArgs{
				DigitalParsingConfig: &discoveryengine.DataStoreDocumentProcessingConfigDefaultParsingConfigDigitalParsingConfigArgs{},
			},
			ParsingConfigOverrides: discoveryengine.DataStoreDocumentProcessingConfigParsingConfigOverrideArray{
				pdfOverride,
			},
		}

		storeArgs := &discoveryengine.DataStoreArgs{
			DataStoreId:              pulumi.String("data-store-id"),
			DisplayName:              pulumi.String("tf-test-structured-datastore"),
			Location:                 pulumi.String("global"),
			IndustryVertical:         pulumi.String("GENERIC"),
			ContentConfig:            pulumi.String("NO_CONTENT"),
			SolutionTypes:            pulumi.StringArray{pulumi.String("SOLUTION_TYPE_SEARCH")},
			CreateAdvancedSiteSearch: pulumi.Bool(false),
			DocumentProcessingConfig: parsingRules,
		}

		// The resource handle is not needed; only the error is propagated.
		_, err := discoveryengine.NewDataStore(ctx, "document_processing_config", storeArgs)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
    // Data store with a document processing configuration, registered under
    // the logical name "document_processing_config".
    var documentProcessingConfig = new Gcp.DiscoveryEngine.DataStore("document_processing_config", new()
    {
        Location = "global",
        DataStoreId = "data-store-id",
        DisplayName = "tf-test-structured-datastore",
        IndustryVertical = "GENERIC",
        ContentConfig = "NO_CONTENT",
        SolutionTypes = new[]
        {
            "SOLUTION_TYPE_SEARCH",
        },
        CreateAdvancedSiteSearch = false,
        DocumentProcessingConfig = new Gcp.DiscoveryEngine.Inputs.DataStoreDocumentProcessingConfigArgs
        {
            DefaultParsingConfig = new Gcp.DiscoveryEngine.Inputs.DataStoreDocumentProcessingConfigDefaultParsingConfigArgs
            {
                // Pass an empty args object rather than null: null leaves the
                // block unset, whereas the other language versions of this
                // example send an empty digitalParsingConfig block.
                DigitalParsingConfig = new Gcp.DiscoveryEngine.Inputs.DataStoreDocumentProcessingConfigDefaultParsingConfigDigitalParsingConfigArgs(),
            },
            ParsingConfigOverrides = new[]
            {
                // Override for the "pdf" file type: OCR parsing config with
                // UseNativeText enabled.
                new Gcp.DiscoveryEngine.Inputs.DataStoreDocumentProcessingConfigParsingConfigOverrideArgs
                {
                    FileType = "pdf",
                    OcrParsingConfig = new Gcp.DiscoveryEngine.Inputs.DataStoreDocumentProcessingConfigParsingConfigOverrideOcrParsingConfigArgs
                    {
                        UseNativeText = true,
                    },
                },
            },
        },
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.discoveryengine.DataStore;
import com.pulumi.gcp.discoveryengine.DataStoreArgs;
import com.pulumi.gcp.discoveryengine.inputs.DataStoreDocumentProcessingConfigArgs;
import com.pulumi.gcp.discoveryengine.inputs.DataStoreDocumentProcessingConfigDefaultParsingConfigArgs;
import com.pulumi.gcp.discoveryengine.inputs.DataStoreDocumentProcessingConfigDefaultParsingConfigDigitalParsingConfigArgs;
import com.pulumi.gcp.discoveryengine.inputs.DataStoreDocumentProcessingConfigParsingConfigOverrideArgs;
import com.pulumi.gcp.discoveryengine.inputs.DataStoreDocumentProcessingConfigParsingConfigOverrideOcrParsingConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/** Pulumi program that registers one data store with a document processing configuration. */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the stack's resources. */
    public static void stack(Context ctx) {
        // The logical name is "document_processing_config" so this program
        // registers the same resource name as the other language versions.
        var documentProcessingConfig = new DataStore("document_processing_config", DataStoreArgs.builder()
            .location("global")
            .dataStoreId("data-store-id")
            .displayName("tf-test-structured-datastore")
            .industryVertical("GENERIC")
            .contentConfig("NO_CONTENT")
            .solutionTypes("SOLUTION_TYPE_SEARCH")
            .createAdvancedSiteSearch(false)
            .documentProcessingConfig(DataStoreDocumentProcessingConfigArgs.builder()
                // Default parser: an empty digital parsing config.
                .defaultParsingConfig(DataStoreDocumentProcessingConfigDefaultParsingConfigArgs.builder()
                    .digitalParsingConfig(DataStoreDocumentProcessingConfigDefaultParsingConfigDigitalParsingConfigArgs.builder()
                        .build())
                    .build())
                // Override for the "pdf" file type: OCR parsing config with
                // useNativeText enabled.
                .parsingConfigOverrides(DataStoreDocumentProcessingConfigParsingConfigOverrideArgs.builder()
                    .fileType("pdf")
                    .ocrParsingConfig(DataStoreDocumentProcessingConfigParsingConfigOverrideOcrParsingConfigArgs.builder()
                        .useNativeText(true)
                        .build())
                    .build())
                .build())
            .build());
    }
}
# Pulumi YAML program: one data store with a document processing configuration.
resources:
  documentProcessingConfig:
    type: gcp:discoveryengine:DataStore
    # Logical resource name, matching the other language versions.
    name: document_processing_config
    properties:
      location: global
      dataStoreId: data-store-id
      displayName: tf-test-structured-datastore
      industryVertical: GENERIC
      contentConfig: NO_CONTENT
      solutionTypes:
        - SOLUTION_TYPE_SEARCH
      createAdvancedSiteSearch: false
      documentProcessingConfig:
        # Default parser: an empty digital parsing config.
        defaultParsingConfig:
          digitalParsingConfig: {}
        # Override for the pdf file type: OCR parsing with useNativeText enabled.
        parsingConfigOverrides:
          - fileType: pdf
            ocrParsingConfig:
              useNativeText: true
The documentProcessingConfig defines parsing behavior. The defaultParsingConfig applies to all documents unless overridden. Here, digitalParsingConfig handles standard digital documents. The parsingConfigOverrides array lets you specify different processing for specific file types. In this example, PDFs use ocrParsingConfig with useNativeText enabled, which extracts embedded text rather than performing OCR on the entire document.
Enable advanced site search with custom indexing
Public website data stores can use advanced features that control indexing and refresh behavior.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
// Site-search settings: both the initial index and automatic refresh are
// disabled.
const siteSearchSettings = {
    disableInitialIndex: true,
    disableAutomaticRefresh: true,
};

// PUBLIC_WEBSITE data store with createAdvancedSiteSearch enabled, registered
// under the logical name "advanced_site_search_config".
const advancedSiteSearchConfig = new gcp.discoveryengine.DataStore("advanced_site_search_config", {
    dataStoreId: "data-store-id",
    displayName: "tf-test-advanced-site-search-config-datastore",
    location: "global",
    industryVertical: "GENERIC",
    contentConfig: "PUBLIC_WEBSITE",
    solutionTypes: ["SOLUTION_TYPE_CHAT"],
    createAdvancedSiteSearch: true,
    skipDefaultSchemaCreation: false,
    advancedSiteSearchConfig: siteSearchSettings,
});
import pulumi
import pulumi_gcp as gcp
# Site-search settings: both the initial index and automatic refresh are
# disabled.
site_search_settings = {
    "disable_initial_index": True,
    "disable_automatic_refresh": True,
}

# PUBLIC_WEBSITE data store with create_advanced_site_search enabled,
# registered under the logical name "advanced_site_search_config".
advanced_site_search_config = gcp.discoveryengine.DataStore(
    "advanced_site_search_config",
    data_store_id="data-store-id",
    display_name="tf-test-advanced-site-search-config-datastore",
    location="global",
    industry_vertical="GENERIC",
    content_config="PUBLIC_WEBSITE",
    solution_types=["SOLUTION_TYPE_CHAT"],
    create_advanced_site_search=True,
    skip_default_schema_creation=False,
    advanced_site_search_config=site_search_settings,
)
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/discoveryengine"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main runs a Pulumi program that registers one PUBLIC_WEBSITE data store with
// advanced site search enabled, under the logical name
// "advanced_site_search_config".
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Both the initial index and automatic refresh are disabled.
		siteSearch := &discoveryengine.DataStoreAdvancedSiteSearchConfigArgs{
			DisableInitialIndex:     pulumi.Bool(true),
			DisableAutomaticRefresh: pulumi.Bool(true),
		}

		storeArgs := &discoveryengine.DataStoreArgs{
			DataStoreId:               pulumi.String("data-store-id"),
			DisplayName:               pulumi.String("tf-test-advanced-site-search-config-datastore"),
			Location:                  pulumi.String("global"),
			IndustryVertical:          pulumi.String("GENERIC"),
			ContentConfig:             pulumi.String("PUBLIC_WEBSITE"),
			SolutionTypes:             pulumi.StringArray{pulumi.String("SOLUTION_TYPE_CHAT")},
			CreateAdvancedSiteSearch:  pulumi.Bool(true),
			SkipDefaultSchemaCreation: pulumi.Bool(false),
			AdvancedSiteSearchConfig:  siteSearch,
		}

		// The resource handle is not needed; only the error is propagated.
		_, err := discoveryengine.NewDataStore(ctx, "advanced_site_search_config", storeArgs)
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
    // Both the initial index and automatic refresh are disabled.
    var siteSearch = new Gcp.DiscoveryEngine.Inputs.DataStoreAdvancedSiteSearchConfigArgs
    {
        DisableInitialIndex = true,
        DisableAutomaticRefresh = true,
    };

    // PUBLIC_WEBSITE data store with CreateAdvancedSiteSearch enabled.
    var storeArgs = new Gcp.DiscoveryEngine.DataStoreArgs
    {
        DataStoreId = "data-store-id",
        DisplayName = "tf-test-advanced-site-search-config-datastore",
        Location = "global",
        IndustryVertical = "GENERIC",
        ContentConfig = "PUBLIC_WEBSITE",
        SolutionTypes = new[] { "SOLUTION_TYPE_CHAT" },
        CreateAdvancedSiteSearch = true,
        SkipDefaultSchemaCreation = false,
        AdvancedSiteSearchConfig = siteSearch,
    };

    // Register the data store under the logical name "advanced_site_search_config".
    var advancedSiteSearchConfig = new Gcp.DiscoveryEngine.DataStore("advanced_site_search_config", storeArgs);
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.discoveryengine.DataStore;
import com.pulumi.gcp.discoveryengine.DataStoreArgs;
import com.pulumi.gcp.discoveryengine.inputs.DataStoreAdvancedSiteSearchConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
/** Pulumi program that registers one PUBLIC_WEBSITE data store with advanced site search enabled. */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the stack's resources. */
    public static void stack(Context ctx) {
        // The logical name is "advanced_site_search_config" so this program
        // registers the same resource name as the other language versions.
        var advancedSiteSearchConfig = new DataStore("advanced_site_search_config", DataStoreArgs.builder()
            .location("global")
            .dataStoreId("data-store-id")
            .displayName("tf-test-advanced-site-search-config-datastore")
            .industryVertical("GENERIC")
            .contentConfig("PUBLIC_WEBSITE")
            .solutionTypes("SOLUTION_TYPE_CHAT")
            .createAdvancedSiteSearch(true)
            .skipDefaultSchemaCreation(false)
            // Both the initial index and automatic refresh are disabled.
            .advancedSiteSearchConfig(DataStoreAdvancedSiteSearchConfigArgs.builder()
                .disableInitialIndex(true)
                .disableAutomaticRefresh(true)
                .build())
            .build());
    }
}
# Pulumi YAML program: one PUBLIC_WEBSITE data store with advanced site search.
resources:
  advancedSiteSearchConfig:
    type: gcp:discoveryengine:DataStore
    # Logical resource name, matching the other language versions.
    name: advanced_site_search_config
    properties:
      location: global
      dataStoreId: data-store-id
      displayName: tf-test-advanced-site-search-config-datastore
      industryVertical: GENERIC
      contentConfig: PUBLIC_WEBSITE
      solutionTypes:
        - SOLUTION_TYPE_CHAT
      createAdvancedSiteSearch: true
      skipDefaultSchemaCreation: false
      # Both the initial index and automatic refresh are disabled.
      advancedSiteSearchConfig:
        disableInitialIndex: true
        disableAutomaticRefresh: true
Advanced site search requires contentConfig set to PUBLIC_WEBSITE and createAdvancedSiteSearch set to true. The advancedSiteSearchConfig block controls indexing behavior. Setting disableInitialIndex to true prevents automatic crawling at creation time. Setting disableAutomaticRefresh to true stops periodic re-indexing, giving you manual control over when Discovery Engine crawls your sites.
Beyond these examples
These snippets focus on specific data store features: basic data store creation and configuration, customer-managed encryption keys, document processing and OCR customization, and advanced site search controls. They’re intentionally minimal rather than full search applications.
The examples may reference pre-existing infrastructure such as a GCP project with Discovery Engine API enabled, and Cloud KMS keys for the CMEK example. They focus on configuring the data store rather than provisioning the surrounding infrastructure.
To keep things focused, common data store patterns are omitted, including:
- Schema creation and customization (skipDefaultSchemaCreation flag)
- Starting schema specification
- Multi-region deployment strategies
- Document ingestion and indexing workflows
These omissions are intentional: the goal is to illustrate how each data store feature is wired, not provide drop-in search modules. See the Discovery Engine DataStore resource reference for all available configuration options.
Let's create GCP Discovery Engine Data Stores
Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.
Try Pulumi Cloud for FREE
Frequently Asked Questions
Configuration & Immutability
The following properties are immutable after creation: dataStoreId, location, project, industryVertical, contentConfig, solutionTypes, documentProcessingConfig, and advancedSiteSearchConfig.
Valid location values are global, us, or eu.
Valid contentConfig values are NO_CONTENT (no content stored), CONTENT_REQUIRED (content must be provided), and PUBLIC_WEBSITE (for public website indexing).
Schema Management
Only enable skipDefaultSchemaCreation if you’re certain the default schema is incompatible with your use case. If set to true, you must manually create a schema before any documents can be ingested.
skipDefaultSchemaCreation cannot be specified if data_store.starting_schema is specified.
Solution Types & Industry Verticals
Valid solutionTypes values are SOLUTION_TYPE_RECOMMENDATION, SOLUTION_TYPE_SEARCH, SOLUTION_TYPE_CHAT, and SOLUTION_TYPE_GENERATIVE_CHAT. This property is immutable after creation.
Valid industryVertical values are GENERIC (general purpose), MEDIA (media content), and HEALTHCARE_FHIR (healthcare data). This property is immutable after creation.
Advanced Features
The createAdvancedSiteSearch flag is ignored if the data store is not configured for site search, which requires industryVertical set to GENERIC and contentConfig set to PUBLIC_WEBSITE.
Set kmsKeyName to your KMS key resource name in the format projects/{project}/locations/{location}/keyRings/{keyRing}/cryptoKeys/{keyId}. This is required for CMEK Org Policy compliance.
Use documentProcessingConfig with parsingConfigOverrides to specify custom parsing for file types like PDF. You can configure options such as ocrParsingConfig with useNativeText enabled.
Using a different cloud?
Explore analytics guides for other cloud providers: