The gcp:dataproc/metastoreService:MetastoreService resource, part of the Pulumi GCP provider, provisions a managed Hive metastore service that stores and serves metadata for data processing workloads. This guide focuses on four capabilities: service tier and Hive version selection, customer-managed encryption, VPC networking and scheduled backups, and Spanner backend with autoscaling.
Metastores may reference KMS keys for encryption, VPC subnets for private networking, and Cloud Storage buckets for backups. The examples are intentionally small. Combine them with your own network infrastructure and backup policies.
Create a basic metastore with maintenance windows
Most deployments start with a minimal configuration that specifies the Hive version, service tier, and maintenance schedule.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const _default = new gcp.dataproc.MetastoreService("default", {
serviceId: "metastore-srv",
location: "us-central1",
port: 9080,
tier: "DEVELOPER",
maintenanceWindow: {
hourOfDay: 2,
dayOfWeek: "SUNDAY",
},
hiveMetastoreConfig: {
version: "2.3.6",
},
labels: {
env: "test",
},
});
import pulumi
import pulumi_gcp as gcp
default = gcp.dataproc.MetastoreService("default",
service_id="metastore-srv",
location="us-central1",
port=9080,
tier="DEVELOPER",
maintenance_window={
"hour_of_day": 2,
"day_of_week": "SUNDAY",
},
hive_metastore_config={
"version": "2.3.6",
},
labels={
"env": "test",
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataproc"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataproc.NewMetastoreService(ctx, "default", &dataproc.MetastoreServiceArgs{
ServiceId: pulumi.String("metastore-srv"),
Location: pulumi.String("us-central1"),
Port: pulumi.Int(9080),
Tier: pulumi.String("DEVELOPER"),
MaintenanceWindow: &dataproc.MetastoreServiceMaintenanceWindowArgs{
HourOfDay: pulumi.Int(2),
DayOfWeek: pulumi.String("SUNDAY"),
},
HiveMetastoreConfig: &dataproc.MetastoreServiceHiveMetastoreConfigArgs{
Version: pulumi.String("2.3.6"),
},
Labels: pulumi.StringMap{
"env": pulumi.String("test"),
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var @default = new Gcp.Dataproc.MetastoreService("default", new()
{
ServiceId = "metastore-srv",
Location = "us-central1",
Port = 9080,
Tier = "DEVELOPER",
MaintenanceWindow = new Gcp.Dataproc.Inputs.MetastoreServiceMaintenanceWindowArgs
{
HourOfDay = 2,
DayOfWeek = "SUNDAY",
},
HiveMetastoreConfig = new Gcp.Dataproc.Inputs.MetastoreServiceHiveMetastoreConfigArgs
{
Version = "2.3.6",
},
Labels =
{
{ "env", "test" },
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.MetastoreService;
import com.pulumi.gcp.dataproc.MetastoreServiceArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceMaintenanceWindowArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceHiveMetastoreConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var default_ = new MetastoreService("default", MetastoreServiceArgs.builder()
.serviceId("metastore-srv")
.location("us-central1")
.port(9080)
.tier("DEVELOPER")
.maintenanceWindow(MetastoreServiceMaintenanceWindowArgs.builder()
.hourOfDay(2)
.dayOfWeek("SUNDAY")
.build())
.hiveMetastoreConfig(MetastoreServiceHiveMetastoreConfigArgs.builder()
.version("2.3.6")
.build())
.labels(Map.of("env", "test"))
.build());
}
}
resources:
default:
type: gcp:dataproc:MetastoreService
properties:
serviceId: metastore-srv
location: us-central1
port: 9080
tier: DEVELOPER
maintenanceWindow:
hourOfDay: 2
dayOfWeek: SUNDAY
hiveMetastoreConfig:
version: 2.3.6
labels:
env: test
The tier property controls capacity and features; DEVELOPER provides lower cost for non-production workloads. The hiveMetastoreConfig specifies which Hive version to run. The maintenanceWindow defines when Google can restart the service for updates, specified as a day of week and hour in UTC.
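Once the service exists, downstream stacks or clusters usually consume it through its connection details, which the resource exposes as outputs. A small TypeScript sketch, assuming the example above (endpointUri and artifactGcsUri are output properties of the resource):
// Illustrative additions to the TypeScript example above: publish the
// metastore's connection details as stack outputs for downstream consumers.
export const endpointUri = _default.endpointUri;     // Thrift endpoint of the service
export const artifactsUri = _default.artifactGcsUri; // Cloud Storage URI for service artifacts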
Encrypt metadata with customer-managed keys
Organizations with compliance requirements often encrypt metastore data using their own KMS keys.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const keyRing = new gcp.kms.KeyRing("key_ring", {
name: "example-keyring",
location: "us-central1",
});
const cryptoKey = new gcp.kms.CryptoKey("crypto_key", {
name: "example-key",
keyRing: keyRing.id,
purpose: "ENCRYPT_DECRYPT",
});
const _default = new gcp.dataproc.MetastoreService("default", {
serviceId: "example-service",
location: "us-central1",
encryptionConfig: {
kmsKey: cryptoKey.id,
},
hiveMetastoreConfig: {
version: "3.1.2",
},
});
import pulumi
import pulumi_gcp as gcp
key_ring = gcp.kms.KeyRing("key_ring",
name="example-keyring",
location="us-central1")
crypto_key = gcp.kms.CryptoKey("crypto_key",
name="example-key",
key_ring=key_ring.id,
purpose="ENCRYPT_DECRYPT")
default = gcp.dataproc.MetastoreService("default",
service_id="example-service",
location="us-central1",
encryption_config={
"kms_key": crypto_key.id,
},
hive_metastore_config={
"version": "3.1.2",
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataproc"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/kms"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
keyRing, err := kms.NewKeyRing(ctx, "key_ring", &kms.KeyRingArgs{
Name: pulumi.String("example-keyring"),
Location: pulumi.String("us-central1"),
})
if err != nil {
return err
}
cryptoKey, err := kms.NewCryptoKey(ctx, "crypto_key", &kms.CryptoKeyArgs{
Name: pulumi.String("example-key"),
KeyRing: keyRing.ID(),
Purpose: pulumi.String("ENCRYPT_DECRYPT"),
})
if err != nil {
return err
}
_, err = dataproc.NewMetastoreService(ctx, "default", &dataproc.MetastoreServiceArgs{
ServiceId: pulumi.String("example-service"),
Location: pulumi.String("us-central1"),
EncryptionConfig: &dataproc.MetastoreServiceEncryptionConfigArgs{
KmsKey: cryptoKey.ID(),
},
HiveMetastoreConfig: &dataproc.MetastoreServiceHiveMetastoreConfigArgs{
Version: pulumi.String("3.1.2"),
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var keyRing = new Gcp.Kms.KeyRing("key_ring", new()
{
Name = "example-keyring",
Location = "us-central1",
});
var cryptoKey = new Gcp.Kms.CryptoKey("crypto_key", new()
{
Name = "example-key",
KeyRing = keyRing.Id,
Purpose = "ENCRYPT_DECRYPT",
});
var @default = new Gcp.Dataproc.MetastoreService("default", new()
{
ServiceId = "example-service",
Location = "us-central1",
EncryptionConfig = new Gcp.Dataproc.Inputs.MetastoreServiceEncryptionConfigArgs
{
KmsKey = cryptoKey.Id,
},
HiveMetastoreConfig = new Gcp.Dataproc.Inputs.MetastoreServiceHiveMetastoreConfigArgs
{
Version = "3.1.2",
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.kms.KeyRing;
import com.pulumi.gcp.kms.KeyRingArgs;
import com.pulumi.gcp.kms.CryptoKey;
import com.pulumi.gcp.kms.CryptoKeyArgs;
import com.pulumi.gcp.dataproc.MetastoreService;
import com.pulumi.gcp.dataproc.MetastoreServiceArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceEncryptionConfigArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceHiveMetastoreConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var keyRing = new KeyRing("keyRing", KeyRingArgs.builder()
.name("example-keyring")
.location("us-central1")
.build());
var cryptoKey = new CryptoKey("cryptoKey", CryptoKeyArgs.builder()
.name("example-key")
.keyRing(keyRing.id())
.purpose("ENCRYPT_DECRYPT")
.build());
var default_ = new MetastoreService("default", MetastoreServiceArgs.builder()
.serviceId("example-service")
.location("us-central1")
.encryptionConfig(MetastoreServiceEncryptionConfigArgs.builder()
.kmsKey(cryptoKey.id())
.build())
.hiveMetastoreConfig(MetastoreServiceHiveMetastoreConfigArgs.builder()
.version("3.1.2")
.build())
.build());
}
}
resources:
default:
type: gcp:dataproc:MetastoreService
properties:
serviceId: example-service
location: us-central1
encryptionConfig:
kmsKey: ${cryptoKey.id}
hiveMetastoreConfig:
version: 3.1.2
cryptoKey:
type: gcp:kms:CryptoKey
name: crypto_key
properties:
name: example-key
keyRing: ${keyRing.id}
purpose: ENCRYPT_DECRYPT
keyRing:
type: gcp:kms:KeyRing
name: key_ring
properties:
name: example-keyring
location: us-central1
The encryptionConfig property points to a Cloud KMS key that encrypts metadata at rest. You create the KeyRing and CryptoKey resources first, then reference the key ID. This gives you control over key rotation and access policies.
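One related step worth noting: the metastore's service agent must be allowed to use the key before encryption works. A hedged TypeScript sketch, reusing cryptoKey from the example above and assuming the agent follows GCP's service-PROJECT_NUMBER@gcp-sa-metastore.iam.gserviceaccount.com naming convention:
import * as gcp from "@pulumi/gcp";

// Look up the current project to build the metastore service agent address.
// The agent address is an assumption based on GCP's service-agent naming
// convention; verify it for your project before depending on it.
const project = gcp.organizations.getProject({});

const metastoreKmsAccess = new gcp.kms.CryptoKeyIAMMember("metastore_kms_access", {
    cryptoKeyId: cryptoKey.id, // the CryptoKey from the example above
    role: "roles/cloudkms.cryptoKeyEncrypterDecrypter",
    member: project.then(p =>
        `serviceAccount:service-${p.number}@gcp-sa-metastore.iam.gserviceaccount.com`),
});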
Connect to a private VPC subnet
Production metastores typically run in private networks to restrict access to authorized compute resources.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const net = new gcp.compute.Network("net", {
name: "my-network",
autoCreateSubnetworks: false,
});
const subnet = new gcp.compute.Subnetwork("subnet", {
name: "my-subnetwork",
region: "us-central1",
network: net.id,
ipCidrRange: "10.0.0.0/22",
privateIpGoogleAccess: true,
});
const _default = new gcp.dataproc.MetastoreService("default", {
serviceId: "metastore-srv",
location: "us-central1",
tier: "DEVELOPER",
hiveMetastoreConfig: {
version: "3.1.2",
},
networkConfig: {
consumers: [{
subnetwork: subnet.id,
}],
},
});
import pulumi
import pulumi_gcp as gcp
net = gcp.compute.Network("net",
name="my-network",
auto_create_subnetworks=False)
subnet = gcp.compute.Subnetwork("subnet",
name="my-subnetwork",
region="us-central1",
network=net.id,
ip_cidr_range="10.0.0.0/22",
private_ip_google_access=True)
default = gcp.dataproc.MetastoreService("default",
service_id="metastore-srv",
location="us-central1",
tier="DEVELOPER",
hive_metastore_config={
"version": "3.1.2",
},
network_config={
"consumers": [{
"subnetwork": subnet.id,
}],
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/compute"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataproc"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
net, err := compute.NewNetwork(ctx, "net", &compute.NetworkArgs{
Name: pulumi.String("my-network"),
AutoCreateSubnetworks: pulumi.Bool(false),
})
if err != nil {
return err
}
subnet, err := compute.NewSubnetwork(ctx, "subnet", &compute.SubnetworkArgs{
Name: pulumi.String("my-subnetwork"),
Region: pulumi.String("us-central1"),
Network: net.ID(),
IpCidrRange: pulumi.String("10.0.0.0/22"),
PrivateIpGoogleAccess: pulumi.Bool(true),
})
if err != nil {
return err
}
_, err = dataproc.NewMetastoreService(ctx, "default", &dataproc.MetastoreServiceArgs{
ServiceId: pulumi.String("metastore-srv"),
Location: pulumi.String("us-central1"),
Tier: pulumi.String("DEVELOPER"),
HiveMetastoreConfig: &dataproc.MetastoreServiceHiveMetastoreConfigArgs{
Version: pulumi.String("3.1.2"),
},
NetworkConfig: &dataproc.MetastoreServiceNetworkConfigArgs{
Consumers: dataproc.MetastoreServiceNetworkConfigConsumerArray{
&dataproc.MetastoreServiceNetworkConfigConsumerArgs{
Subnetwork: subnet.ID(),
},
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var net = new Gcp.Compute.Network("net", new()
{
Name = "my-network",
AutoCreateSubnetworks = false,
});
var subnet = new Gcp.Compute.Subnetwork("subnet", new()
{
Name = "my-subnetwork",
Region = "us-central1",
Network = net.Id,
IpCidrRange = "10.0.0.0/22",
PrivateIpGoogleAccess = true,
});
var @default = new Gcp.Dataproc.MetastoreService("default", new()
{
ServiceId = "metastore-srv",
Location = "us-central1",
Tier = "DEVELOPER",
HiveMetastoreConfig = new Gcp.Dataproc.Inputs.MetastoreServiceHiveMetastoreConfigArgs
{
Version = "3.1.2",
},
NetworkConfig = new Gcp.Dataproc.Inputs.MetastoreServiceNetworkConfigArgs
{
Consumers = new[]
{
new Gcp.Dataproc.Inputs.MetastoreServiceNetworkConfigConsumerArgs
{
Subnetwork = subnet.Id,
},
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.compute.Network;
import com.pulumi.gcp.compute.NetworkArgs;
import com.pulumi.gcp.compute.Subnetwork;
import com.pulumi.gcp.compute.SubnetworkArgs;
import com.pulumi.gcp.dataproc.MetastoreService;
import com.pulumi.gcp.dataproc.MetastoreServiceArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceHiveMetastoreConfigArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceNetworkConfigArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceNetworkConfigConsumerArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var net = new Network("net", NetworkArgs.builder()
.name("my-network")
.autoCreateSubnetworks(false)
.build());
var subnet = new Subnetwork("subnet", SubnetworkArgs.builder()
.name("my-subnetwork")
.region("us-central1")
.network(net.id())
.ipCidrRange("10.0.0.0/22")
.privateIpGoogleAccess(true)
.build());
var default_ = new MetastoreService("default", MetastoreServiceArgs.builder()
.serviceId("metastore-srv")
.location("us-central1")
.tier("DEVELOPER")
.hiveMetastoreConfig(MetastoreServiceHiveMetastoreConfigArgs.builder()
.version("3.1.2")
.build())
.networkConfig(MetastoreServiceNetworkConfigArgs.builder()
.consumers(MetastoreServiceNetworkConfigConsumerArgs.builder()
.subnetwork(subnet.id())
.build())
.build())
.build());
}
}
resources:
net:
type: gcp:compute:Network
properties:
name: my-network
autoCreateSubnetworks: false
subnet:
type: gcp:compute:Subnetwork
properties:
name: my-subnetwork
region: us-central1
network: ${net.id}
ipCidrRange: 10.0.0.0/22
privateIpGoogleAccess: true
default:
type: gcp:dataproc:MetastoreService
properties:
serviceId: metastore-srv
location: us-central1
tier: DEVELOPER
hiveMetastoreConfig:
version: 3.1.2
networkConfig:
consumers:
- subnetwork: ${subnet.id}
The networkConfig property places the metastore in a specific VPC subnet. The consumers array lists which subnets can access the service. This example creates a VPC and subnet inline, but you can reference existing infrastructure instead.
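To point at existing infrastructure instead, you can look the subnet up rather than create it. A minimal TypeScript sketch, assuming a pre-existing subnet named my-subnetwork in us-central1:
import * as gcp from "@pulumi/gcp";

// Hypothetical lookup of a pre-existing subnet instead of creating one inline.
const existingSubnet = gcp.compute.getSubnetwork({
    name: "my-subnetwork", // assumed name of an existing subnet
    region: "us-central1",
});

const service = new gcp.dataproc.MetastoreService("private", {
    serviceId: "metastore-srv",
    location: "us-central1",
    hiveMetastoreConfig: {
        version: "3.1.2",
    },
    networkConfig: {
        consumers: [{
            subnetwork: existingSubnet.then(s => s.id),
        }],
    },
});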
Schedule automated backups to Cloud Storage
Teams running production metastores configure automated backups to recover from data loss.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const bucket = new gcp.storage.Bucket("bucket", {
name: "backup",
location: "us-central1",
});
const backup = new gcp.dataproc.MetastoreService("backup", {
serviceId: "backup",
location: "us-central1",
port: 9080,
tier: "DEVELOPER",
maintenanceWindow: {
hourOfDay: 2,
dayOfWeek: "SUNDAY",
},
hiveMetastoreConfig: {
version: "2.3.6",
},
scheduledBackup: {
enabled: true,
cronSchedule: "0 0 * * *",
timeZone: "UTC",
backupLocation: pulumi.interpolate`gs://${bucket.name}`,
},
labels: {
env: "test",
},
});
import pulumi
import pulumi_gcp as gcp
bucket = gcp.storage.Bucket("bucket",
name="backup",
location="us-central1")
backup = gcp.dataproc.MetastoreService("backup",
service_id="backup",
location="us-central1",
port=9080,
tier="DEVELOPER",
maintenance_window={
"hour_of_day": 2,
"day_of_week": "SUNDAY",
},
hive_metastore_config={
"version": "2.3.6",
},
scheduled_backup={
"enabled": True,
"cron_schedule": "0 0 * * *",
"time_zone": "UTC",
"backup_location": bucket.name.apply(lambda name: f"gs://{name}"),
},
labels={
"env": "test",
})
package main
import (
"fmt"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataproc"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/storage"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
bucket, err := storage.NewBucket(ctx, "bucket", &storage.BucketArgs{
Name: pulumi.String("backup"),
Location: pulumi.String("us-central1"),
})
if err != nil {
return err
}
_, err = dataproc.NewMetastoreService(ctx, "backup", &dataproc.MetastoreServiceArgs{
ServiceId: pulumi.String("backup"),
Location: pulumi.String("us-central1"),
Port: pulumi.Int(9080),
Tier: pulumi.String("DEVELOPER"),
MaintenanceWindow: &dataproc.MetastoreServiceMaintenanceWindowArgs{
HourOfDay: pulumi.Int(2),
DayOfWeek: pulumi.String("SUNDAY"),
},
HiveMetastoreConfig: &dataproc.MetastoreServiceHiveMetastoreConfigArgs{
Version: pulumi.String("2.3.6"),
},
ScheduledBackup: &dataproc.MetastoreServiceScheduledBackupArgs{
Enabled: pulumi.Bool(true),
CronSchedule: pulumi.String("0 0 * * *"),
TimeZone: pulumi.String("UTC"),
BackupLocation: bucket.Name.ApplyT(func(name string) (string, error) {
return fmt.Sprintf("gs://%v", name), nil
}).(pulumi.StringOutput),
},
Labels: pulumi.StringMap{
"env": pulumi.String("test"),
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var bucket = new Gcp.Storage.Bucket("bucket", new()
{
Name = "backup",
Location = "us-central1",
});
var backup = new Gcp.Dataproc.MetastoreService("backup", new()
{
ServiceId = "backup",
Location = "us-central1",
Port = 9080,
Tier = "DEVELOPER",
MaintenanceWindow = new Gcp.Dataproc.Inputs.MetastoreServiceMaintenanceWindowArgs
{
HourOfDay = 2,
DayOfWeek = "SUNDAY",
},
HiveMetastoreConfig = new Gcp.Dataproc.Inputs.MetastoreServiceHiveMetastoreConfigArgs
{
Version = "2.3.6",
},
ScheduledBackup = new Gcp.Dataproc.Inputs.MetastoreServiceScheduledBackupArgs
{
Enabled = true,
CronSchedule = "0 0 * * *",
TimeZone = "UTC",
BackupLocation = bucket.Name.Apply(name => $"gs://{name}"),
},
Labels =
{
{ "env", "test" },
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.storage.Bucket;
import com.pulumi.gcp.storage.BucketArgs;
import com.pulumi.gcp.dataproc.MetastoreService;
import com.pulumi.gcp.dataproc.MetastoreServiceArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceMaintenanceWindowArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceHiveMetastoreConfigArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceScheduledBackupArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var bucket = new Bucket("bucket", BucketArgs.builder()
.name("backup")
.location("us-central1")
.build());
var backup = new MetastoreService("backup", MetastoreServiceArgs.builder()
.serviceId("backup")
.location("us-central1")
.port(9080)
.tier("DEVELOPER")
.maintenanceWindow(MetastoreServiceMaintenanceWindowArgs.builder()
.hourOfDay(2)
.dayOfWeek("SUNDAY")
.build())
.hiveMetastoreConfig(MetastoreServiceHiveMetastoreConfigArgs.builder()
.version("2.3.6")
.build())
.scheduledBackup(MetastoreServiceScheduledBackupArgs.builder()
.enabled(true)
.cronSchedule("0 0 * * *")
.timeZone("UTC")
.backupLocation(bucket.name().applyValue(_name -> String.format("gs://%s", _name)))
.build())
.labels(Map.of("env", "test"))
.build());
}
}
resources:
backup:
type: gcp:dataproc:MetastoreService
properties:
serviceId: backup
location: us-central1
port: 9080
tier: DEVELOPER
maintenanceWindow:
hourOfDay: 2
dayOfWeek: SUNDAY
hiveMetastoreConfig:
version: 2.3.6
scheduledBackup:
enabled: true
cronSchedule: 0 0 * * *
timeZone: UTC
backupLocation: gs://${bucket.name}
labels:
env: test
bucket:
type: gcp:storage:Bucket
properties:
name: backup
location: us-central1
The scheduledBackup property enables automatic backups on a cron schedule. The backupLocation points to a Cloud Storage bucket where backups are stored. The cronSchedule uses standard cron syntax; this example runs daily at midnight UTC.
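Backups also require the metastore's service agent to have write access to the bucket. A hedged TypeScript sketch, reusing bucket from the example above and again assuming the standard service-agent naming convention (verify the exact role your environment needs):
import * as gcp from "@pulumi/gcp";

const project = gcp.organizations.getProject({});

// Assumed: the Dataproc Metastore service agent writes the backups, so it
// needs object access on the target bucket. Verify the exact role required.
const backupWriter = new gcp.storage.BucketIAMMember("metastore_backup_writer", {
    bucket: bucket.name, // the backup bucket from the example above
    role: "roles/storage.objectAdmin",
    member: project.then(p =>
        `serviceAccount:service-${p.number}@gcp-sa-metastore.iam.gserviceaccount.com`),
});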
Use Spanner backend with fixed instance size
Metastores can use Cloud Spanner instead of MySQL for higher availability and performance.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const dpms2 = new gcp.dataproc.MetastoreService("dpms2", {
serviceId: "ms-dpms2",
location: "us-central1",
databaseType: "SPANNER",
hiveMetastoreConfig: {
version: "3.1.2",
},
scalingConfig: {
instanceSize: "EXTRA_SMALL",
},
});
import pulumi
import pulumi_gcp as gcp
dpms2 = gcp.dataproc.MetastoreService("dpms2",
service_id="ms-dpms2",
location="us-central1",
database_type="SPANNER",
hive_metastore_config={
"version": "3.1.2",
},
scaling_config={
"instance_size": "EXTRA_SMALL",
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataproc"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataproc.NewMetastoreService(ctx, "dpms2", &dataproc.MetastoreServiceArgs{
ServiceId: pulumi.String("ms-dpms2"),
Location: pulumi.String("us-central1"),
DatabaseType: pulumi.String("SPANNER"),
HiveMetastoreConfig: &dataproc.MetastoreServiceHiveMetastoreConfigArgs{
Version: pulumi.String("3.1.2"),
},
ScalingConfig: &dataproc.MetastoreServiceScalingConfigArgs{
InstanceSize: pulumi.String("EXTRA_SMALL"),
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var dpms2 = new Gcp.Dataproc.MetastoreService("dpms2", new()
{
ServiceId = "ms-dpms2",
Location = "us-central1",
DatabaseType = "SPANNER",
HiveMetastoreConfig = new Gcp.Dataproc.Inputs.MetastoreServiceHiveMetastoreConfigArgs
{
Version = "3.1.2",
},
ScalingConfig = new Gcp.Dataproc.Inputs.MetastoreServiceScalingConfigArgs
{
InstanceSize = "EXTRA_SMALL",
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.MetastoreService;
import com.pulumi.gcp.dataproc.MetastoreServiceArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceHiveMetastoreConfigArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceScalingConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var dpms2 = new MetastoreService("dpms2", MetastoreServiceArgs.builder()
.serviceId("ms-dpms2")
.location("us-central1")
.databaseType("SPANNER")
.hiveMetastoreConfig(MetastoreServiceHiveMetastoreConfigArgs.builder()
.version("3.1.2")
.build())
.scalingConfig(MetastoreServiceScalingConfigArgs.builder()
.instanceSize("EXTRA_SMALL")
.build())
.build());
}
}
resources:
dpms2:
type: gcp:dataproc:MetastoreService
properties:
serviceId: ms-dpms2
location: us-central1
databaseType: SPANNER
hiveMetastoreConfig:
version: 3.1.2
scalingConfig:
instanceSize: EXTRA_SMALL
The databaseType property switches from MySQL to Spanner. The scalingConfig defines capacity; instanceSize sets a fixed size like EXTRA_SMALL. Spanner-backed metastores don’t require maintenance windows since Spanner handles updates transparently.
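The provider also appears to accept a numeric scalingFactor as an alternative to instanceSize; treat the property name and allowed values here as assumptions to check against the resource reference. A minimal TypeScript sketch:
import * as gcp from "@pulumi/gcp";

// Sketch: pin capacity with a numeric factor instead of a named instance size.
// scalingFactor here is an assumed alternative to instanceSize; the two are
// different ways of sizing the same scalingConfig.
const sized = new gcp.dataproc.MetastoreService("sized", {
    serviceId: "ms-sized",
    location: "us-central1",
    databaseType: "SPANNER",
    hiveMetastoreConfig: {
        version: "3.1.2",
    },
    scalingConfig: {
        scalingFactor: 2, // placeholder value; check the allowed range
    },
});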
Enable autoscaling with capacity bounds
Spanner-backed metastores can scale capacity automatically based on load.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const testResource = new gcp.dataproc.MetastoreService("test_resource", {
serviceId: "test-service",
location: "us-central1",
databaseType: "SPANNER",
hiveMetastoreConfig: {
version: "3.1.2",
},
scalingConfig: {
autoscalingConfig: {
autoscalingEnabled: true,
limitConfig: {
minScalingFactor: 0.1,
maxScalingFactor: 1,
},
},
},
});
import pulumi
import pulumi_gcp as gcp
test_resource = gcp.dataproc.MetastoreService("test_resource",
service_id="test-service",
location="us-central1",
database_type="SPANNER",
hive_metastore_config={
"version": "3.1.2",
},
scaling_config={
"autoscaling_config": {
"autoscaling_enabled": True,
"limit_config": {
"min_scaling_factor": 0.1,
"max_scaling_factor": 1,
},
},
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataproc"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataproc.NewMetastoreService(ctx, "test_resource", &dataproc.MetastoreServiceArgs{
ServiceId: pulumi.String("test-service"),
Location: pulumi.String("us-central1"),
DatabaseType: pulumi.String("SPANNER"),
HiveMetastoreConfig: &dataproc.MetastoreServiceHiveMetastoreConfigArgs{
Version: pulumi.String("3.1.2"),
},
ScalingConfig: &dataproc.MetastoreServiceScalingConfigArgs{
AutoscalingConfig: &dataproc.MetastoreServiceScalingConfigAutoscalingConfigArgs{
AutoscalingEnabled: pulumi.Bool(true),
LimitConfig: &dataproc.MetastoreServiceScalingConfigAutoscalingConfigLimitConfigArgs{
MinScalingFactor: pulumi.Float64(0.1),
MaxScalingFactor: pulumi.Float64(1),
},
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var testResource = new Gcp.Dataproc.MetastoreService("test_resource", new()
{
ServiceId = "test-service",
Location = "us-central1",
DatabaseType = "SPANNER",
HiveMetastoreConfig = new Gcp.Dataproc.Inputs.MetastoreServiceHiveMetastoreConfigArgs
{
Version = "3.1.2",
},
ScalingConfig = new Gcp.Dataproc.Inputs.MetastoreServiceScalingConfigArgs
{
AutoscalingConfig = new Gcp.Dataproc.Inputs.MetastoreServiceScalingConfigAutoscalingConfigArgs
{
AutoscalingEnabled = true,
LimitConfig = new Gcp.Dataproc.Inputs.MetastoreServiceScalingConfigAutoscalingConfigLimitConfigArgs
{
MinScalingFactor = 0.1,
MaxScalingFactor = 1,
},
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.MetastoreService;
import com.pulumi.gcp.dataproc.MetastoreServiceArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceHiveMetastoreConfigArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceScalingConfigArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceScalingConfigAutoscalingConfigArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceScalingConfigAutoscalingConfigLimitConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var testResource = new MetastoreService("testResource", MetastoreServiceArgs.builder()
.serviceId("test-service")
.location("us-central1")
.databaseType("SPANNER")
.hiveMetastoreConfig(MetastoreServiceHiveMetastoreConfigArgs.builder()
.version("3.1.2")
.build())
.scalingConfig(MetastoreServiceScalingConfigArgs.builder()
.autoscalingConfig(MetastoreServiceScalingConfigAutoscalingConfigArgs.builder()
.autoscalingEnabled(true)
.limitConfig(MetastoreServiceScalingConfigAutoscalingConfigLimitConfigArgs.builder()
.minScalingFactor(0.1)
.maxScalingFactor(1.0)
.build())
.build())
.build())
.build());
}
}
resources:
testResource:
type: gcp:dataproc:MetastoreService
name: test_resource
properties:
serviceId: test-service
location: us-central1
databaseType: SPANNER
hiveMetastoreConfig:
version: 3.1.2
scalingConfig:
autoscalingConfig:
autoscalingEnabled: true
limitConfig:
minScalingFactor: 0.1
maxScalingFactor: 1
The autoscalingConfig enables automatic scaling within defined bounds. The limitConfig sets minScalingFactor and maxScalingFactor to control the range. Scaling factor is a multiplier on base capacity; 0.1 to 1 means the service scales from 10% to 100% of a baseline size.
Beyond these examples
These snippets focus on specific metastore service features: service tiers and Hive versions, encryption and VPC networking, and Spanner backend with scaling options. They’re intentionally minimal rather than full data platform deployments.
The examples may reference pre-existing infrastructure such as KMS keys for encryption, VPC networks and subnets, and Cloud Storage buckets for backups. They focus on configuring the metastore service rather than provisioning all surrounding infrastructure.
To keep things focused, common metastore patterns are omitted, including:
- Metadata integration with external systems
- Telemetry configuration (defaults to JSON)
- Release channel selection (CANARY vs STABLE)
- Deletion protection for production services
These omissions are intentional: the goal is to illustrate how each metastore feature is wired, not provide drop-in data platform modules. See the Dataproc Metastore Service resource reference for all available configuration options.
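If you do need those patterns, a minimal TypeScript sketch shows how several of them attach to the resource; the values are illustrative placeholders, not recommendations:
import * as gcp from "@pulumi/gcp";

// Hedged sketch of the omitted knobs on a single service.
const hardened = new gcp.dataproc.MetastoreService("hardened", {
    serviceId: "ms-hardened",
    location: "us-central1",
    hiveMetastoreConfig: {
        version: "3.1.2",
    },
    releaseChannel: "STABLE",     // or CANARY for earlier updates
    deletionProtection: true,     // guard against accidental deletion
    telemetryConfig: {
        logFormat: "JSON",        // the default; LEGACY is the alternative
    },
});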
Let's configure GCP Dataproc Metastore Services
Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.
Frequently Asked Questions
Configuration & Setup
- The serviceId must be 3-63 characters long, contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-), and cannot begin or end with an underscore or hyphen. The field is immutable after creation.
- Properties that cannot be changed in place include serviceId, project, location, network, networkConfig, databaseType, releaseChannel, and tags.
- The service supports the MYSQL (default) and SPANNER database types. The choice affects whether a maintenance window is required.
- The metastore's Thrift endpoint port is set with the port property.
Security & Protection
- Set deletionProtection to true to prevent accidental deletion of your metastore service.
- For customer-managed encryption, configure encryptionConfig with a kmsKey pointing to your Cloud KMS CryptoKey ID, as shown in the CMEK example.
Networking
- To attach the service to a private network, configure networkConfig with consumers containing your subnetwork ID. You can optionally enable customRoutesEnabled for custom routing.
Scaling & Performance
- Configure scalingConfig with autoscalingConfig to enable autoscaling. Set autoscalingEnabled to true and optionally configure limitConfig with minScalingFactor and maxScalingFactor to control scaling bounds.
Maintenance & Backups
- Maintenance windows are not needed for the SPANNER database type; only MYSQL databases require maintenance window configuration.
- To schedule backups, configure scheduledBackup with enabled set to true, a cronSchedule (e.g., "0 0 * * *"), timeZone (e.g., "UTC"), and backupLocation pointing to a Cloud Storage bucket.
Labels & Metadata
- The labels field is non-authoritative and only manages labels present in your Pulumi configuration. To see all labels on the resource (including those set by other clients or services), use the effectiveLabels output property.
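For example, in TypeScript you can export both views, assuming service is a MetastoreService from one of the examples above:
// Sketch, assuming `service` is a MetastoreService defined earlier.
export const managedLabels = service.labels;      // only labels set in this program
export const allLabels = service.effectiveLabels; // includes labels set outside Pulumi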
Using a different cloud?
Explore analytics guides for other cloud providers: