The gcp:dataproc/sessionTemplate:SessionTemplate resource, part of the Pulumi GCP provider, defines reusable configuration for Dataproc Serverless interactive sessions: runtime properties, execution environment, and session type. This guide focuses on three capabilities: Jupyter notebook sessions with Spark configuration, Spark Connect sessions for remote clients, and KMS encryption with metastore integration.
Session templates reference VPC subnetworks and may integrate with KMS keys, Dataproc Metastore services, and Spark History Server clusters. The examples are intentionally small. Combine them with your own VPC, encryption, and data infrastructure.
Create a Jupyter session template with runtime properties
Teams building interactive data science workflows start with Jupyter notebooks that need consistent Spark configuration across sessions.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const exampleSessionTemplatesJupyter = new gcp.dataproc.SessionTemplate("example_session_templates_jupyter", {
name: "projects/my-project-name/locations/us-central1/sessionTemplates/jupyter-session-template",
location: "us-central1",
labels: {
session_template_test: "terraform",
},
runtimeConfig: {
properties: {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
},
environmentConfig: {
executionConfig: {
subnetworkUri: "default",
idleTtl: "3600s",
networkTags: ["tag1"],
authenticationConfig: {
userWorkloadAuthenticationType: "END_USER_CREDENTIALS",
},
},
},
jupyterSession: {
kernel: "PYTHON",
displayName: "tf python kernel",
},
});
import pulumi
import pulumi_gcp as gcp
example_session_templates_jupyter = gcp.dataproc.SessionTemplate("example_session_templates_jupyter",
name="projects/my-project-name/locations/us-central1/sessionTemplates/jupyter-session-template",
location="us-central1",
labels={
"session_template_test": "terraform",
},
runtime_config={
"properties": {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
},
environment_config={
"execution_config": {
"subnetwork_uri": "default",
"idle_ttl": "3600s",
"network_tags": ["tag1"],
"authentication_config": {
"user_workload_authentication_type": "END_USER_CREDENTIALS",
},
},
},
jupyter_session={
"kernel": "PYTHON",
"display_name": "tf python kernel",
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataproc"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataproc.NewSessionTemplate(ctx, "example_session_templates_jupyter", &dataproc.SessionTemplateArgs{
Name: pulumi.String("projects/my-project-name/locations/us-central1/sessionTemplates/jupyter-session-template"),
Location: pulumi.String("us-central1"),
Labels: pulumi.StringMap{
"session_template_test": pulumi.String("terraform"),
},
RuntimeConfig: &dataproc.SessionTemplateRuntimeConfigArgs{
Properties: pulumi.StringMap{
"spark.dynamicAllocation.enabled": pulumi.String("false"),
"spark.executor.instances": pulumi.String("2"),
},
},
EnvironmentConfig: &dataproc.SessionTemplateEnvironmentConfigArgs{
ExecutionConfig: &dataproc.SessionTemplateEnvironmentConfigExecutionConfigArgs{
SubnetworkUri: pulumi.String("default"),
IdleTtl: pulumi.String("3600s"),
NetworkTags: pulumi.StringArray{
pulumi.String("tag1"),
},
AuthenticationConfig: &dataproc.SessionTemplateEnvironmentConfigExecutionConfigAuthenticationConfigArgs{
UserWorkloadAuthenticationType: pulumi.String("END_USER_CREDENTIALS"),
},
},
},
JupyterSession: &dataproc.SessionTemplateJupyterSessionArgs{
Kernel: pulumi.String("PYTHON"),
DisplayName: pulumi.String("tf python kernel"),
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var exampleSessionTemplatesJupyter = new Gcp.Dataproc.SessionTemplate("example_session_templates_jupyter", new()
{
Name = "projects/my-project-name/locations/us-central1/sessionTemplates/jupyter-session-template",
Location = "us-central1",
Labels =
{
{ "session_template_test", "terraform" },
},
RuntimeConfig = new Gcp.Dataproc.Inputs.SessionTemplateRuntimeConfigArgs
{
Properties =
{
{ "spark.dynamicAllocation.enabled", "false" },
{ "spark.executor.instances", "2" },
},
},
EnvironmentConfig = new Gcp.Dataproc.Inputs.SessionTemplateEnvironmentConfigArgs
{
ExecutionConfig = new Gcp.Dataproc.Inputs.SessionTemplateEnvironmentConfigExecutionConfigArgs
{
SubnetworkUri = "default",
IdleTtl = "3600s",
NetworkTags = new[]
{
"tag1",
},
AuthenticationConfig = new Gcp.Dataproc.Inputs.SessionTemplateEnvironmentConfigExecutionConfigAuthenticationConfigArgs
{
UserWorkloadAuthenticationType = "END_USER_CREDENTIALS",
},
},
},
JupyterSession = new Gcp.Dataproc.Inputs.SessionTemplateJupyterSessionArgs
{
Kernel = "PYTHON",
DisplayName = "tf python kernel",
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.SessionTemplate;
import com.pulumi.gcp.dataproc.SessionTemplateArgs;
import com.pulumi.gcp.dataproc.inputs.SessionTemplateRuntimeConfigArgs;
import com.pulumi.gcp.dataproc.inputs.SessionTemplateEnvironmentConfigArgs;
import com.pulumi.gcp.dataproc.inputs.SessionTemplateEnvironmentConfigExecutionConfigArgs;
import com.pulumi.gcp.dataproc.inputs.SessionTemplateEnvironmentConfigExecutionConfigAuthenticationConfigArgs;
import com.pulumi.gcp.dataproc.inputs.SessionTemplateJupyterSessionArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var exampleSessionTemplatesJupyter = new SessionTemplate("exampleSessionTemplatesJupyter", SessionTemplateArgs.builder()
.name("projects/my-project-name/locations/us-central1/sessionTemplates/jupyter-session-template")
.location("us-central1")
.labels(Map.of("session_template_test", "terraform"))
.runtimeConfig(SessionTemplateRuntimeConfigArgs.builder()
.properties(Map.ofEntries(
Map.entry("spark.dynamicAllocation.enabled", "false"),
Map.entry("spark.executor.instances", "2")
))
.build())
.environmentConfig(SessionTemplateEnvironmentConfigArgs.builder()
.executionConfig(SessionTemplateEnvironmentConfigExecutionConfigArgs.builder()
.subnetworkUri("default")
.idleTtl("3600s")
.networkTags("tag1")
.authenticationConfig(SessionTemplateEnvironmentConfigExecutionConfigAuthenticationConfigArgs.builder()
.userWorkloadAuthenticationType("END_USER_CREDENTIALS")
.build())
.build())
.build())
.jupyterSession(SessionTemplateJupyterSessionArgs.builder()
.kernel("PYTHON")
.displayName("tf python kernel")
.build())
.build());
}
}
resources:
exampleSessionTemplatesJupyter:
type: gcp:dataproc:SessionTemplate
name: example_session_templates_jupyter
properties:
name: projects/my-project-name/locations/us-central1/sessionTemplates/jupyter-session-template
location: us-central1
labels:
session_template_test: terraform
runtimeConfig:
properties:
spark.dynamicAllocation.enabled: 'false'
spark.executor.instances: '2'
environmentConfig:
executionConfig:
subnetworkUri: default
idleTtl: 3600s
networkTags:
- tag1
authenticationConfig:
userWorkloadAuthenticationType: END_USER_CREDENTIALS
jupyterSession:
kernel: PYTHON
displayName: tf python kernel
The jupyterSession block defines the notebook kernel (PYTHON or SCALA). The runtimeConfig sets Spark properties that apply to every session created from this template, such as disabling dynamic allocation and fixing the executor count. The executionConfig places sessions in your VPC subnetwork and sets an idle timeout (idleTtl) for automatic cleanup. The authenticationConfig determines whether sessions run with end-user credentials or a service account.
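As a quick variation, the sketch below adapts the same template shape to a Scala kernel; it is illustrative only, and the template name, display name, and subnetwork are placeholders rather than values from the example above.
import * as gcp from "@pulumi/gcp";
// Minimal sketch (placeholder names): a Scala-kernel variant of the Jupyter template above.
const scalaJupyterTemplate = new gcp.dataproc.SessionTemplate("scala_session_template", {
    name: "projects/my-project-name/locations/us-central1/sessionTemplates/scala-session-template",
    location: "us-central1",
    runtimeConfig: {
        properties: {
            "spark.executor.instances": "2",
        },
    },
    environmentConfig: {
        executionConfig: {
            subnetworkUri: "default",
            idleTtl: "3600s",
        },
    },
    jupyterSession: {
        kernel: "SCALA",
        displayName: "scala kernel",
    },
});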
Configure Spark Connect sessions for remote clients
Applications that connect to Spark remotely need session templates without the Jupyter interface.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const exampleSessionTemplatesSparkConnect = new gcp.dataproc.SessionTemplate("example_session_templates_spark_connect", {
name: "projects/my-project-name/locations/us-central1/sessionTemplates/sc-session-template",
location: "us-central1",
labels: {
session_template_test: "terraform",
},
runtimeConfig: {
properties: {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
},
environmentConfig: {
executionConfig: {
subnetworkUri: "default",
ttl: "3600s",
networkTags: ["tag1"],
},
},
});
import pulumi
import pulumi_gcp as gcp
example_session_templates_spark_connect = gcp.dataproc.SessionTemplate("example_session_templates_spark_connect",
name="projects/my-project-name/locations/us-central1/sessionTemplates/sc-session-template",
location="us-central1",
labels={
"session_template_test": "terraform",
},
runtime_config={
"properties": {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
},
environment_config={
"execution_config": {
"subnetwork_uri": "default",
"ttl": "3600s",
"network_tags": ["tag1"],
},
})
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataproc"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataproc.NewSessionTemplate(ctx, "example_session_templates_spark_connect", &dataproc.SessionTemplateArgs{
Name: pulumi.String("projects/my-project-name/locations/us-central1/sessionTemplates/sc-session-template"),
Location: pulumi.String("us-central1"),
Labels: pulumi.StringMap{
"session_template_test": pulumi.String("terraform"),
},
RuntimeConfig: &dataproc.SessionTemplateRuntimeConfigArgs{
Properties: pulumi.StringMap{
"spark.dynamicAllocation.enabled": pulumi.String("false"),
"spark.executor.instances": pulumi.String("2"),
},
},
EnvironmentConfig: &dataproc.SessionTemplateEnvironmentConfigArgs{
ExecutionConfig: &dataproc.SessionTemplateEnvironmentConfigExecutionConfigArgs{
SubnetworkUri: pulumi.String("default"),
Ttl: pulumi.String("3600s"),
NetworkTags: pulumi.StringArray{
pulumi.String("tag1"),
},
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var exampleSessionTemplatesSparkConnect = new Gcp.Dataproc.SessionTemplate("example_session_templates_spark_connect", new()
{
Name = "projects/my-project-name/locations/us-central1/sessionTemplates/sc-session-template",
Location = "us-central1",
Labels =
{
{ "session_template_test", "terraform" },
},
RuntimeConfig = new Gcp.Dataproc.Inputs.SessionTemplateRuntimeConfigArgs
{
Properties =
{
{ "spark.dynamicAllocation.enabled", "false" },
{ "spark.executor.instances", "2" },
},
},
EnvironmentConfig = new Gcp.Dataproc.Inputs.SessionTemplateEnvironmentConfigArgs
{
ExecutionConfig = new Gcp.Dataproc.Inputs.SessionTemplateEnvironmentConfigExecutionConfigArgs
{
SubnetworkUri = "default",
Ttl = "3600s",
NetworkTags = new[]
{
"tag1",
},
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.SessionTemplate;
import com.pulumi.gcp.dataproc.SessionTemplateArgs;
import com.pulumi.gcp.dataproc.inputs.SessionTemplateRuntimeConfigArgs;
import com.pulumi.gcp.dataproc.inputs.SessionTemplateEnvironmentConfigArgs;
import com.pulumi.gcp.dataproc.inputs.SessionTemplateEnvironmentConfigExecutionConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var exampleSessionTemplatesSparkConnect = new SessionTemplate("exampleSessionTemplatesSparkConnect", SessionTemplateArgs.builder()
.name("projects/my-project-name/locations/us-central1/sessionTemplates/sc-session-template")
.location("us-central1")
.labels(Map.of("session_template_test", "terraform"))
.runtimeConfig(SessionTemplateRuntimeConfigArgs.builder()
.properties(Map.ofEntries(
Map.entry("spark.dynamicAllocation.enabled", "false"),
Map.entry("spark.executor.instances", "2")
))
.build())
.environmentConfig(SessionTemplateEnvironmentConfigArgs.builder()
.executionConfig(SessionTemplateEnvironmentConfigExecutionConfigArgs.builder()
.subnetworkUri("default")
.ttl("3600s")
.networkTags("tag1")
.build())
.build())
.build());
}
}
resources:
exampleSessionTemplatesSparkConnect:
type: gcp:dataproc:SessionTemplate
name: example_session_templates_spark_connect
properties:
name: projects/my-project-name/locations/us-central1/sessionTemplates/sc-session-template
location: us-central1
labels:
session_template_test: terraform
runtimeConfig:
properties:
spark.dynamicAllocation.enabled: 'false'
spark.executor.instances: '2'
environmentConfig:
executionConfig:
subnetworkUri: default
ttl: 3600s
networkTags:
- tag1
A Spark Connect template omits the jupyterSession block; the session type can also be declared explicitly with a sparkConnectSession block. The ttl property sets the maximum session lifetime, as opposed to idleTtl, which triggers on inactivity. Network tags control firewall rules for remote client access. This configuration enables programmatic Spark access from applications, IDEs, or notebooks running outside Dataproc.
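The following sketch declares the Spark Connect session type explicitly. It assumes the sparkConnectSession block takes no additional fields; the template name and subnetwork are placeholders.
import * as gcp from "@pulumi/gcp";
// Minimal sketch: the Spark Connect template above, with the session type declared
// explicitly via an empty sparkConnectSession block (assumed to take no fields).
const sparkConnectExplicit = new gcp.dataproc.SessionTemplate("spark_connect_explicit", {
    name: "projects/my-project-name/locations/us-central1/sessionTemplates/sc-explicit-template",
    location: "us-central1",
    runtimeConfig: {
        properties: {
            "spark.executor.instances": "2",
        },
    },
    environmentConfig: {
        executionConfig: {
            subnetworkUri: "default",
            ttl: "3600s",
        },
    },
    sparkConnectSession: {},
});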
Add encryption, metastore, and history server integration
Production deployments integrate with enterprise data infrastructure: KMS encryption, Hive Metastore, and Spark History Server.
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const project = gcp.organizations.getProject({});
const gcsAccount = gcp.storage.getProjectServiceAccount({});
const bucket = new gcp.storage.Bucket("bucket", {
uniformBucketLevelAccess: true,
name: "dataproc-bucket",
location: "US",
forceDestroy: true,
});
const cryptoKeyMember1 = new gcp.kms.CryptoKeyIAMMember("crypto_key_member_1", {
cryptoKeyId: "example-key",
role: "roles/cloudkms.cryptoKeyEncrypterDecrypter",
member: project.then(project => `serviceAccount:service-${project.number}@dataproc-accounts.iam.gserviceaccount.com`),
});
const ms = new gcp.dataproc.MetastoreService("ms", {
serviceId: "jupyter-session-template",
location: "us-central1",
port: 9080,
tier: "DEVELOPER",
maintenanceWindow: {
hourOfDay: 2,
dayOfWeek: "SUNDAY",
},
hiveMetastoreConfig: {
version: "3.1.2",
},
networkConfig: {
consumers: [{
subnetwork: "projects/my-project-name/regions/us-central1/subnetworks/default",
}],
},
});
const basic = new gcp.dataproc.Cluster("basic", {
name: "jupyter-session-template",
region: "us-central1",
clusterConfig: {
softwareConfig: {
overrideProperties: {
"dataproc:dataproc.allow.zero.workers": "true",
"spark:spark.history.fs.logDirectory": pulumi.interpolate`gs://${bucket.name}/*/spark-job-history`,
},
},
gceClusterConfig: {
subnetwork: "default",
},
endpointConfig: {
enableHttpPortAccess: true,
},
masterConfig: {
numInstances: 1,
machineType: "e2-standard-2",
diskConfig: {
bootDiskSizeGb: 35,
},
},
metastoreConfig: {
dataprocMetastoreService: ms.name,
},
},
});
const dataprocSessionTemplatesJupyterFull = new gcp.dataproc.SessionTemplate("dataproc_session_templates_jupyter_full", {
name: "projects/my-project-name/locations/us-central1/sessionTemplates/jupyter-session-template",
location: "us-central1",
labels: {
session_template_test: "terraform",
},
runtimeConfig: {
properties: {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
version: "2.2",
containerImage: "us-docker.pkg.dev/my-project-name/s8s-spark-test-images/s8s-spark:latest",
},
environmentConfig: {
executionConfig: {
ttl: "3600s",
networkTags: ["tag1"],
kmsKey: "example-key",
subnetworkUri: "default",
serviceAccount: project.then(project => `${project.number}-compute@developer.gserviceaccount.com`),
stagingBucket: bucket.name,
authenticationConfig: {
userWorkloadAuthenticationType: "SERVICE_ACCOUNT",
},
},
peripheralsConfig: {
metastoreService: ms.name,
sparkHistoryServerConfig: {
dataprocCluster: basic.id,
},
},
},
jupyterSession: {
kernel: "PYTHON",
displayName: "tf python kernel",
},
}, {
dependsOn: [cryptoKeyMember1],
});
import pulumi
import pulumi_gcp as gcp
project = gcp.organizations.get_project()
gcs_account = gcp.storage.get_project_service_account()
bucket = gcp.storage.Bucket("bucket",
uniform_bucket_level_access=True,
name="dataproc-bucket",
location="US",
force_destroy=True)
crypto_key_member1 = gcp.kms.CryptoKeyIAMMember("crypto_key_member_1",
crypto_key_id="example-key",
role="roles/cloudkms.cryptoKeyEncrypterDecrypter",
member=f"serviceAccount:service-{project.number}@dataproc-accounts.iam.gserviceaccount.com")
ms = gcp.dataproc.MetastoreService("ms",
service_id="jupyter-session-template",
location="us-central1",
port=9080,
tier="DEVELOPER",
maintenance_window={
"hour_of_day": 2,
"day_of_week": "SUNDAY",
},
hive_metastore_config={
"version": "3.1.2",
},
network_config={
"consumers": [{
"subnetwork": "projects/my-project-name/regions/us-central1/subnetworks/default",
}],
})
basic = gcp.dataproc.Cluster("basic",
name="jupyter-session-template",
region="us-central1",
cluster_config={
"software_config": {
"override_properties": {
"dataproc:dataproc.allow.zero.workers": "true",
"spark:spark.history.fs.logDirectory": bucket.name.apply(lambda name: f"gs://{name}/*/spark-job-history"),
},
},
"gce_cluster_config": {
"subnetwork": "default",
},
"endpoint_config": {
"enable_http_port_access": True,
},
"master_config": {
"num_instances": 1,
"machine_type": "e2-standard-2",
"disk_config": {
"boot_disk_size_gb": 35,
},
},
"metastore_config": {
"dataproc_metastore_service": ms.name,
},
})
dataproc_session_templates_jupyter_full = gcp.dataproc.SessionTemplate("dataproc_session_templates_jupyter_full",
name="projects/my-project-name/locations/us-central1/sessionTemplates/jupyter-session-template",
location="us-central1",
labels={
"session_template_test": "terraform",
},
runtime_config={
"properties": {
"spark.dynamicAllocation.enabled": "false",
"spark.executor.instances": "2",
},
"version": "2.2",
"container_image": "us-docker.pkg.dev/my-project-name/s8s-spark-test-images/s8s-spark:latest",
},
environment_config={
"execution_config": {
"ttl": "3600s",
"network_tags": ["tag1"],
"kms_key": "example-key",
"subnetwork_uri": "default",
"service_account": f"{project.number}-compute@developer.gserviceaccount.com",
"staging_bucket": bucket.name,
"authentication_config": {
"user_workload_authentication_type": "SERVICE_ACCOUNT",
},
},
"peripherals_config": {
"metastore_service": ms.name,
"spark_history_server_config": {
"dataproc_cluster": basic.id,
},
},
},
jupyter_session={
"kernel": "PYTHON",
"display_name": "tf python kernel",
},
opts = pulumi.ResourceOptions(depends_on=[crypto_key_member1]))
package main
import (
"fmt"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/dataproc"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/kms"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/organizations"
"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/storage"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
project, err := organizations.LookupProject(ctx, &organizations.LookupProjectArgs{}, nil)
if err != nil {
return err
}
_, err = storage.GetProjectServiceAccount(ctx, &storage.GetProjectServiceAccountArgs{}, nil)
if err != nil {
return err
}
bucket, err := storage.NewBucket(ctx, "bucket", &storage.BucketArgs{
UniformBucketLevelAccess: pulumi.Bool(true),
Name: pulumi.String("dataproc-bucket"),
Location: pulumi.String("US"),
ForceDestroy: pulumi.Bool(true),
})
if err != nil {
return err
}
cryptoKeyMember1, err := kms.NewCryptoKeyIAMMember(ctx, "crypto_key_member_1", &kms.CryptoKeyIAMMemberArgs{
CryptoKeyId: pulumi.String("example-key"),
Role: pulumi.String("roles/cloudkms.cryptoKeyEncrypterDecrypter"),
Member: pulumi.Sprintf("serviceAccount:service-%v@dataproc-accounts.iam.gserviceaccount.com", project.Number),
})
if err != nil {
return err
}
ms, err := dataproc.NewMetastoreService(ctx, "ms", &dataproc.MetastoreServiceArgs{
ServiceId: pulumi.String("jupyter-session-template"),
Location: pulumi.String("us-central1"),
Port: pulumi.Int(9080),
Tier: pulumi.String("DEVELOPER"),
MaintenanceWindow: &dataproc.MetastoreServiceMaintenanceWindowArgs{
HourOfDay: pulumi.Int(2),
DayOfWeek: pulumi.String("SUNDAY"),
},
HiveMetastoreConfig: &dataproc.MetastoreServiceHiveMetastoreConfigArgs{
Version: pulumi.String("3.1.2"),
},
NetworkConfig: &dataproc.MetastoreServiceNetworkConfigArgs{
Consumers: dataproc.MetastoreServiceNetworkConfigConsumerArray{
&dataproc.MetastoreServiceNetworkConfigConsumerArgs{
Subnetwork: pulumi.String("projects/my-project-name/regions/us-central1/subnetworks/default"),
},
},
},
})
if err != nil {
return err
}
basic, err := dataproc.NewCluster(ctx, "basic", &dataproc.ClusterArgs{
Name: pulumi.String("jupyter-session-template"),
Region: pulumi.String("us-central1"),
ClusterConfig: &dataproc.ClusterClusterConfigArgs{
SoftwareConfig: &dataproc.ClusterClusterConfigSoftwareConfigArgs{
OverrideProperties: pulumi.StringMap{
"dataproc:dataproc.allow.zero.workers": pulumi.String("true"),
"spark:spark.history.fs.logDirectory": bucket.Name.ApplyT(func(name string) (string, error) {
return fmt.Sprintf("gs://%v/*/spark-job-history", name), nil
}).(pulumi.StringOutput),
},
},
GceClusterConfig: &dataproc.ClusterClusterConfigGceClusterConfigArgs{
Subnetwork: pulumi.String("default"),
},
EndpointConfig: &dataproc.ClusterClusterConfigEndpointConfigArgs{
EnableHttpPortAccess: pulumi.Bool(true),
},
MasterConfig: &dataproc.ClusterClusterConfigMasterConfigArgs{
NumInstances: pulumi.Int(1),
MachineType: pulumi.String("e2-standard-2"),
DiskConfig: &dataproc.ClusterClusterConfigMasterConfigDiskConfigArgs{
BootDiskSizeGb: pulumi.Int(35),
},
},
MetastoreConfig: &dataproc.ClusterClusterConfigMetastoreConfigArgs{
DataprocMetastoreService: ms.Name,
},
},
})
if err != nil {
return err
}
_, err = dataproc.NewSessionTemplate(ctx, "dataproc_session_templates_jupyter_full", &dataproc.SessionTemplateArgs{
Name: pulumi.String("projects/my-project-name/locations/us-central1/sessionTemplates/jupyter-session-template"),
Location: pulumi.String("us-central1"),
Labels: pulumi.StringMap{
"session_template_test": pulumi.String("terraform"),
},
RuntimeConfig: &dataproc.SessionTemplateRuntimeConfigArgs{
Properties: pulumi.StringMap{
"spark.dynamicAllocation.enabled": pulumi.String("false"),
"spark.executor.instances": pulumi.String("2"),
},
Version: pulumi.String("2.2"),
ContainerImage: pulumi.String("us-docker.pkg.dev/my-project-name/s8s-spark-test-images/s8s-spark:latest"),
},
EnvironmentConfig: &dataproc.SessionTemplateEnvironmentConfigArgs{
ExecutionConfig: &dataproc.SessionTemplateEnvironmentConfigExecutionConfigArgs{
Ttl: pulumi.String("3600s"),
NetworkTags: pulumi.StringArray{
pulumi.String("tag1"),
},
KmsKey: pulumi.String("example-key"),
SubnetworkUri: pulumi.String("default"),
ServiceAccount: pulumi.Sprintf("%v-compute@developer.gserviceaccount.com", project.Number),
StagingBucket: bucket.Name,
AuthenticationConfig: &dataproc.SessionTemplateEnvironmentConfigExecutionConfigAuthenticationConfigArgs{
UserWorkloadAuthenticationType: pulumi.String("SERVICE_ACCOUNT"),
},
},
PeripheralsConfig: &dataproc.SessionTemplateEnvironmentConfigPeripheralsConfigArgs{
MetastoreService: ms.Name,
SparkHistoryServerConfig: &dataproc.SessionTemplateEnvironmentConfigPeripheralsConfigSparkHistoryServerConfigArgs{
DataprocCluster: basic.ID(),
},
},
},
JupyterSession: &dataproc.SessionTemplateJupyterSessionArgs{
Kernel: pulumi.String("PYTHON"),
DisplayName: pulumi.String("tf python kernel"),
},
}, pulumi.DependsOn([]pulumi.Resource{
cryptoKeyMember1,
}))
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var project = Gcp.Organizations.GetProject.Invoke();
var gcsAccount = Gcp.Storage.GetProjectServiceAccount.Invoke();
var bucket = new Gcp.Storage.Bucket("bucket", new()
{
UniformBucketLevelAccess = true,
Name = "dataproc-bucket",
Location = "US",
ForceDestroy = true,
});
var cryptoKeyMember1 = new Gcp.Kms.CryptoKeyIAMMember("crypto_key_member_1", new()
{
CryptoKeyId = "example-key",
Role = "roles/cloudkms.cryptoKeyEncrypterDecrypter",
Member = Output.Format($"serviceAccount:service-{project.Apply(getProjectResult => getProjectResult.Number)}@dataproc-accounts.iam.gserviceaccount.com"),
});
var ms = new Gcp.Dataproc.MetastoreService("ms", new()
{
ServiceId = "jupyter-session-template",
Location = "us-central1",
Port = 9080,
Tier = "DEVELOPER",
MaintenanceWindow = new Gcp.Dataproc.Inputs.MetastoreServiceMaintenanceWindowArgs
{
HourOfDay = 2,
DayOfWeek = "SUNDAY",
},
HiveMetastoreConfig = new Gcp.Dataproc.Inputs.MetastoreServiceHiveMetastoreConfigArgs
{
Version = "3.1.2",
},
NetworkConfig = new Gcp.Dataproc.Inputs.MetastoreServiceNetworkConfigArgs
{
Consumers = new[]
{
new Gcp.Dataproc.Inputs.MetastoreServiceNetworkConfigConsumerArgs
{
Subnetwork = "projects/my-project-name/regions/us-central1/subnetworks/default",
},
},
},
});
var basic = new Gcp.Dataproc.Cluster("basic", new()
{
Name = "jupyter-session-template",
Region = "us-central1",
ClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigArgs
{
SoftwareConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigSoftwareConfigArgs
{
OverrideProperties =
{
{ "dataproc:dataproc.allow.zero.workers", "true" },
{ "spark:spark.history.fs.logDirectory", bucket.Name.Apply(name => $"gs://{name}/*/spark-job-history") },
},
},
GceClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigGceClusterConfigArgs
{
Subnetwork = "default",
},
EndpointConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigEndpointConfigArgs
{
EnableHttpPortAccess = true,
},
MasterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigArgs
{
NumInstances = 1,
MachineType = "e2-standard-2",
DiskConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigDiskConfigArgs
{
BootDiskSizeGb = 35,
},
},
MetastoreConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMetastoreConfigArgs
{
DataprocMetastoreService = ms.Name,
},
},
});
var dataprocSessionTemplatesJupyterFull = new Gcp.Dataproc.SessionTemplate("dataproc_session_templates_jupyter_full", new()
{
Name = "projects/my-project-name/locations/us-central1/sessionTemplates/jupyter-session-template",
Location = "us-central1",
Labels =
{
{ "session_template_test", "terraform" },
},
RuntimeConfig = new Gcp.Dataproc.Inputs.SessionTemplateRuntimeConfigArgs
{
Properties =
{
{ "spark.dynamicAllocation.enabled", "false" },
{ "spark.executor.instances", "2" },
},
Version = "2.2",
ContainerImage = "us-docker.pkg.dev/my-project-name/s8s-spark-test-images/s8s-spark:latest",
},
EnvironmentConfig = new Gcp.Dataproc.Inputs.SessionTemplateEnvironmentConfigArgs
{
ExecutionConfig = new Gcp.Dataproc.Inputs.SessionTemplateEnvironmentConfigExecutionConfigArgs
{
Ttl = "3600s",
NetworkTags = new[]
{
"tag1",
},
KmsKey = "example-key",
SubnetworkUri = "default",
ServiceAccount = Output.Format($"{project.Apply(getProjectResult => getProjectResult.Number)}-compute@developer.gserviceaccount.com"),
StagingBucket = bucket.Name,
AuthenticationConfig = new Gcp.Dataproc.Inputs.SessionTemplateEnvironmentConfigExecutionConfigAuthenticationConfigArgs
{
UserWorkloadAuthenticationType = "SERVICE_ACCOUNT",
},
},
PeripheralsConfig = new Gcp.Dataproc.Inputs.SessionTemplateEnvironmentConfigPeripheralsConfigArgs
{
MetastoreService = ms.Name,
SparkHistoryServerConfig = new Gcp.Dataproc.Inputs.SessionTemplateEnvironmentConfigPeripheralsConfigSparkHistoryServerConfigArgs
{
DataprocCluster = basic.Id,
},
},
},
JupyterSession = new Gcp.Dataproc.Inputs.SessionTemplateJupyterSessionArgs
{
Kernel = "PYTHON",
DisplayName = "tf python kernel",
},
}, new CustomResourceOptions
{
DependsOn =
{
cryptoKeyMember1,
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.organizations.OrganizationsFunctions;
import com.pulumi.gcp.organizations.inputs.GetProjectArgs;
import com.pulumi.gcp.storage.StorageFunctions;
import com.pulumi.gcp.storage.inputs.GetProjectServiceAccountArgs;
import com.pulumi.gcp.storage.Bucket;
import com.pulumi.gcp.storage.BucketArgs;
import com.pulumi.gcp.kms.CryptoKeyIAMMember;
import com.pulumi.gcp.kms.CryptoKeyIAMMemberArgs;
import com.pulumi.gcp.dataproc.MetastoreService;
import com.pulumi.gcp.dataproc.MetastoreServiceArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceMaintenanceWindowArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceHiveMetastoreConfigArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceNetworkConfigArgs;
import com.pulumi.gcp.dataproc.inputs.MetastoreServiceNetworkConfigConsumerArgs;
import com.pulumi.gcp.dataproc.Cluster;
import com.pulumi.gcp.dataproc.ClusterArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigSoftwareConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigGceClusterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigEndpointConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigDiskConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMetastoreConfigArgs;
import com.pulumi.gcp.dataproc.SessionTemplate;
import com.pulumi.gcp.dataproc.SessionTemplateArgs;
import com.pulumi.gcp.dataproc.inputs.SessionTemplateRuntimeConfigArgs;
import com.pulumi.gcp.dataproc.inputs.SessionTemplateEnvironmentConfigArgs;
import com.pulumi.gcp.dataproc.inputs.SessionTemplateEnvironmentConfigExecutionConfigArgs;
import com.pulumi.gcp.dataproc.inputs.SessionTemplateEnvironmentConfigExecutionConfigAuthenticationConfigArgs;
import com.pulumi.gcp.dataproc.inputs.SessionTemplateEnvironmentConfigPeripheralsConfigArgs;
import com.pulumi.gcp.dataproc.inputs.SessionTemplateEnvironmentConfigPeripheralsConfigSparkHistoryServerConfigArgs;
import com.pulumi.gcp.dataproc.inputs.SessionTemplateJupyterSessionArgs;
import com.pulumi.resources.CustomResourceOptions;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
final var project = OrganizationsFunctions.getProject(GetProjectArgs.builder()
.build());
final var gcsAccount = StorageFunctions.getProjectServiceAccount(GetProjectServiceAccountArgs.builder()
.build());
var bucket = new Bucket("bucket", BucketArgs.builder()
.uniformBucketLevelAccess(true)
.name("dataproc-bucket")
.location("US")
.forceDestroy(true)
.build());
var cryptoKeyMember1 = new CryptoKeyIAMMember("cryptoKeyMember1", CryptoKeyIAMMemberArgs.builder()
.cryptoKeyId("example-key")
.role("roles/cloudkms.cryptoKeyEncrypterDecrypter")
.member(project.applyValue(getProjectResult -> String.format("serviceAccount:service-%s@dataproc-accounts.iam.gserviceaccount.com", getProjectResult.number())))
.build());
var ms = new MetastoreService("ms", MetastoreServiceArgs.builder()
.serviceId("jupyter-session-template")
.location("us-central1")
.port(9080)
.tier("DEVELOPER")
.maintenanceWindow(MetastoreServiceMaintenanceWindowArgs.builder()
.hourOfDay(2)
.dayOfWeek("SUNDAY")
.build())
.hiveMetastoreConfig(MetastoreServiceHiveMetastoreConfigArgs.builder()
.version("3.1.2")
.build())
.networkConfig(MetastoreServiceNetworkConfigArgs.builder()
.consumers(MetastoreServiceNetworkConfigConsumerArgs.builder()
.subnetwork("projects/my-project-name/regions/us-central1/subnetworks/default")
.build())
.build())
.build());
var basic = new Cluster("basic", ClusterArgs.builder()
.name("jupyter-session-template")
.region("us-central1")
.clusterConfig(ClusterClusterConfigArgs.builder()
.softwareConfig(ClusterClusterConfigSoftwareConfigArgs.builder()
.overrideProperties(Map.ofEntries(
Map.entry("dataproc:dataproc.allow.zero.workers", "true"),
Map.entry("spark:spark.history.fs.logDirectory", bucket.name().applyValue(_name -> String.format("gs://%s/*/spark-job-history", _name)))
))
.build())
.gceClusterConfig(ClusterClusterConfigGceClusterConfigArgs.builder()
.subnetwork("default")
.build())
.endpointConfig(ClusterClusterConfigEndpointConfigArgs.builder()
.enableHttpPortAccess(true)
.build())
.masterConfig(ClusterClusterConfigMasterConfigArgs.builder()
.numInstances(1)
.machineType("e2-standard-2")
.diskConfig(ClusterClusterConfigMasterConfigDiskConfigArgs.builder()
.bootDiskSizeGb(35)
.build())
.build())
.metastoreConfig(ClusterClusterConfigMetastoreConfigArgs.builder()
.dataprocMetastoreService(ms.name())
.build())
.build())
.build());
var dataprocSessionTemplatesJupyterFull = new SessionTemplate("dataprocSessionTemplatesJupyterFull", SessionTemplateArgs.builder()
.name("projects/my-project-name/locations/us-central1/sessionTemplates/jupyter-session-template")
.location("us-central1")
.labels(Map.of("session_template_test", "terraform"))
.runtimeConfig(SessionTemplateRuntimeConfigArgs.builder()
.properties(Map.ofEntries(
Map.entry("spark.dynamicAllocation.enabled", "false"),
Map.entry("spark.executor.instances", "2")
))
.version("2.2")
.containerImage("us-docker.pkg.dev/my-project-name/s8s-spark-test-images/s8s-spark:latest")
.build())
.environmentConfig(SessionTemplateEnvironmentConfigArgs.builder()
.executionConfig(SessionTemplateEnvironmentConfigExecutionConfigArgs.builder()
.ttl("3600s")
.networkTags("tag1")
.kmsKey("example-key")
.subnetworkUri("default")
.serviceAccount(project.applyValue(getProjectResult -> String.format("%s-compute@developer.gserviceaccount.com", getProjectResult.number())))
.stagingBucket(bucket.name())
.authenticationConfig(SessionTemplateEnvironmentConfigExecutionConfigAuthenticationConfigArgs.builder()
.userWorkloadAuthenticationType("SERVICE_ACCOUNT")
.build())
.build())
.peripheralsConfig(SessionTemplateEnvironmentConfigPeripheralsConfigArgs.builder()
.metastoreService(ms.name())
.sparkHistoryServerConfig(SessionTemplateEnvironmentConfigPeripheralsConfigSparkHistoryServerConfigArgs.builder()
.dataprocCluster(basic.id())
.build())
.build())
.build())
.jupyterSession(SessionTemplateJupyterSessionArgs.builder()
.kernel("PYTHON")
.displayName("tf python kernel")
.build())
.build(), CustomResourceOptions.builder()
.dependsOn(cryptoKeyMember1)
.build());
}
}
resources:
dataprocSessionTemplatesJupyterFull:
type: gcp:dataproc:SessionTemplate
name: dataproc_session_templates_jupyter_full
properties:
name: projects/my-project-name/locations/us-central1/sessionTemplates/jupyter-session-template
location: us-central1
labels:
session_template_test: terraform
runtimeConfig:
properties:
spark.dynamicAllocation.enabled: 'false'
spark.executor.instances: '2'
version: '2.2'
containerImage: us-docker.pkg.dev/my-project-name/s8s-spark-test-images/s8s-spark:latest
environmentConfig:
executionConfig:
ttl: 3600s
networkTags:
- tag1
kmsKey: example-key
subnetworkUri: default
serviceAccount: ${project.number}-compute@developer.gserviceaccount.com
stagingBucket: ${bucket.name}
authenticationConfig:
userWorkloadAuthenticationType: SERVICE_ACCOUNT
peripheralsConfig:
metastoreService: ${ms.name}
sparkHistoryServerConfig:
dataprocCluster: ${basic.id}
jupyterSession:
kernel: PYTHON
displayName: tf python kernel
options:
dependsOn:
- ${cryptoKeyMember1}
bucket:
type: gcp:storage:Bucket
properties:
uniformBucketLevelAccess: true
name: dataproc-bucket
location: US
forceDestroy: true
cryptoKeyMember1:
type: gcp:kms:CryptoKeyIAMMember
name: crypto_key_member_1
properties:
cryptoKeyId: example-key
role: roles/cloudkms.cryptoKeyEncrypterDecrypter
member: serviceAccount:service-${project.number}@dataproc-accounts.iam.gserviceaccount.com
basic:
type: gcp:dataproc:Cluster
properties:
name: jupyter-session-template
region: us-central1
clusterConfig:
softwareConfig:
overrideProperties:
dataproc:dataproc.allow.zero.workers: 'true'
spark:spark.history.fs.logDirectory: gs://${bucket.name}/*/spark-job-history
gceClusterConfig:
subnetwork: default
endpointConfig:
enableHttpPortAccess: true
masterConfig:
numInstances: 1
machineType: e2-standard-2
diskConfig:
bootDiskSizeGb: 35
metastoreConfig:
dataprocMetastoreService: ${ms.name}
ms:
type: gcp:dataproc:MetastoreService
properties:
serviceId: jupyter-session-template
location: us-central1
port: 9080
tier: DEVELOPER
maintenanceWindow:
hourOfDay: 2
dayOfWeek: SUNDAY
hiveMetastoreConfig:
version: 3.1.2
networkConfig:
consumers:
- subnetwork: projects/my-project-name/regions/us-central1/subnetworks/default
variables:
project:
fn::invoke:
function: gcp:organizations:getProject
arguments: {}
gcsAccount:
fn::invoke:
function: gcp:storage:getProjectServiceAccount
arguments: {}
The kmsKey property encrypts session data at rest. The serviceAccount grants sessions specific IAM permissions. The stagingBucket stores temporary files during execution. The peripheralsConfig connects sessions to a Dataproc Metastore service for shared table metadata and a Spark History Server cluster for job monitoring. This example creates the metastore service and history cluster inline; in practice, you’d reference existing infrastructure. The dependsOn ensures KMS permissions are granted before creating the template.
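As a minimal sketch of that pattern, the template below points at pre-existing infrastructure by resource name instead of creating it inline; every project, path, and bucket name here is a placeholder to replace with your own.
import * as gcp from "@pulumi/gcp";
// Minimal sketch: wire a session template to existing infrastructure by resource name.
// All identifiers below are placeholders; substitute your own project, key ring,
// bucket, metastore, and history-server cluster.
const sharedInfraTemplate = new gcp.dataproc.SessionTemplate("shared_infra_template", {
    name: "projects/my-project-name/locations/us-central1/sessionTemplates/shared-session-template",
    location: "us-central1",
    environmentConfig: {
        executionConfig: {
            subnetworkUri: "projects/my-project-name/regions/us-central1/subnetworks/dataproc-subnet",
            kmsKey: "projects/my-project-name/locations/us-central1/keyRings/dataproc-ring/cryptoKeys/dataproc-key",
            stagingBucket: "my-existing-staging-bucket",
            ttl: "3600s",
        },
        peripheralsConfig: {
            metastoreService: "projects/my-project-name/locations/us-central1/services/shared-metastore",
            sparkHistoryServerConfig: {
                dataprocCluster: "projects/my-project-name/regions/us-central1/clusters/history-server",
            },
        },
    },
    jupyterSession: {
        kernel: "PYTHON",
        displayName: "shared python kernel",
    },
});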
Beyond these examples
These snippets focus on specific session template features: Jupyter and Spark Connect session types, runtime configuration and Spark properties, and KMS encryption and metastore integration. They’re intentionally minimal rather than full data platform deployments.
The examples may reference pre-existing infrastructure such as VPC subnetworks, KMS encryption keys, and GCS buckets for staging. They focus on configuring the session template rather than provisioning the surrounding infrastructure.
To keep things focused, the examples touch on but do not fully explore several common session template concerns, including:
- Session lifecycle management (idleTtl vs ttl)
- Custom container images (containerImage)
- Authentication types (END_USER_CREDENTIALS vs SERVICE_ACCOUNT)
- Network isolation and firewall rules
This scoping is intentional: the goal is to illustrate how each session template feature is wired, not to provide drop-in data science modules. See the Dataproc SessionTemplate resource reference for all available configuration options.
Let's configure GCP Dataproc Serverless Session Templates
Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.
Frequently Asked Questions
Configuration & Immutability
- The name, project, and location properties are immutable; changing any of these forces recreation of the template.
- The name must use the format projects/{project}/locations/{location}/sessionTemplates/{template_id}, for example projects/my-project-name/locations/us-central1/sessionTemplates/jupyter-session-template.
Labels & Metadata
- The labels field is non-authoritative and only manages the labels declared in your configuration. Use effectiveLabels to see all labels on the resource in GCP, including those set by other clients. The pulumiLabels field shows labels configured directly on the resource plus default provider labels.
Session Types & Runtime
- A template defines either jupyterSession for Jupyter notebook sessions or sparkConnectSession for Spark Connect sessions; these are mutually exclusive session types with different configuration options.
- The ttl property sets the maximum session lifetime, while idleTtl sets the idle timeout before automatic termination. You can use either or both depending on your session management needs (see the sketch below).
- To run sessions on a custom image, set containerImage in runtimeConfig to an image in your container registry. The full Jupyter example uses us-docker.pkg.dev/my-project-name/s8s-spark-test-images/s8s-spark:latest.
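A minimal sketch of a template that sets both timeouts; all names and the subnetwork are placeholders.
import * as gcp from "@pulumi/gcp";
// Minimal sketch: cap total session lifetime with ttl while also reclaiming idle
// sessions sooner with idleTtl.
const boundedSessionTemplate = new gcp.dataproc.SessionTemplate("bounded_session_template", {
    name: "projects/my-project-name/locations/us-central1/sessionTemplates/bounded-session-template",
    location: "us-central1",
    environmentConfig: {
        executionConfig: {
            subnetworkUri: "default",
            ttl: "14400s", // hard cap: four hours
            idleTtl: "1800s", // terminate after 30 minutes of inactivity
        },
    },
    jupyterSession: {
        kernel: "PYTHON",
        displayName: "bounded python kernel",
    },
});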
Dependencies & Permissions
- When you set kmsKey in executionConfig, the Dataproc service account needs the roles/cloudkms.cryptoKeyEncrypterDecrypter role. Use dependsOn to ensure the IAM binding is created first, as shown in the full Jupyter example.
- The full example also wires in a staging bucket (stagingBucket), metastore service (metastoreService), Spark History Server cluster (sparkHistoryServerConfig), and a KMS key with the proper IAM permissions.
Using a different cloud?
Explore analytics guides for other cloud providers: