gcp logo
Google Cloud Classic v6.52.0, Mar 22 23

gcp.dataproc.Cluster

Manages a Cloud Dataproc cluster resource within GCP.

!> Warning: Due to limitations of the API, all arguments except labels, cluster_config.worker_config.num_instances, and cluster_config.preemptible_worker_config.num_instances are non-updatable. Changing any other argument will cause recreation of the whole cluster!

Example Usage

Basic

using System.Collections.Generic;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    // Minimal Dataproc cluster: only the region is specified, so every other
    // setting (machine types, node counts, image version, ...) uses the
    // service defaults.
    var simplecluster = new Gcp.Dataproc.Cluster("simplecluster", new()
    {
        Region = "us-central1",
    });

});
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v6/go/gcp/dataproc"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Minimal Dataproc cluster: only the region is set, so all other
		// settings fall back to the service defaults. The resource handle is
		// discarded because nothing else references the cluster.
		_, err := dataproc.NewCluster(ctx, "simplecluster", &dataproc.ClusterArgs{
			Region: pulumi.String("us-central1"),
		})
		if err != nil {
			return err
		}
		return nil
	})
}
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.Cluster;
import com.pulumi.gcp.dataproc.ClusterArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // Minimal Dataproc cluster: only the region is specified; all other
        // settings use the service defaults.
        var simplecluster = new Cluster("simplecluster", ClusterArgs.builder()        
            .region("us-central1")
            .build());

    }
}
import pulumi
import pulumi_gcp as gcp

# Minimal Dataproc cluster: only the region is specified; everything else
# (machine types, node counts, image version, ...) uses service defaults.
simplecluster = gcp.dataproc.Cluster("simplecluster", region="us-central1")
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Minimal Dataproc cluster: only the region is specified; all other settings
// use the service defaults.
const simplecluster = new gcp.dataproc.Cluster("simplecluster", {region: "us-central1"});
resources:
  # Minimal Dataproc cluster: only the region is specified; all other
  # settings use the service defaults.
  simplecluster:
    type: gcp:dataproc:Cluster
    properties:
      region: us-central1

Advanced

using System.Collections.Generic;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    // Dedicated service account that the cluster's VMs will run as.
    var @default = new Gcp.ServiceAccount.Account("default", new()
    {
        AccountId = "service-account-id",
        DisplayName = "Service Account",
    });

    var mycluster = new Gcp.Dataproc.Cluster("mycluster", new()
    {
        Region = "us-central1",
        // Let running work drain for up to 120s before workers are removed
        // when the worker count is reduced.
        GracefulDecommissionTimeout = "120s",
        Labels = 
        {
            { "foo", "bar" },
        },
        ClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigArgs
        {
            StagingBucket = "dataproc-staging-bucket",
            MasterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigArgs
            {
                NumInstances = 1,
                MachineType = "e2-medium",
                DiskConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigDiskConfigArgs
                {
                    BootDiskType = "pd-ssd",
                    BootDiskSizeGb = 30,
                },
            },
            WorkerConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigWorkerConfigArgs
            {
                NumInstances = 2,
                MachineType = "e2-medium",
                MinCpuPlatform = "Intel Skylake",
                DiskConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigWorkerConfigDiskConfigArgs
                {
                    BootDiskSizeGb = 30,
                    NumLocalSsds = 1,
                },
            },
            // Zero preemptible workers; the override property below permits a
            // zero-worker configuration.
            PreemptibleWorkerConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigPreemptibleWorkerConfigArgs
            {
                NumInstances = 0,
            },
            SoftwareConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigSoftwareConfigArgs
            {
                ImageVersion = "2.0.35-debian10",
                OverrideProperties = 
                {
                    { "dataproc:dataproc.allow.zero.workers", "true" },
                },
            },
            GceClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigGceClusterConfigArgs
            {
                Tags = new[]
                {
                    "foo",
                    "bar",
                },
                // Output reference: the email of the account created above.
                ServiceAccount = @default.Email,
                ServiceAccountScopes = new[]
                {
                    "cloud-platform",
                },
            },
            // Script run on each node after it boots, with a 500s timeout.
            InitializationActions = new[]
            {
                new Gcp.Dataproc.Inputs.ClusterClusterConfigInitializationActionArgs
                {
                    Script = "gs://dataproc-initialization-actions/stackdriver/stackdriver.sh",
                    TimeoutSec = 500,
                },
            },
        },
    });

});
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v6/go/gcp/dataproc"
	"github.com/pulumi/pulumi-gcp/sdk/v6/go/gcp/serviceAccount"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := serviceAccount.NewAccount(ctx, "default", &serviceAccount.AccountArgs{
			AccountId:   pulumi.String("service-account-id"),
			DisplayName: pulumi.String("Service Account"),
		})
		if err != nil {
			return err
		}
		_, err = dataproc.NewCluster(ctx, "mycluster", &dataproc.ClusterArgs{
			Region:                      pulumi.String("us-central1"),
			GracefulDecommissionTimeout: pulumi.String("120s"),
			Labels: pulumi.StringMap{
				"foo": pulumi.String("bar"),
			},
			ClusterConfig: &dataproc.ClusterClusterConfigArgs{
				StagingBucket: pulumi.String("dataproc-staging-bucket"),
				MasterConfig: &dataproc.ClusterClusterConfigMasterConfigArgs{
					NumInstances: pulumi.Int(1),
					MachineType:  pulumi.String("e2-medium"),
					DiskConfig: &dataproc.ClusterClusterConfigMasterConfigDiskConfigArgs{
						BootDiskType:   pulumi.String("pd-ssd"),
						BootDiskSizeGb: pulumi.Int(30),
					},
				},
				WorkerConfig: &dataproc.ClusterClusterConfigWorkerConfigArgs{
					NumInstances:   pulumi.Int(2),
					MachineType:    pulumi.String("e2-medium"),
					MinCpuPlatform: pulumi.String("Intel Skylake"),
					DiskConfig: &dataproc.ClusterClusterConfigWorkerConfigDiskConfigArgs{
						BootDiskSizeGb: pulumi.Int(30),
						NumLocalSsds:   pulumi.Int(1),
					},
				},
				PreemptibleWorkerConfig: &dataproc.ClusterClusterConfigPreemptibleWorkerConfigArgs{
					NumInstances: pulumi.Int(0),
				},
				SoftwareConfig: &dataproc.ClusterClusterConfigSoftwareConfigArgs{
					ImageVersion: pulumi.String("2.0.35-debian10"),
					OverrideProperties: pulumi.StringMap{
						"dataproc:dataproc.allow.zero.workers": pulumi.String("true"),
					},
				},
				GceClusterConfig: &dataproc.ClusterClusterConfigGceClusterConfigArgs{
					Tags: pulumi.StringArray{
						pulumi.String("foo"),
						pulumi.String("bar"),
					},
					ServiceAccount: _default.Email,
					ServiceAccountScopes: pulumi.StringArray{
						pulumi.String("cloud-platform"),
					},
				},
				InitializationActions: dataproc.ClusterClusterConfigInitializationActionArray{
					&dataproc.ClusterClusterConfigInitializationActionArgs{
						Script:     pulumi.String("gs://dataproc-initialization-actions/stackdriver/stackdriver.sh"),
						TimeoutSec: pulumi.Int(500),
					},
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.serviceAccount.Account;
import com.pulumi.gcp.serviceAccount.AccountArgs;
import com.pulumi.gcp.dataproc.Cluster;
import com.pulumi.gcp.dataproc.ClusterArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigDiskConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigWorkerConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigWorkerConfigDiskConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigPreemptibleWorkerConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigSoftwareConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigGceClusterConfigArgs;
// Missing from the original listing: the initialization-action args type is
// used below and must be imported for the example to compile.
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigInitializationActionArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // Dedicated service account that the cluster's VMs will run as.
        var default_ = new Account("default", AccountArgs.builder()
            .accountId("service-account-id")
            .displayName("Service Account")
            .build());

        var mycluster = new Cluster("mycluster", ClusterArgs.builder()
            .region("us-central1")
            // Let running work drain for up to 120s before workers are
            // removed when the worker count is reduced.
            .gracefulDecommissionTimeout("120s")
            .labels(Map.of("foo", "bar"))
            .clusterConfig(ClusterClusterConfigArgs.builder()
                .stagingBucket("dataproc-staging-bucket")
                .masterConfig(ClusterClusterConfigMasterConfigArgs.builder()
                    .numInstances(1)
                    .machineType("e2-medium")
                    .diskConfig(ClusterClusterConfigMasterConfigDiskConfigArgs.builder()
                        .bootDiskType("pd-ssd")
                        .bootDiskSizeGb(30)
                        .build())
                    .build())
                .workerConfig(ClusterClusterConfigWorkerConfigArgs.builder()
                    .numInstances(2)
                    .machineType("e2-medium")
                    .minCpuPlatform("Intel Skylake")
                    .diskConfig(ClusterClusterConfigWorkerConfigDiskConfigArgs.builder()
                        .bootDiskSizeGb(30)
                        .numLocalSsds(1)
                        .build())
                    .build())
                // Zero preemptible workers; the override property below
                // permits a zero-worker configuration.
                .preemptibleWorkerConfig(ClusterClusterConfigPreemptibleWorkerConfigArgs.builder()
                    .numInstances(0)
                    .build())
                .softwareConfig(ClusterClusterConfigSoftwareConfigArgs.builder()
                    .imageVersion("2.0.35-debian10")
                    .overrideProperties(Map.of("dataproc:dataproc.allow.zero.workers", "true"))
                    .build())
                .gceClusterConfig(ClusterClusterConfigGceClusterConfigArgs.builder()
                    .tags(                    
                        "foo",
                        "bar")
                    // Output reference: the email of the account created above.
                    .serviceAccount(default_.email())
                    .serviceAccountScopes("cloud-platform")
                    .build())
                // Script run on each node after it boots, with a 500s timeout.
                .initializationActions(ClusterClusterConfigInitializationActionArgs.builder()
                    .script("gs://dataproc-initialization-actions/stackdriver/stackdriver.sh")
                    .timeoutSec(500)
                    .build())
                .build())
            .build());

    }
}
import pulumi
import pulumi_gcp as gcp

# Dedicated service account that the cluster's VMs will run as.
default = gcp.service_account.Account("default",
    account_id="service-account-id",
    display_name="Service Account")
mycluster = gcp.dataproc.Cluster("mycluster",
    region="us-central1",
    # Let running work drain for up to 120s before workers are removed
    # when the worker count is reduced.
    graceful_decommission_timeout="120s",
    labels={
        "foo": "bar",
    },
    cluster_config=gcp.dataproc.ClusterClusterConfigArgs(
        staging_bucket="dataproc-staging-bucket",
        master_config=gcp.dataproc.ClusterClusterConfigMasterConfigArgs(
            num_instances=1,
            machine_type="e2-medium",
            disk_config=gcp.dataproc.ClusterClusterConfigMasterConfigDiskConfigArgs(
                boot_disk_type="pd-ssd",
                boot_disk_size_gb=30,
            ),
        ),
        worker_config=gcp.dataproc.ClusterClusterConfigWorkerConfigArgs(
            num_instances=2,
            machine_type="e2-medium",
            min_cpu_platform="Intel Skylake",
            disk_config=gcp.dataproc.ClusterClusterConfigWorkerConfigDiskConfigArgs(
                boot_disk_size_gb=30,
                num_local_ssds=1,
            ),
        ),
        # Zero preemptible workers; the override property below permits a
        # zero-worker configuration.
        preemptible_worker_config=gcp.dataproc.ClusterClusterConfigPreemptibleWorkerConfigArgs(
            num_instances=0,
        ),
        software_config=gcp.dataproc.ClusterClusterConfigSoftwareConfigArgs(
            image_version="2.0.35-debian10",
            override_properties={
                "dataproc:dataproc.allow.zero.workers": "true",
            },
        ),
        gce_cluster_config=gcp.dataproc.ClusterClusterConfigGceClusterConfigArgs(
            tags=[
                "foo",
                "bar",
            ],
            # Output reference: the email of the account created above.
            service_account=default.email,
            service_account_scopes=["cloud-platform"],
        ),
        # Script run on each node after it boots, with a 500s timeout.
        initialization_actions=[gcp.dataproc.ClusterClusterConfigInitializationActionArgs(
            script="gs://dataproc-initialization-actions/stackdriver/stackdriver.sh",
            timeout_sec=500,
        )],
    ))
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Dedicated service account that the cluster's VMs will run as.
const _default = new gcp.serviceaccount.Account("default", {
    accountId: "service-account-id",
    displayName: "Service Account",
});
const mycluster = new gcp.dataproc.Cluster("mycluster", {
    region: "us-central1",
    // Let running work drain for up to 120s before workers are removed
    // when the worker count is reduced.
    gracefulDecommissionTimeout: "120s",
    labels: {
        foo: "bar",
    },
    clusterConfig: {
        stagingBucket: "dataproc-staging-bucket",
        masterConfig: {
            numInstances: 1,
            machineType: "e2-medium",
            diskConfig: {
                bootDiskType: "pd-ssd",
                bootDiskSizeGb: 30,
            },
        },
        workerConfig: {
            numInstances: 2,
            machineType: "e2-medium",
            minCpuPlatform: "Intel Skylake",
            diskConfig: {
                bootDiskSizeGb: 30,
                numLocalSsds: 1,
            },
        },
        // Zero preemptible workers; the override property below permits a
        // zero-worker configuration.
        preemptibleWorkerConfig: {
            numInstances: 0,
        },
        softwareConfig: {
            imageVersion: "2.0.35-debian10",
            overrideProperties: {
                "dataproc:dataproc.allow.zero.workers": "true",
            },
        },
        gceClusterConfig: {
            tags: [
                "foo",
                "bar",
            ],
            // Output reference: the email of the account created above.
            serviceAccount: _default.email,
            serviceAccountScopes: ["cloud-platform"],
        },
        // Script run on each node after it boots, with a 500s timeout.
        initializationActions: [{
            script: "gs://dataproc-initialization-actions/stackdriver/stackdriver.sh",
            timeoutSec: 500,
        }],
    },
});
resources:
  # Dedicated service account that the cluster's VMs will run as.
  default:
    type: gcp:serviceAccount:Account
    properties:
      accountId: service-account-id
      displayName: Service Account
  mycluster:
    type: gcp:dataproc:Cluster
    properties:
      region: us-central1
      # Let running work drain for up to 120s before workers are removed
      # when the worker count is reduced.
      gracefulDecommissionTimeout: 120s
      labels:
        foo: bar
      clusterConfig:
        stagingBucket: dataproc-staging-bucket
        masterConfig:
          numInstances: 1
          machineType: e2-medium
          diskConfig:
            bootDiskType: pd-ssd
            bootDiskSizeGb: 30
        workerConfig:
          numInstances: 2
          machineType: e2-medium
          minCpuPlatform: Intel Skylake
          diskConfig:
            bootDiskSizeGb: 30
            numLocalSsds: 1
        # Zero preemptible workers; the override property below permits a
        # zero-worker configuration.
        preemptibleWorkerConfig:
          numInstances: 0
        softwareConfig:
          imageVersion: 2.0.35-debian10
          overrideProperties:
            dataproc:dataproc.allow.zero.workers: 'true'
        gceClusterConfig:
          tags:
            - foo
            - bar
          # Output reference: the email of the account declared above.
          serviceAccount: ${default.email}
          serviceAccountScopes:
            - cloud-platform
        # Script run on each node after it boots, with a 500s timeout.
        initializationActions:
          - script: gs://dataproc-initialization-actions/stackdriver/stackdriver.sh
            timeoutSec: 500

Using A GPU Accelerator

using System.Collections.Generic;
using Pulumi;
using Gcp = Pulumi.Gcp;

return await Deployment.RunAsync(() => 
{
    // Cluster whose master node gets one NVIDIA Tesla K80 GPU attached.
    var acceleratedCluster = new Gcp.Dataproc.Cluster("acceleratedCluster", new()
    {
        ClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigArgs
        {
            // A zone must be pinned: the accelerator type is zonal.
            GceClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigGceClusterConfigArgs
            {
                Zone = "us-central1-a",
            },
            MasterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigArgs
            {
                Accelerators = new[]
                {
                    new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigAcceleratorArgs
                    {
                        AcceleratorCount = 1,
                        AcceleratorType = "nvidia-tesla-k80",
                    },
                },
            },
        },
        Region = "us-central1",
    });

});
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v6/go/gcp/dataproc"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Cluster whose master node gets one NVIDIA Tesla K80 GPU attached.
		_, err := dataproc.NewCluster(ctx, "acceleratedCluster", &dataproc.ClusterArgs{
			ClusterConfig: &dataproc.ClusterClusterConfigArgs{
				// A zone must be pinned: the accelerator type is zonal.
				GceClusterConfig: &dataproc.ClusterClusterConfigGceClusterConfigArgs{
					Zone: pulumi.String("us-central1-a"),
				},
				MasterConfig: &dataproc.ClusterClusterConfigMasterConfigArgs{
					Accelerators: dataproc.ClusterClusterConfigMasterConfigAcceleratorArray{
						&dataproc.ClusterClusterConfigMasterConfigAcceleratorArgs{
							AcceleratorCount: pulumi.Int(1),
							AcceleratorType:  pulumi.String("nvidia-tesla-k80"),
						},
					},
				},
			},
			Region: pulumi.String("us-central1"),
		})
		if err != nil {
			return err
		}
		return nil
	})
}
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.Cluster;
import com.pulumi.gcp.dataproc.ClusterArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigGceClusterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigArgs;
// Missing from the original listing: the accelerator args type is used
// below and must be imported for the example to compile.
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigAcceleratorArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // Cluster whose master node gets one NVIDIA Tesla K80 GPU attached.
        var acceleratedCluster = new Cluster("acceleratedCluster", ClusterArgs.builder()
            .clusterConfig(ClusterClusterConfigArgs.builder()
                // A zone must be pinned: the accelerator type is zonal.
                .gceClusterConfig(ClusterClusterConfigGceClusterConfigArgs.builder()
                    .zone("us-central1-a")
                    .build())
                .masterConfig(ClusterClusterConfigMasterConfigArgs.builder()
                    .accelerators(ClusterClusterConfigMasterConfigAcceleratorArgs.builder()
                        // The count is an Integer, not a String — every other
                        // language example passes the number 1.
                        .acceleratorCount(1)
                        .acceleratorType("nvidia-tesla-k80")
                        .build())
                    .build())
                .build())
            .region("us-central1")
            .build());

    }
}
import pulumi
import pulumi_gcp as gcp

# Cluster whose master node gets one NVIDIA Tesla K80 GPU attached.
accelerated_cluster = gcp.dataproc.Cluster("acceleratedCluster",
    cluster_config=gcp.dataproc.ClusterClusterConfigArgs(
        # A zone must be pinned: the accelerator type is zonal.
        gce_cluster_config=gcp.dataproc.ClusterClusterConfigGceClusterConfigArgs(
            zone="us-central1-a",
        ),
        master_config=gcp.dataproc.ClusterClusterConfigMasterConfigArgs(
            accelerators=[gcp.dataproc.ClusterClusterConfigMasterConfigAcceleratorArgs(
                accelerator_count=1,
                accelerator_type="nvidia-tesla-k80",
            )],
        ),
    ),
    region="us-central1")
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Cluster whose master node gets one NVIDIA Tesla K80 GPU attached.
const acceleratedCluster = new gcp.dataproc.Cluster("acceleratedCluster", {
    clusterConfig: {
        // A zone must be pinned: the accelerator type is zonal.
        gceClusterConfig: {
            zone: "us-central1-a",
        },
        masterConfig: {
            accelerators: [{
                acceleratorCount: 1,
                acceleratorType: "nvidia-tesla-k80",
            }],
        },
    },
    region: "us-central1",
});
resources:
  # Cluster whose master node gets one NVIDIA Tesla K80 GPU attached.
  acceleratedCluster:
    type: gcp:dataproc:Cluster
    properties:
      clusterConfig:
        # A zone must be pinned: the accelerator type is zonal.
        gceClusterConfig:
          zone: us-central1-a
        masterConfig:
          accelerators:
            # NOTE(review): the count is quoted here but numeric (1) in every
            # other language example — presumably Pulumi YAML coerces it;
            # confirm against the schema.
            - acceleratorCount: '1'
              acceleratorType: nvidia-tesla-k80
      region: us-central1

Create Cluster Resource

new Cluster(name: string, args?: ClusterArgs, opts?: CustomResourceOptions);
@overload
def Cluster(resource_name: str,
            opts: Optional[ResourceOptions] = None,
            cluster_config: Optional[ClusterClusterConfigArgs] = None,
            graceful_decommission_timeout: Optional[str] = None,
            labels: Optional[Mapping[str, str]] = None,
            name: Optional[str] = None,
            project: Optional[str] = None,
            region: Optional[str] = None,
            virtual_cluster_config: Optional[ClusterVirtualClusterConfigArgs] = None)
@overload
def Cluster(resource_name: str,
            args: Optional[ClusterArgs] = None,
            opts: Optional[ResourceOptions] = None)
func NewCluster(ctx *Context, name string, args *ClusterArgs, opts ...ResourceOption) (*Cluster, error)
public Cluster(string name, ClusterArgs? args = null, CustomResourceOptions? opts = null)
public Cluster(String name, ClusterArgs args)
public Cluster(String name, ClusterArgs args, CustomResourceOptions options)
type: gcp:dataproc:Cluster
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.

name string
The unique name of the resource.
args ClusterArgs
The arguments to resource properties.
opts CustomResourceOptions
Bag of options to control resource's behavior.
resource_name str
The unique name of the resource.
args ClusterArgs
The arguments to resource properties.
opts ResourceOptions
Bag of options to control resource's behavior.
ctx Context
Context object for the current deployment.
name string
The unique name of the resource.
args ClusterArgs
The arguments to resource properties.
opts ResourceOption
Bag of options to control resource's behavior.
name string
The unique name of the resource.
args ClusterArgs
The arguments to resource properties.
opts CustomResourceOptions
Bag of options to control resource's behavior.
name String
The unique name of the resource.
args ClusterArgs
The arguments to resource properties.
options CustomResourceOptions
Bag of options to control resource's behavior.

Cluster Resource Properties

To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

Inputs

The Cluster resource accepts the following input properties:

ClusterConfig ClusterClusterConfigArgs

Allows you to configure various aspects of the cluster. Structure defined below.

GracefulDecommissionTimeout string

The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a pulumi up

Labels Dictionary<string, string>

The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself including goog-dataproc-cluster-name which is the name of the cluster.

Name string

The name of the cluster, unique within the project and zone.

Project string

The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

Region string

The region in which the cluster and associated nodes will be created in. Defaults to global.

VirtualClusterConfig ClusterVirtualClusterConfigArgs

Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

ClusterConfig ClusterClusterConfigArgs

Allows you to configure various aspects of the cluster. Structure defined below.

GracefulDecommissionTimeout string

The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a pulumi up

Labels map[string]string

The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself including goog-dataproc-cluster-name which is the name of the cluster.

Name string

The name of the cluster, unique within the project and zone.

Project string

The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

Region string

The region in which the cluster and associated nodes will be created in. Defaults to global.

VirtualClusterConfig ClusterVirtualClusterConfigArgs

Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

clusterConfig ClusterClusterConfigArgs

Allows you to configure various aspects of the cluster. Structure defined below.

gracefulDecommissionTimeout String

The timeout duration which allows graceful decomissioning when you change the number of worker nodes directly through a terraform apply

labels Map<String,String>

The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself including goog-dataproc-cluster-name which is the name of the cluster.

name String

The name of the cluster, unique within the project and zone.

project String

The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

region String

The region in which the cluster and associated nodes will be created in. Defaults to global.

virtualClusterConfig ClusterVirtualClusterConfigArgs

Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

clusterConfig ClusterClusterConfigArgs

Allows you to configure various aspects of the cluster. Structure defined below.

gracefulDecommissionTimeout string

The timeout duration which allows graceful decomissioning when you change the number of worker nodes directly through a terraform apply

labels {[key: string]: string}

The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself including goog-dataproc-cluster-name which is the name of the cluster.

name string

The name of the cluster, unique within the project and zone.

project string

The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

region string

The region in which the cluster and associated nodes will be created in. Defaults to global.

virtualClusterConfig ClusterVirtualClusterConfigArgs

Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

cluster_config ClusterClusterConfigArgs

Allows you to configure various aspects of the cluster. Structure defined below.

graceful_decommission_timeout str

The timeout duration which allows graceful decomissioning when you change the number of worker nodes directly through a terraform apply

labels Mapping[str, str]

The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself including goog-dataproc-cluster-name which is the name of the cluster.

name str

The name of the cluster, unique within the project and zone.

project str

The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

region str

The region in which the cluster and associated nodes will be created in. Defaults to global.

virtual_cluster_config ClusterVirtualClusterConfigArgs

Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

clusterConfig Property Map

Allows you to configure various aspects of the cluster. Structure defined below.

gracefulDecommissionTimeout String

The timeout duration which allows graceful decomissioning when you change the number of worker nodes directly through a terraform apply

labels Map<String>

The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself including goog-dataproc-cluster-name which is the name of the cluster.

name String

The name of the cluster, unique within the project and zone.

project String

The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

region String

The region in which the cluster and associated nodes will be created in. Defaults to global.

virtualClusterConfig Property Map

Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

Outputs

All input properties are implicitly available as output properties. Additionally, the Cluster resource produces the following output properties:

Id string

The provider-assigned unique ID for this managed resource.

Id string

The provider-assigned unique ID for this managed resource.

id String

The provider-assigned unique ID for this managed resource.

id string

The provider-assigned unique ID for this managed resource.

id str

The provider-assigned unique ID for this managed resource.

id String

The provider-assigned unique ID for this managed resource.

Look up Existing Cluster Resource

Get an existing Cluster resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

public static get(name: string, id: Input<ID>, state?: ClusterState, opts?: CustomResourceOptions): Cluster
@staticmethod
def get(resource_name: str,
        id: str,
        opts: Optional[ResourceOptions] = None,
        cluster_config: Optional[ClusterClusterConfigArgs] = None,
        graceful_decommission_timeout: Optional[str] = None,
        labels: Optional[Mapping[str, str]] = None,
        name: Optional[str] = None,
        project: Optional[str] = None,
        region: Optional[str] = None,
        virtual_cluster_config: Optional[ClusterVirtualClusterConfigArgs] = None) -> Cluster
func GetCluster(ctx *Context, name string, id IDInput, state *ClusterState, opts ...ResourceOption) (*Cluster, error)
public static Cluster Get(string name, Input<string> id, ClusterState? state, CustomResourceOptions? opts = null)
public static Cluster get(String name, Output<String> id, ClusterState state, CustomResourceOptions options)
Resource lookup is not supported in YAML
name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
resource_name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
The following state arguments are supported:
ClusterConfig ClusterClusterConfigArgs

Allows you to configure various aspects of the cluster. Structure defined below.

GracefulDecommissionTimeout string

The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply

Labels Dictionary<string, string>

The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself including goog-dataproc-cluster-name which is the name of the cluster.

Name string

The name of the cluster, unique within the project and zone.

Project string

The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

Region string

The region in which the cluster and associated nodes will be created. Defaults to global.

VirtualClusterConfig ClusterVirtualClusterConfigArgs

Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

ClusterConfig ClusterClusterConfigArgs

Allows you to configure various aspects of the cluster. Structure defined below.

GracefulDecommissionTimeout string

The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply

Labels map[string]string

The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself including goog-dataproc-cluster-name which is the name of the cluster.

Name string

The name of the cluster, unique within the project and zone.

Project string

The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

Region string

The region in which the cluster and associated nodes will be created. Defaults to global.

VirtualClusterConfig ClusterVirtualClusterConfigArgs

Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

clusterConfig ClusterClusterConfigArgs

Allows you to configure various aspects of the cluster. Structure defined below.

gracefulDecommissionTimeout String

The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply

labels Map<String,String>

The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself including goog-dataproc-cluster-name which is the name of the cluster.

name String

The name of the cluster, unique within the project and zone.

project String

The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

region String

The region in which the cluster and associated nodes will be created. Defaults to global.

virtualClusterConfig ClusterVirtualClusterConfigArgs

Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

clusterConfig ClusterClusterConfigArgs

Allows you to configure various aspects of the cluster. Structure defined below.

gracefulDecommissionTimeout string

The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply

labels {[key: string]: string}

The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself including goog-dataproc-cluster-name which is the name of the cluster.

name string

The name of the cluster, unique within the project and zone.

project string

The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

region string

The region in which the cluster and associated nodes will be created. Defaults to global.

virtualClusterConfig ClusterVirtualClusterConfigArgs

Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

cluster_config ClusterClusterConfigArgs

Allows you to configure various aspects of the cluster. Structure defined below.

graceful_decommission_timeout str

The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply

labels Mapping[str, str]

The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself including goog-dataproc-cluster-name which is the name of the cluster.

name str

The name of the cluster, unique within the project and zone.

project str

The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

region str

The region in which the cluster and associated nodes will be created. Defaults to global.

virtual_cluster_config ClusterVirtualClusterConfigArgs

Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

clusterConfig Property Map

Allows you to configure various aspects of the cluster. Structure defined below.

gracefulDecommissionTimeout String

The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply

labels Map<String>

The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself including goog-dataproc-cluster-name which is the name of the cluster.

name String

The name of the cluster, unique within the project and zone.

project String

The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

region String

The region in which the cluster and associated nodes will be created. Defaults to global.

virtualClusterConfig Property Map

Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

Supporting Types

ClusterClusterConfig

AutoscalingConfig ClusterClusterConfigAutoscalingConfig

The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.

Bucket string
DataprocMetricConfig ClusterClusterConfigDataprocMetricConfig

The Dataproc metrics configuration, used to enable collection of custom OSS metrics from the cluster. Structure defined below.

EncryptionConfig ClusterClusterConfigEncryptionConfig

The Customer managed encryption keys settings for the cluster. Structure defined below.

EndpointConfig ClusterClusterConfigEndpointConfig

The config settings for port access on the cluster. Structure defined below.

GceClusterConfig ClusterClusterConfigGceClusterConfig

Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.

InitializationActions List<ClusterClusterConfigInitializationAction>

Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.

LifecycleConfig ClusterClusterConfigLifecycleConfig

The settings for auto deletion cluster schedule. Structure defined below.

MasterConfig ClusterClusterConfigMasterConfig

The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.

MetastoreConfig ClusterClusterConfigMetastoreConfig

The config setting for metastore service with the cluster. Structure defined below.


PreemptibleWorkerConfig ClusterClusterConfigPreemptibleWorkerConfig

The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below.

  • NOTE : preemptible_worker_config is an alias for the api's secondaryWorkerConfig. The name doesn't necessarily mean it is preemptible and is named as such for legacy/compatibility reasons.
SecurityConfig ClusterClusterConfigSecurityConfig

Security related configuration. Structure defined below.

SoftwareConfig ClusterClusterConfigSoftwareConfig

The config settings for software inside the cluster. Structure defined below.

StagingBucket string

The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

TempBucket string

The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.

WorkerConfig ClusterClusterConfigWorkerConfig

The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.

AutoscalingConfig ClusterClusterConfigAutoscalingConfig

The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.

Bucket string
DataprocMetricConfig ClusterClusterConfigDataprocMetricConfig

The Dataproc metrics configuration, used to enable collection of custom OSS metrics from the cluster. Structure defined below.

EncryptionConfig ClusterClusterConfigEncryptionConfig

The Customer managed encryption keys settings for the cluster. Structure defined below.

EndpointConfig ClusterClusterConfigEndpointConfig

The config settings for port access on the cluster. Structure defined below.

GceClusterConfig ClusterClusterConfigGceClusterConfig

Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.

InitializationActions []ClusterClusterConfigInitializationAction

Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.

LifecycleConfig ClusterClusterConfigLifecycleConfig

The settings for auto deletion cluster schedule. Structure defined below.

MasterConfig ClusterClusterConfigMasterConfig

The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.

MetastoreConfig ClusterClusterConfigMetastoreConfig

The config setting for metastore service with the cluster. Structure defined below.


PreemptibleWorkerConfig ClusterClusterConfigPreemptibleWorkerConfig

The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below.

  • NOTE : preemptible_worker_config is an alias for the api's secondaryWorkerConfig. The name doesn't necessarily mean it is preemptible and is named as such for legacy/compatibility reasons.
SecurityConfig ClusterClusterConfigSecurityConfig

Security related configuration. Structure defined below.

SoftwareConfig ClusterClusterConfigSoftwareConfig

The config settings for software inside the cluster. Structure defined below.

StagingBucket string

The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

TempBucket string

The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.

WorkerConfig ClusterClusterConfigWorkerConfig

The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.

autoscalingConfig ClusterClusterConfigAutoscalingConfig

The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.

bucket String
dataprocMetricConfig ClusterClusterConfigDataprocMetricConfig

The Dataproc metrics configuration, used to enable collection of custom OSS metrics from the cluster. Structure defined below.

encryptionConfig ClusterClusterConfigEncryptionConfig

The Customer managed encryption keys settings for the cluster. Structure defined below.

endpointConfig ClusterClusterConfigEndpointConfig

The config settings for port access on the cluster. Structure defined below.

gceClusterConfig ClusterClusterConfigGceClusterConfig

Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.

initializationActions List<ClusterClusterConfigInitializationAction>

Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.

lifecycleConfig ClusterClusterConfigLifecycleConfig

The settings for auto deletion cluster schedule. Structure defined below.

masterConfig ClusterClusterConfigMasterConfig

The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.

metastoreConfig ClusterClusterConfigMetastoreConfig

The config setting for metastore service with the cluster. Structure defined below.


preemptibleWorkerConfig ClusterClusterConfigPreemptibleWorkerConfig

The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below.

  • NOTE : preemptible_worker_config is an alias for the api's secondaryWorkerConfig. The name doesn't necessarily mean it is preemptible and is named as such for legacy/compatibility reasons.
securityConfig ClusterClusterConfigSecurityConfig

Security related configuration. Structure defined below.

softwareConfig ClusterClusterConfigSoftwareConfig

The config settings for software inside the cluster. Structure defined below.

stagingBucket String

The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

tempBucket String

The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.

workerConfig ClusterClusterConfigWorkerConfig

The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.

autoscalingConfig ClusterClusterConfigAutoscalingConfig

The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.

bucket string
dataprocMetricConfig ClusterClusterConfigDataprocMetricConfig

The Dataproc metrics configuration, used to enable collection of custom OSS metrics from the cluster. Structure defined below.

encryptionConfig ClusterClusterConfigEncryptionConfig

The Customer managed encryption keys settings for the cluster. Structure defined below.

endpointConfig ClusterClusterConfigEndpointConfig

The config settings for port access on the cluster. Structure defined below.

gceClusterConfig ClusterClusterConfigGceClusterConfig

Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.

initializationActions ClusterClusterConfigInitializationAction[]

Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.

lifecycleConfig ClusterClusterConfigLifecycleConfig

The settings for auto deletion cluster schedule. Structure defined below.

masterConfig ClusterClusterConfigMasterConfig

The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.

metastoreConfig ClusterClusterConfigMetastoreConfig

The config setting for metastore service with the cluster. Structure defined below.


preemptibleWorkerConfig ClusterClusterConfigPreemptibleWorkerConfig

The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below.

  • NOTE : preemptible_worker_config is an alias for the api's secondaryWorkerConfig. The name doesn't necessarily mean it is preemptible and is named as such for legacy/compatibility reasons.
securityConfig ClusterClusterConfigSecurityConfig

Security related configuration. Structure defined below.

softwareConfig ClusterClusterConfigSoftwareConfig

The config settings for software inside the cluster. Structure defined below.

stagingBucket string

The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

tempBucket string

The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.

workerConfig ClusterClusterConfigWorkerConfig

The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.

autoscaling_config ClusterClusterConfigAutoscalingConfig

The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.

bucket str
dataproc_metric_config ClusterClusterConfigDataprocMetricConfig

The Dataproc metrics configuration, used to enable collection of custom OSS metrics from the cluster. Structure defined below.

encryption_config ClusterClusterConfigEncryptionConfig

The Customer managed encryption keys settings for the cluster. Structure defined below.

endpoint_config ClusterClusterConfigEndpointConfig

The config settings for port access on the cluster. Structure defined below.

gce_cluster_config ClusterClusterConfigGceClusterConfig

Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.

initialization_actions Sequence[ClusterClusterConfigInitializationAction]

Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.

lifecycle_config ClusterClusterConfigLifecycleConfig

The settings for auto deletion cluster schedule. Structure defined below.

master_config ClusterClusterConfigMasterConfig

The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.

metastore_config ClusterClusterConfigMetastoreConfig

The config setting for metastore service with the cluster. Structure defined below.


preemptible_worker_config ClusterClusterConfigPreemptibleWorkerConfig

The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below.

  • NOTE : preemptible_worker_config is an alias for the api's secondaryWorkerConfig. The name doesn't necessarily mean it is preemptible and is named as such for legacy/compatibility reasons.
security_config ClusterClusterConfigSecurityConfig

Security related configuration. Structure defined below.

software_config ClusterClusterConfigSoftwareConfig

The config settings for software inside the cluster. Structure defined below.

staging_bucket str

The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

temp_bucket str

The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.

worker_config ClusterClusterConfigWorkerConfig

The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.

autoscalingConfig Property Map

The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.

bucket String
dataprocMetricConfig Property Map

The Dataproc metrics configuration, used to enable collection of custom OSS metrics from the cluster. Structure defined below.

encryptionConfig Property Map

The Customer managed encryption keys settings for the cluster. Structure defined below.

endpointConfig Property Map

The config settings for port access on the cluster. Structure defined below.

gceClusterConfig Property Map

Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.

initializationActions List<Property Map>

Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.

lifecycleConfig Property Map

The settings for auto deletion cluster schedule. Structure defined below.

masterConfig Property Map

The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.

metastoreConfig Property Map

The config setting for metastore service with the cluster. Structure defined below.


preemptibleWorkerConfig Property Map

The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below.

  • NOTE : preemptible_worker_config is an alias for the api's secondaryWorkerConfig. The name doesn't necessarily mean it is preemptible and is named as such for legacy/compatibility reasons.
securityConfig Property Map

Security related configuration. Structure defined below.

softwareConfig Property Map

The config settings for software inside the cluster. Structure defined below.

stagingBucket String

The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

tempBucket String

The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.

workerConfig Property Map

The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.

ClusterClusterConfigAutoscalingConfig

PolicyUri string

The autoscaling policy used by the cluster.

PolicyUri string

The autoscaling policy used by the cluster.

policyUri String

The autoscaling policy used by the cluster.

policyUri string

The autoscaling policy used by the cluster.

policy_uri str

The autoscaling policy used by the cluster.

policyUri String

The autoscaling policy used by the cluster.

ClusterClusterConfigDataprocMetricConfig

metrics List<Property Map>

Metrics sources to enable.

ClusterClusterConfigDataprocMetricConfigMetric

MetricSource string

A source for the collection of Dataproc OSS metrics (see available OSS metrics).

MetricOverrides List<string>

One or more [available OSS metrics](https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.

MetricSource string

A source for the collection of Dataproc OSS metrics (see available OSS metrics).

MetricOverrides []string

One or more [available OSS metrics](https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.

metricSource String

A source for the collection of Dataproc OSS metrics (see available OSS metrics).

metricOverrides List<String>

One or more [available OSS metrics](https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.

metricSource string

A source for the collection of Dataproc OSS metrics (see available OSS metrics).

metricOverrides string[]

One or more [available OSS metrics](https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.

metric_source str

A source for the collection of Dataproc OSS metrics (see available OSS metrics).

metric_overrides Sequence[str]

One or more [available OSS metrics](https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.

metricSource String

A source for the collection of Dataproc OSS metrics (see available OSS metrics).

metricOverrides List<String>

One or more [available OSS metrics](https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.

ClusterClusterConfigEncryptionConfig

KmsKeyName string

The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.

KmsKeyName string

The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.

kmsKeyName String

The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.

kmsKeyName string

The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.

kms_key_name str

The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.

kmsKeyName String

The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.

ClusterClusterConfigEndpointConfig

EnableHttpPortAccess bool

The flag to enable http access to specific ports on the cluster from external sources (aka Component Gateway). Defaults to false.

HttpPorts Dictionary<string, object>
EnableHttpPortAccess bool

The flag to enable http access to specific ports on the cluster from external sources (aka Component Gateway). Defaults to false.

HttpPorts map[string]interface{}
enableHttpPortAccess Boolean

The flag to enable http access to specific ports on the cluster from external sources (aka Component Gateway). Defaults to false.

httpPorts Map<String,Object>
enableHttpPortAccess boolean

The flag to enable http access to specific ports on the cluster from external sources (aka Component Gateway). Defaults to false.

httpPorts {[key: string]: any}
enable_http_port_access bool

The flag to enable http access to specific ports on the cluster from external sources (aka Component Gateway). Defaults to false.

http_ports Mapping[str, Any]
enableHttpPortAccess Boolean

The flag to enable http access to specific ports on the cluster from external sources (aka Component Gateway). Defaults to false.

httpPorts Map<Any>

ClusterClusterConfigGceClusterConfig

InternalIpOnly bool

By default, clusters are not restricted to internal IP addresses, and will have ephemeral external IP addresses assigned to each instance. If set to true, all instances in the cluster will only have internal IP addresses. Note: Private Google Access (also known as privateIpGoogleAccess) must be enabled on the subnetwork that the cluster will be launched in.

Metadata Dictionary<string, string>

A map of the Compute Engine metadata entries to add to all instances (see Project and instance metadata).

Network string

The name or self_link of the Google Compute Engine network the cluster will be part of. Conflicts with subnetwork. If neither is specified, this defaults to the "default" network.

ServiceAccount string

The service account to be used by the Node VMs. If not specified, the "default" service account is used.

ServiceAccountScopes List<string>

The set of Google API scopes to be made available on all of the node VMs under the service_account specified. Both OAuth2 URLs and gcloud short names are supported. To allow full access to all Cloud APIs, use the cloud-platform scope. See a complete list of scopes here.

ShieldedInstanceConfig ClusterClusterConfigGceClusterConfigShieldedInstanceConfig

Shielded Instance Config for clusters using Compute Engine Shielded VMs.

Subnetwork string

The name or self_link of the Google Compute Engine subnetwork the cluster will be part of. Conflicts with network.

Tags List<string>

The list of instance tags applied to instances in the cluster. Tags are used to identify valid sources or targets for network firewalls.

Zone string

The GCP zone where your data is stored and used (i.e. where the master and the worker nodes will be created in). If region is set to 'global' (default) then zone is mandatory, otherwise GCP is able to make use of Auto Zone Placement to determine this automatically for you. Note: This setting additionally determines and restricts which computing resources are available for use with other configs such as cluster_config.master_config.machine_type and cluster_config.worker_config.machine_type.

InternalIpOnly bool

By default, clusters are not restricted to internal IP addresses, and will have ephemeral external IP addresses assigned to each instance. If set to true, all instances in the cluster will only have internal IP addresses. Note: Private Google Access (also known as privateIpGoogleAccess) must be enabled on the subnetwork that the cluster will be launched in.

Metadata map[string]string

A map of the Compute Engine metadata entries to add to all instances (see Project and instance metadata).

Network string

The name or self_link of the Google Compute Engine network the cluster will be part of. Conflicts with subnetwork. If neither is specified, this defaults to the "default" network.

ServiceAccount string

The service account to be used by the Node VMs. If not specified, the "default" service account is used.

ServiceAccountScopes []string

The set of Google API scopes to be made available on all of the node VMs under the service_account specified. Both OAuth2 URLs and gcloud short names are supported. To allow full access to all Cloud APIs, use the cloud-platform scope. See a complete list of scopes here.

ShieldedInstanceConfig ClusterClusterConfigGceClusterConfigShieldedInstanceConfig

Shielded Instance Config for clusters using Compute Engine Shielded VMs.

Subnetwork string

The name or self_link of the Google Compute Engine subnetwork the cluster will be part of. Conflicts with network.

Tags []string

The list of instance tags applied to instances in the cluster. Tags are used to identify valid sources or targets for network firewalls.

Zone string

The GCP zone where your data is stored and used (i.e. where the master and the worker nodes will be created in). If region is set to 'global' (default) then zone is mandatory, otherwise GCP is able to make use of Auto Zone Placement to determine this automatically for you. Note: This setting additionally determines and restricts which computing resources are available for use with other configs such as cluster_config.master_config.machine_type and cluster_config.worker_config.machine_type.

internalIpOnly Boolean

By default, clusters are not restricted to internal IP addresses, and will have ephemeral external IP addresses assigned to each instance. If set to true, all instances in the cluster will only have internal IP addresses. Note: Private Google Access (also known as privateIpGoogleAccess) must be enabled on the subnetwork that the cluster will be launched in.

metadata Map<String,String>

A map of the Compute Engine metadata entries to add to all instances (see Project and instance metadata).

network String

The name or self_link of the Google Compute Engine network the cluster will be part of. Conflicts with subnetwork. If neither is specified, this defaults to the "default" network.

serviceAccount String

The service account to be used by the Node VMs. If not specified, the "default" service account is used.

serviceAccountScopes List<String>

The set of Google API scopes to be made available on all of the node VMs under the service_account specified. Both OAuth2 URLs and gcloud short names are supported. To allow full access to all Cloud APIs, use the cloud-platform scope. See a complete list of scopes here.

shieldedInstanceConfig ClusterClusterConfigGceClusterConfigShieldedInstanceConfig

Shielded Instance Config for clusters using Compute Engine Shielded VMs.

subnetwork String

The name or self_link of the Google Compute Engine subnetwork the cluster will be part of. Conflicts with network.

tags List<String>

The list of instance tags applied to instances in the cluster. Tags are used to identify valid sources or targets for network firewalls.

zone String

The GCP zone where your data is stored and used (i.e. where the master and the worker nodes will be created in). If region is set to 'global' (default) then zone is mandatory, otherwise GCP is able to make use of Auto Zone Placement to determine this automatically for you. Note: This setting additionally determines and restricts which computing resources are available for use with other configs such as cluster_config.master_config.machine_type and cluster_config.worker_config.machine_type.

internalIpOnly boolean

By default, clusters are not restricted to internal IP addresses, and will have ephemeral external IP addresses assigned to each instance. If set to true, all instances in the cluster will only have internal IP addresses. Note: Private Google Access (also known as privateIpGoogleAccess) must be enabled on the subnetwork that the cluster will be launched in.

metadata {[key: string]: string}

A map of the Compute Engine metadata entries to add to all instances (see Project and instance metadata).

network string

The name or self_link of the Google Compute Engine network the cluster will be part of. Conflicts with subnetwork. If neither is specified, this defaults to the "default" network.

serviceAccount string

The service account to be used by the Node VMs. If not specified, the "default" service account is used.

serviceAccountScopes string[]

The set of Google API scopes to be made available on all of the node VMs under the service_account specified. Both OAuth2 URLs and gcloud short names are supported. To allow full access to all Cloud APIs, use the cloud-platform scope. See a complete list of scopes here.

shieldedInstanceConfig ClusterClusterConfigGceClusterConfigShieldedInstanceConfig

Shielded Instance Config for clusters using Compute Engine Shielded VMs.

subnetwork string

The name or self_link of the Google Compute Engine subnetwork the cluster will be part of. Conflicts with network.

tags string[]

The list of instance tags applied to instances in the cluster. Tags are used to identify valid sources or targets for network firewalls.

zone string

The GCP zone where your data is stored and used (i.e. where the master and the worker nodes will be created in). If region is set to 'global' (default) then zone is mandatory, otherwise GCP is able to make use of Auto Zone Placement to determine this automatically for you. Note: This setting additionally determines and restricts which computing resources are available for use with other configs such as cluster_config.master_config.machine_type and cluster_config.worker_config.machine_type.

internal_ip_only bool

By default, clusters are not restricted to internal IP addresses, and will have ephemeral external IP addresses assigned to each instance. If set to true, all instances in the cluster will only have internal IP addresses. Note: Private Google Access (also known as privateIpGoogleAccess) must be enabled on the subnetwork that the cluster will be launched in.

metadata Mapping[str, str]

A map of the Compute Engine metadata entries to add to all instances (see Project and instance metadata).

network str

The name or self_link of the Google Compute Engine network the cluster will be part of. Conflicts with subnetwork. If neither is specified, this defaults to the "default" network.

service_account str

The service account to be used by the Node VMs. If not specified, the "default" service account is used.

service_account_scopes Sequence[str]

The set of Google API scopes to be made available on all of the node VMs under the service_account specified. Both OAuth2 URLs and gcloud short names are supported. To allow full access to all Cloud APIs, use the cloud-platform scope. See a complete list of scopes here.

shielded_instance_config ClusterClusterConfigGceClusterConfigShieldedInstanceConfig

Shielded Instance Config for clusters using Compute Engine Shielded VMs.

subnetwork str

The name or self_link of the Google Compute Engine subnetwork the cluster will be part of. Conflicts with network.

tags Sequence[str]

The list of instance tags applied to instances in the cluster. Tags are used to identify valid sources or targets for network firewalls.

zone str

The GCP zone where your data is stored and used (i.e. where the master and the worker nodes will be created in). If region is set to 'global' (default) then zone is mandatory, otherwise GCP is able to make use of Auto Zone Placement to determine this automatically for you. Note: This setting additionally determines and restricts which computing resources are available for use with other configs such as cluster_config.master_config.machine_type and cluster_config.worker_config.machine_type.

internalIpOnly Boolean

By default, clusters are not restricted to internal IP addresses, and will have ephemeral external IP addresses assigned to each instance. If set to true, all instances in the cluster will only have internal IP addresses. Note: Private Google Access (also known as privateIpGoogleAccess) must be enabled on the subnetwork that the cluster will be launched in.

metadata Map<String>

A map of the Compute Engine metadata entries to add to all instances (see Project and instance metadata).

network String

The name or self_link of the Google Compute Engine network the cluster will be part of. Conflicts with subnetwork. If neither is specified, this defaults to the "default" network.

serviceAccount String

The service account to be used by the Node VMs. If not specified, the "default" service account is used.

serviceAccountScopes List<String>

The set of Google API scopes to be made available on all of the node VMs under the service_account specified. Both OAuth2 URLs and gcloud short names are supported. To allow full access to all Cloud APIs, use the cloud-platform scope. See a complete list of scopes here.

shieldedInstanceConfig Property Map

Shielded Instance Config for clusters using Compute Engine Shielded VMs.

subnetwork String

The name or self_link of the Google Compute Engine subnetwork the cluster will be part of. Conflicts with network.

tags List<String>

The list of instance tags applied to instances in the cluster. Tags are used to identify valid sources or targets for network firewalls.

zone String

The GCP zone where your data is stored and used (i.e. where the master and the worker nodes will be created in). If region is set to 'global' (default) then zone is mandatory, otherwise GCP is able to make use of Auto Zone Placement to determine this automatically for you. Note: This setting additionally determines and restricts which computing resources are available for use with other configs such as cluster_config.master_config.machine_type and cluster_config.worker_config.machine_type.

ClusterClusterConfigGceClusterConfigShieldedInstanceConfig

EnableIntegrityMonitoring bool

Defines whether instances have integrity monitoring enabled.

EnableSecureBoot bool

Defines whether instances have Secure Boot enabled.

EnableVtpm bool

Defines whether instances have the vTPM enabled.

EnableIntegrityMonitoring bool

Defines whether instances have integrity monitoring enabled.

EnableSecureBoot bool

Defines whether instances have Secure Boot enabled.

EnableVtpm bool

Defines whether instances have the vTPM enabled.

enableIntegrityMonitoring Boolean

Defines whether instances have integrity monitoring enabled.

enableSecureBoot Boolean

Defines whether instances have Secure Boot enabled.

enableVtpm Boolean

Defines whether instances have the vTPM enabled.

enableIntegrityMonitoring boolean

Defines whether instances have integrity monitoring enabled.

enableSecureBoot boolean

Defines whether instances have Secure Boot enabled.

enableVtpm boolean

Defines whether instances have the vTPM enabled.

enable_integrity_monitoring bool

Defines whether instances have integrity monitoring enabled.

enable_secure_boot bool

Defines whether instances have Secure Boot enabled.

enable_vtpm bool

Defines whether instances have the vTPM enabled.

enableIntegrityMonitoring Boolean

Defines whether instances have integrity monitoring enabled.

enableSecureBoot Boolean

Defines whether instances have Secure Boot enabled.

enableVtpm Boolean

Defines whether instances have the vTPM enabled.

ClusterClusterConfigInitializationAction

Script string

The script to be executed during initialization of the cluster. The script must be a GCS file with a gs:// prefix.

TimeoutSec int

The maximum duration (in seconds) which script is allowed to take to execute its action. GCP will default to a predetermined computed value if not set (currently 300).

Script string

The script to be executed during initialization of the cluster. The script must be a GCS file with a gs:// prefix.

TimeoutSec int

The maximum duration (in seconds) which script is allowed to take to execute its action. GCP will default to a predetermined computed value if not set (currently 300).

script String

The script to be executed during initialization of the cluster. The script must be a GCS file with a gs:// prefix.

timeoutSec Integer

The maximum duration (in seconds) which script is allowed to take to execute its action. GCP will default to a predetermined computed value if not set (currently 300).

script string

The script to be executed during initialization of the cluster. The script must be a GCS file with a gs:// prefix.

timeoutSec number

The maximum duration (in seconds) which script is allowed to take to execute its action. GCP will default to a predetermined computed value if not set (currently 300).

script str

The script to be executed during initialization of the cluster. The script must be a GCS file with a gs:// prefix.

timeout_sec int

The maximum duration (in seconds) which script is allowed to take to execute its action. GCP will default to a predetermined computed value if not set (currently 300).

script String

The script to be executed during initialization of the cluster. The script must be a GCS file with a gs:// prefix.

timeoutSec Number

The maximum duration (in seconds) which script is allowed to take to execute its action. GCP will default to a predetermined computed value if not set (currently 300).

ClusterClusterConfigLifecycleConfig

AutoDeleteTime string

The time when cluster will be auto-deleted. A timestamp in RFC3339 UTC "Zulu" format, accurate to nanoseconds. Example: "2014-10-02T15:01:23.045123456Z".

IdleDeleteTtl string

The duration to keep the cluster alive while idling (no jobs running). After this TTL, the cluster will be deleted. Valid range: [10m, 14d].

IdleStartTime string
AutoDeleteTime string

The time when cluster will be auto-deleted. A timestamp in RFC3339 UTC "Zulu" format, accurate to nanoseconds. Example: "2014-10-02T15:01:23.045123456Z".

IdleDeleteTtl string

The duration to keep the cluster alive while idling (no jobs running). After this TTL, the cluster will be deleted. Valid range: [10m, 14d].

IdleStartTime string
autoDeleteTime String

The time when cluster will be auto-deleted. A timestamp in RFC3339 UTC "Zulu" format, accurate to nanoseconds. Example: "2014-10-02T15:01:23.045123456Z".

idleDeleteTtl String

The duration to keep the cluster alive while idling (no jobs running). After this TTL, the cluster will be deleted. Valid range: [10m, 14d].

idleStartTime String
autoDeleteTime string

The time when cluster will be auto-deleted. A timestamp in RFC3339 UTC "Zulu" format, accurate to nanoseconds. Example: "2014-10-02T15:01:23.045123456Z".

idleDeleteTtl string

The duration to keep the cluster alive while idling (no jobs running). After this TTL, the cluster will be deleted. Valid range: [10m, 14d].

idleStartTime string
auto_delete_time str

The time when cluster will be auto-deleted. A timestamp in RFC3339 UTC "Zulu" format, accurate to nanoseconds. Example: "2014-10-02T15:01:23.045123456Z".

idle_delete_ttl str

The duration to keep the cluster alive while idling (no jobs running). After this TTL, the cluster will be deleted. Valid range: [10m, 14d].

idle_start_time str
autoDeleteTime String

The time when cluster will be auto-deleted. A timestamp in RFC3339 UTC "Zulu" format, accurate to nanoseconds. Example: "2014-10-02T15:01:23.045123456Z".

idleDeleteTtl String

The duration to keep the cluster alive while idling (no jobs running). After this TTL, the cluster will be deleted. Valid range: [10m, 14d].

idleStartTime String

ClusterClusterConfigMasterConfig

Accelerators List<ClusterClusterConfigMasterConfigAccelerator>

The Compute Engine accelerator (GPU) configuration for these instances. Can be specified multiple times.

DiskConfig ClusterClusterConfigMasterConfigDiskConfig

Disk Config

ImageUri string

The URI for the image to use for this worker. See the guide for more information.

InstanceNames List<string>
MachineType string

The name of a Google Compute Engine machine type to create for the master. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

MinCpuPlatform string

The name of a minimum generation of CPU family for the master. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

NumInstances int

Specifies the number of master nodes to create. If not specified, GCP will default to a predetermined computed value (currently 1).

Accelerators []ClusterClusterConfigMasterConfigAccelerator

The Compute Engine accelerator (GPU) configuration for these instances. Can be specified multiple times.

DiskConfig ClusterClusterConfigMasterConfigDiskConfig

Disk Config

ImageUri string

The URI for the image to use for this worker. See the guide for more information.

InstanceNames []string
MachineType string

The name of a Google Compute Engine machine type to create for the master. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

MinCpuPlatform string

The name of a minimum generation of CPU family for the master. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

NumInstances int

Specifies the number of master nodes to create. If not specified, GCP will default to a predetermined computed value (currently 1).

accelerators List<ClusterClusterConfigMasterConfigAccelerator>

The Compute Engine accelerator (GPU) configuration for these instances. Can be specified multiple times.

diskConfig ClusterClusterConfigMasterConfigDiskConfig

Disk Config

imageUri String

The URI for the image to use for this worker. See the guide for more information.

instanceNames List<String>
machineType String

The name of a Google Compute Engine machine type to create for the master. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

minCpuPlatform String

The name of a minimum generation of CPU family for the master. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

numInstances Integer

Specifies the number of master nodes to create. If not specified, GCP will default to a predetermined computed value (currently 1).

accelerators ClusterClusterConfigMasterConfigAccelerator[]

The Compute Engine accelerator (GPU) configuration for these instances. Can be specified multiple times.

diskConfig ClusterClusterConfigMasterConfigDiskConfig

Disk Config

imageUri string

The URI for the image to use for this worker. See the guide for more information.

instanceNames string[]
machineType string

The name of a Google Compute Engine machine type to create for the master. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

minCpuPlatform string

The name of a minimum generation of CPU family for the master. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

numInstances number

Specifies the number of master nodes to create. If not specified, GCP will default to a predetermined computed value (currently 1).

accelerators Sequence[ClusterClusterConfigMasterConfigAccelerator]

The Compute Engine accelerator (GPU) configuration for these instances. Can be specified multiple times.

disk_config ClusterClusterConfigMasterConfigDiskConfig

Disk Config

image_uri str

The URI for the image to use for this worker. See the guide for more information.

instance_names Sequence[str]
machine_type str

The name of a Google Compute Engine machine type to create for the master. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

min_cpu_platform str

The name of a minimum generation of CPU family for the master. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

num_instances int

Specifies the number of master nodes to create. If not specified, GCP will default to a predetermined computed value (currently 1).

accelerators List<Property Map>

The Compute Engine accelerator (GPU) configuration for these instances. Can be specified multiple times.

diskConfig Property Map

Disk Config

imageUri String

The URI for the image to use for this worker. See the guide for more information.

instanceNames List<String>
machineType String

The name of a Google Compute Engine machine type to create for the master. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

minCpuPlatform String

The name of a minimum generation of CPU family for the master. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

numInstances Number

Specifies the number of master nodes to create. If not specified, GCP will default to a predetermined computed value (currently 1).

ClusterClusterConfigMasterConfigAccelerator

AcceleratorCount int

The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

AcceleratorType string

The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

AcceleratorCount int

The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

AcceleratorType string

The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

acceleratorCount Integer

The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

acceleratorType String

The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

acceleratorCount number

The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

acceleratorType string

The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

accelerator_count int

The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

accelerator_type str

The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

acceleratorCount Number

The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

acceleratorType String

The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

ClusterClusterConfigMasterConfigDiskConfig

BootDiskSizeGb int

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

BootDiskType string

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

NumLocalSsds int

The amount of local SSD disks that will be attached to each node. Defaults to 0.

BootDiskSizeGb int

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

BootDiskType string

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

NumLocalSsds int

The amount of local SSD disks that will be attached to each node. Defaults to 0.

bootDiskSizeGb Integer

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

bootDiskType String

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

numLocalSsds Integer

The amount of local SSD disks that will be attached to each node. Defaults to 0.

bootDiskSizeGb number

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

bootDiskType string

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

numLocalSsds number

The amount of local SSD disks that will be attached to each node. Defaults to 0.

boot_disk_size_gb int

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

boot_disk_type str

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

num_local_ssds int

The amount of local SSD disks that will be attached to each node. Defaults to 0.

bootDiskSizeGb Number

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

bootDiskType String

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

numLocalSsds Number

The amount of local SSD disks that will be attached to each node. Defaults to 0.

ClusterClusterConfigMetastoreConfig

DataprocMetastoreService string

Resource name of an existing Dataproc Metastore service.

DataprocMetastoreService string

Resource name of an existing Dataproc Metastore service.

dataprocMetastoreService String

Resource name of an existing Dataproc Metastore service.

dataprocMetastoreService string

Resource name of an existing Dataproc Metastore service.

dataproc_metastore_service str

Resource name of an existing Dataproc Metastore service.

dataprocMetastoreService String

Resource name of an existing Dataproc Metastore service.

ClusterClusterConfigPreemptibleWorkerConfig

DiskConfig ClusterClusterConfigPreemptibleWorkerConfigDiskConfig

Disk Config

InstanceNames List<string>
NumInstances int

Specifies the number of preemptible nodes to create. Defaults to 0.

Preemptibility string

Specifies the preemptibility of the secondary workers. The default value is PREEMPTIBLE. Accepted values are:

  • PREEMPTIBILITY_UNSPECIFIED
  • NON_PREEMPTIBLE
  • PREEMPTIBLE
DiskConfig ClusterClusterConfigPreemptibleWorkerConfigDiskConfig

Disk Config

InstanceNames []string
NumInstances int

Specifies the number of preemptible nodes to create. Defaults to 0.

Preemptibility string

Specifies the preemptibility of the secondary workers. The default value is PREEMPTIBLE. Accepted values are:

  • PREEMPTIBILITY_UNSPECIFIED
  • NON_PREEMPTIBLE
  • PREEMPTIBLE
diskConfig ClusterClusterConfigPreemptibleWorkerConfigDiskConfig

Disk Config

instanceNames List<String>
numInstances Integer

Specifies the number of preemptible nodes to create. Defaults to 0.

preemptibility String

Specifies the preemptibility of the secondary workers. The default value is PREEMPTIBLE. Accepted values are:

  • PREEMPTIBILITY_UNSPECIFIED
  • NON_PREEMPTIBLE
  • PREEMPTIBLE
diskConfig ClusterClusterConfigPreemptibleWorkerConfigDiskConfig

Disk Config

instanceNames string[]
numInstances number

Specifies the number of preemptible nodes to create. Defaults to 0.

preemptibility string

Specifies the preemptibility of the secondary workers. The default value is PREEMPTIBLE. Accepted values are:

  • PREEMPTIBILITY_UNSPECIFIED
  • NON_PREEMPTIBLE
  • PREEMPTIBLE
disk_config ClusterClusterConfigPreemptibleWorkerConfigDiskConfig

Disk Config

instance_names Sequence[str]
num_instances int

Specifies the number of preemptible nodes to create. Defaults to 0.

preemptibility str

Specifies the preemptibility of the secondary workers. The default value is PREEMPTIBLE. Accepted values are:

  • PREEMPTIBILITY_UNSPECIFIED
  • NON_PREEMPTIBLE
  • PREEMPTIBLE
diskConfig Property Map

Disk Config

instanceNames List<String>
numInstances Number

Specifies the number of preemptible nodes to create. Defaults to 0.

preemptibility String

Specifies the preemptibility of the secondary workers. The default value is PREEMPTIBLE. Accepted values are:

  • PREEMPTIBILITY_UNSPECIFIED
  • NON_PREEMPTIBLE
  • PREEMPTIBLE

ClusterClusterConfigPreemptibleWorkerConfigDiskConfig

BootDiskSizeGb int

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

BootDiskType string

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

NumLocalSsds int

The amount of local SSD disks that will be attached to each node. Defaults to 0.

BootDiskSizeGb int

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

BootDiskType string

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

NumLocalSsds int

The amount of local SSD disks that will be attached to each node. Defaults to 0.

bootDiskSizeGb Integer

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

bootDiskType String

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

numLocalSsds Integer

The amount of local SSD disks that will be attached to each node. Defaults to 0.

bootDiskSizeGb number

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

bootDiskType string

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

numLocalSsds number

The amount of local SSD disks that will be attached to each node. Defaults to 0.

boot_disk_size_gb int

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

boot_disk_type str

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

num_local_ssds int

The amount of local SSD disks that will be attached to each node. Defaults to 0.

bootDiskSizeGb Number

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

bootDiskType String

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

numLocalSsds Number

The amount of local SSD disks that will be attached to each node. Defaults to 0.

ClusterClusterConfigSecurityConfig

kerberosConfig Property Map

Kerberos Configuration

ClusterClusterConfigSecurityConfigKerberosConfig

KmsKeyUri string

The URI of the KMS key used to encrypt various sensitive files.

RootPrincipalPasswordUri string

The Cloud Storage URI of a KMS encrypted file containing the root principal password.

CrossRealmTrustAdminServer string

The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

CrossRealmTrustKdc string

The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

CrossRealmTrustRealm string

The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.

CrossRealmTrustSharedPasswordUri string

The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.

EnableKerberos bool

Flag to indicate whether to Kerberize the cluster.

KdcDbKeyUri string

The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.

KeyPasswordUri string

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.

KeystorePasswordUri string

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificate, the password is generated by Dataproc.

KeystoreUri string

The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

Realm string

The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.

TgtLifetimeHours int

The lifetime of the ticket granting ticket, in hours.

TruststorePasswordUri string

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.

TruststoreUri string

The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

KmsKeyUri string

The URI of the KMS key used to encrypt various sensitive files.

RootPrincipalPasswordUri string

The Cloud Storage URI of a KMS encrypted file containing the root principal password.

CrossRealmTrustAdminServer string

The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

CrossRealmTrustKdc string

The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

CrossRealmTrustRealm string

The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.

CrossRealmTrustSharedPasswordUri string

The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.

EnableKerberos bool

Flag to indicate whether to Kerberize the cluster.

KdcDbKeyUri string

The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.

KeyPasswordUri string

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.

KeystorePasswordUri string

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificate, the password is generated by Dataproc.

KeystoreUri string

The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

Realm string

The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.

TgtLifetimeHours int

The lifetime of the ticket granting ticket, in hours.

TruststorePasswordUri string

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.

TruststoreUri string

The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

kmsKeyUri String

The URI of the KMS key used to encrypt various sensitive files.

rootPrincipalPasswordUri String

The Cloud Storage URI of a KMS encrypted file containing the root principal password.

crossRealmTrustAdminServer String

The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

crossRealmTrustKdc String

The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

crossRealmTrustRealm String

The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.

crossRealmTrustSharedPasswordUri String

The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.

enableKerberos Boolean

Flag to indicate whether to Kerberize the cluster.

kdcDbKeyUri String

The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.

keyPasswordUri String

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.

keystorePasswordUri String

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificate, the password is generated by Dataproc.

keystoreUri String

The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

realm String

The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.

tgtLifetimeHours Integer

The lifetime of the ticket granting ticket, in hours.

truststorePasswordUri String

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.

truststoreUri String

The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

kmsKeyUri string

The URI of the KMS key used to encrypt various sensitive files.

rootPrincipalPasswordUri string

The Cloud Storage URI of a KMS encrypted file containing the root principal password.

crossRealmTrustAdminServer string

The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

crossRealmTrustKdc string

The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

crossRealmTrustRealm string

The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.

crossRealmTrustSharedPasswordUri string

The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.

enableKerberos boolean

Flag to indicate whether to Kerberize the cluster.

kdcDbKeyUri string

The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.

keyPasswordUri string

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.

keystorePasswordUri string

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificate, the password is generated by Dataproc.

keystoreUri string

The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

realm string

The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.

tgtLifetimeHours number

The lifetime of the ticket granting ticket, in hours.

truststorePasswordUri string

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.

truststoreUri string

The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

kms_key_uri str

The URI of the KMS key used to encrypt various sensitive files.

root_principal_password_uri str

The Cloud Storage URI of a KMS encrypted file containing the root principal password.

cross_realm_trust_admin_server str

The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

cross_realm_trust_kdc str

The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

cross_realm_trust_realm str

The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.

cross_realm_trust_shared_password_uri str

The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.

enable_kerberos bool

Flag to indicate whether to Kerberize the cluster.

kdc_db_key_uri str

The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.

key_password_uri str

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.

keystore_password_uri str

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificate, the password is generated by Dataproc.

keystore_uri str

The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

realm str

The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.

tgt_lifetime_hours int

The lifetime of the ticket granting ticket, in hours.

truststore_password_uri str

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.

truststore_uri str

The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

kmsKeyUri String

The URI of the KMS key used to encrypt various sensitive files.

rootPrincipalPasswordUri String

The Cloud Storage URI of a KMS encrypted file containing the root principal password.

crossRealmTrustAdminServer String

The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

crossRealmTrustKdc String

The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

crossRealmTrustRealm String

The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.

crossRealmTrustSharedPasswordUri String

The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.

enableKerberos Boolean

Flag to indicate whether to Kerberize the cluster.

kdcDbKeyUri String

The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.

keyPasswordUri String

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.

keystorePasswordUri String

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificate, the password is generated by Dataproc.

keystoreUri String

The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

realm String

The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.

tgtLifetimeHours Number

The lifetime of the ticket granting ticket, in hours.

truststorePasswordUri String

The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.

truststoreUri String

The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

ClusterClusterConfigSoftwareConfig

ImageVersion string

The Cloud Dataproc image version to use for the cluster - this controls the sets of software versions installed onto the nodes when you create clusters. If not specified, defaults to the latest version. For a list of valid versions see Cloud Dataproc versions

OptionalComponents List<string>

The set of optional components to activate on the cluster. Accepted values are:

  • ANACONDA
  • DRUID
  • FLINK
  • HBASE
  • HIVE_WEBHCAT
  • JUPYTER
  • PRESTO
  • RANGER
  • SOLR
  • ZEPPELIN
  • ZOOKEEPER
OverrideProperties Dictionary<string, string>

A list of override and additional properties (key/value pairs) used to modify various aspects of the common configuration files used when creating a cluster. For a list of valid properties please see Cluster properties

Properties Dictionary<string, object>

The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

ImageVersion string

The Cloud Dataproc image version to use for the cluster - this controls the sets of software versions installed onto the nodes when you create clusters. If not specified, defaults to the latest version. For a list of valid versions see Cloud Dataproc versions

OptionalComponents []string

The set of optional components to activate on the cluster. Accepted values are:

  • ANACONDA
  • DRUID
  • FLINK
  • HBASE
  • HIVE_WEBHCAT
  • JUPYTER
  • PRESTO
  • RANGER
  • SOLR
  • ZEPPELIN
  • ZOOKEEPER
OverrideProperties map[string]string

A list of override and additional properties (key/value pairs) used to modify various aspects of the common configuration files used when creating a cluster. For a list of valid properties please see Cluster properties

Properties map[string]interface{}

The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

imageVersion String

The Cloud Dataproc image version to use for the cluster - this controls the sets of software versions installed onto the nodes when you create clusters. If not specified, defaults to the latest version. For a list of valid versions see Cloud Dataproc versions

optionalComponents List<String>

The set of optional components to activate on the cluster. Accepted values are:

  • ANACONDA
  • DRUID
  • FLINK
  • HBASE
  • HIVE_WEBHCAT
  • JUPYTER
  • PRESTO
  • RANGER
  • SOLR
  • ZEPPELIN
  • ZOOKEEPER
overrideProperties Map<String,String>

A list of override and additional properties (key/value pairs) used to modify various aspects of the common configuration files used when creating a cluster. For a list of valid properties please see Cluster properties

properties Map<String,Object>

The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

imageVersion string

The Cloud Dataproc image version to use for the cluster - this controls the sets of software versions installed onto the nodes when you create clusters. If not specified, defaults to the latest version. For a list of valid versions see Cloud Dataproc versions

optionalComponents string[]

The set of optional components to activate on the cluster. Accepted values are:

  • ANACONDA
  • DRUID
  • FLINK
  • HBASE
  • HIVE_WEBHCAT
  • JUPYTER
  • PRESTO
  • RANGER
  • SOLR
  • ZEPPELIN
  • ZOOKEEPER
overrideProperties {[key: string]: string}

A list of override and additional properties (key/value pairs) used to modify various aspects of the common configuration files used when creating a cluster. For a list of valid properties please see Cluster properties

properties {[key: string]: any}

The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

image_version str

The Cloud Dataproc image version to use for the cluster - this controls the sets of software versions installed onto the nodes when you create clusters. If not specified, defaults to the latest version. For a list of valid versions see Cloud Dataproc versions

optional_components Sequence[str]

The set of optional components to activate on the cluster. Accepted values are:

  • ANACONDA
  • DRUID
  • FLINK
  • HBASE
  • HIVE_WEBHCAT
  • JUPYTER
  • PRESTO
  • RANGER
  • SOLR
  • ZEPPELIN
  • ZOOKEEPER
override_properties Mapping[str, str]

A list of override and additional properties (key/value pairs) used to modify various aspects of the common configuration files used when creating a cluster. For a list of valid properties please see Cluster properties

properties Mapping[str, Any]

The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

imageVersion String

The Cloud Dataproc image version to use for the cluster - this controls the sets of software versions installed onto the nodes when you create clusters. If not specified, defaults to the latest version. For a list of valid versions see Cloud Dataproc versions

optionalComponents List<String>

The set of optional components to activate on the cluster. Accepted values are:

  • ANACONDA
  • DRUID
  • FLINK
  • HBASE
  • HIVE_WEBHCAT
  • JUPYTER
  • PRESTO
  • RANGER
  • SOLR
  • ZEPPELIN
  • ZOOKEEPER
overrideProperties Map<String>

A list of override and additional properties (key/value pairs) used to modify various aspects of the common configuration files used when creating a cluster. For a list of valid properties please see Cluster properties

properties Map<Any>

The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

ClusterClusterConfigWorkerConfig

Accelerators List<ClusterClusterConfigWorkerConfigAccelerator>

The Compute Engine accelerator configuration for these instances. Can be specified multiple times.

DiskConfig ClusterClusterConfigWorkerConfigDiskConfig

Disk Config

ImageUri string

The URI for the image to use for this worker. See the guide for more information.

InstanceNames List<string>
MachineType string

The name of a Google Compute Engine machine type to create for the worker nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

MinCpuPlatform string

The name of a minimum generation of CPU family for the master. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

NumInstances int

Specifies the number of worker nodes to create. If not specified, GCP will default to a predetermined computed value (currently 2). There is currently a beta feature which allows you to run a Single Node Cluster. In order to take advantage of this you need to set "dataproc:dataproc.allow.zero.workers" = "true" in cluster_config.software_config.properties

Accelerators []ClusterClusterConfigWorkerConfigAccelerator

The Compute Engine accelerator configuration for these instances. Can be specified multiple times.

DiskConfig ClusterClusterConfigWorkerConfigDiskConfig

Disk Config

ImageUri string

The URI for the image to use for this worker. See the guide for more information.

InstanceNames []string
MachineType string

The name of a Google Compute Engine machine type to create for the worker nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

MinCpuPlatform string

The name of a minimum generation of CPU family for the master. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

NumInstances int

Specifies the number of worker nodes to create. If not specified, GCP will default to a predetermined computed value (currently 2). There is currently a beta feature which allows you to run a Single Node Cluster. In order to take advantage of this you need to set "dataproc:dataproc.allow.zero.workers" = "true" in cluster_config.software_config.properties

accelerators List<ClusterClusterConfigWorkerConfigAccelerator>

The Compute Engine accelerator configuration for these instances. Can be specified multiple times.

diskConfig ClusterClusterConfigWorkerConfigDiskConfig

Disk Config

imageUri String

The URI for the image to use for this worker. See the guide for more information.

instanceNames List<String>
machineType String

The name of a Google Compute Engine machine type to create for the worker nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

minCpuPlatform String

The name of a minimum generation of CPU family for the master. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

numInstances Integer

Specifies the number of worker nodes to create. If not specified, GCP will default to a predetermined computed value (currently 2). There is currently a beta feature which allows you to run a Single Node Cluster. In order to take advantage of this you need to set "dataproc:dataproc.allow.zero.workers" = "true" in cluster_config.software_config.properties

accelerators ClusterClusterConfigWorkerConfigAccelerator[]

The Compute Engine accelerator configuration for these instances. Can be specified multiple times.

diskConfig ClusterClusterConfigWorkerConfigDiskConfig

Disk Config

imageUri string

The URI for the image to use for this worker. See the guide for more information.

instanceNames string[]
machineType string

The name of a Google Compute Engine machine type to create for the worker nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

minCpuPlatform string

The name of a minimum generation of CPU family for the worker nodes. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

numInstances number

Specifies the number of worker nodes to create. If not specified, GCP will default to a predetermined computed value (currently 2). There is currently a beta feature which allows you to run a Single Node Cluster. In order to take advantage of this you need to set "dataproc:dataproc.allow.zero.workers" = "true" in cluster_config.software_config.properties

accelerators Sequence[ClusterClusterConfigWorkerConfigAccelerator]

The Compute Engine accelerator configuration for these instances. Can be specified multiple times.

disk_config ClusterClusterConfigWorkerConfigDiskConfig

Disk Config

image_uri str

The URI for the image to use for this worker. See the guide for more information.

instance_names Sequence[str]
machine_type str

The name of a Google Compute Engine machine type to create for the worker nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

min_cpu_platform str

The name of a minimum generation of CPU family for the worker nodes. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

num_instances int

Specifies the number of worker nodes to create. If not specified, GCP will default to a predetermined computed value (currently 2). There is currently a beta feature which allows you to run a Single Node Cluster. In order to take advantage of this you need to set "dataproc:dataproc.allow.zero.workers" = "true" in cluster_config.software_config.properties

accelerators List<Property Map>

The Compute Engine accelerator configuration for these instances. Can be specified multiple times.

diskConfig Property Map

Disk Config

imageUri String

The URI for the image to use for this worker. See the guide for more information.

instanceNames List<String>
machineType String

The name of a Google Compute Engine machine type to create for the worker nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

minCpuPlatform String

The name of a minimum generation of CPU family for the worker nodes. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

numInstances Number

Specifies the number of worker nodes to create. If not specified, GCP will default to a predetermined computed value (currently 2). There is currently a beta feature which allows you to run a Single Node Cluster. In order to take advantage of this you need to set "dataproc:dataproc.allow.zero.workers" = "true" in cluster_config.software_config.properties

ClusterClusterConfigWorkerConfigAccelerator

AcceleratorCount int

The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

AcceleratorType string

The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

AcceleratorCount int

The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

AcceleratorType string

The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

acceleratorCount Integer

The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

acceleratorType String

The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

acceleratorCount number

The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

acceleratorType string

The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

accelerator_count int

The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

accelerator_type str

The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

acceleratorCount Number

The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

acceleratorType String

The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

ClusterClusterConfigWorkerConfigDiskConfig

BootDiskSizeGb int

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

BootDiskType string

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

NumLocalSsds int

The amount of local SSD disks that will be attached to each worker cluster node. Defaults to 0.

BootDiskSizeGb int

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

BootDiskType string

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

NumLocalSsds int

The amount of local SSD disks that will be attached to each worker cluster node. Defaults to 0.

bootDiskSizeGb Integer

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

bootDiskType String

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

numLocalSsds Integer

The amount of local SSD disks that will be attached to each worker cluster node. Defaults to 0.

bootDiskSizeGb number

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

bootDiskType string

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

numLocalSsds number

The amount of local SSD disks that will be attached to each worker cluster node. Defaults to 0.

boot_disk_size_gb int

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

boot_disk_type str

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

num_local_ssds int

The amount of local SSD disks that will be attached to each worker cluster node. Defaults to 0.

bootDiskSizeGb Number

Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

bootDiskType String

The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

numLocalSsds Number

The amount of local SSD disks that will be attached to each worker cluster node. Defaults to 0.

ClusterVirtualClusterConfig

AuxiliaryServicesConfig ClusterVirtualClusterConfigAuxiliaryServicesConfig

Configuration of auxiliary services used by this cluster. Structure defined below.

KubernetesClusterConfig ClusterVirtualClusterConfigKubernetesClusterConfig

The configuration for running the Dataproc cluster on Kubernetes. Structure defined below.


StagingBucket string

The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

AuxiliaryServicesConfig ClusterVirtualClusterConfigAuxiliaryServicesConfig

Configuration of auxiliary services used by this cluster. Structure defined below.

KubernetesClusterConfig ClusterVirtualClusterConfigKubernetesClusterConfig

The configuration for running the Dataproc cluster on Kubernetes. Structure defined below.


StagingBucket string

The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

auxiliaryServicesConfig ClusterVirtualClusterConfigAuxiliaryServicesConfig

Configuration of auxiliary services used by this cluster. Structure defined below.

kubernetesClusterConfig ClusterVirtualClusterConfigKubernetesClusterConfig

The configuration for running the Dataproc cluster on Kubernetes. Structure defined below.


stagingBucket String

The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

auxiliaryServicesConfig ClusterVirtualClusterConfigAuxiliaryServicesConfig

Configuration of auxiliary services used by this cluster. Structure defined below.

kubernetesClusterConfig ClusterVirtualClusterConfigKubernetesClusterConfig

The configuration for running the Dataproc cluster on Kubernetes. Structure defined below.


stagingBucket string

The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

auxiliary_services_config ClusterVirtualClusterConfigAuxiliaryServicesConfig

Configuration of auxiliary services used by this cluster. Structure defined below.

kubernetes_cluster_config ClusterVirtualClusterConfigKubernetesClusterConfig

The configuration for running the Dataproc cluster on Kubernetes. Structure defined below.


staging_bucket str

The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

auxiliaryServicesConfig Property Map

Configuration of auxiliary services used by this cluster. Structure defined below.

kubernetesClusterConfig Property Map

The configuration for running the Dataproc cluster on Kubernetes. Structure defined below.


stagingBucket String

The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

ClusterVirtualClusterConfigAuxiliaryServicesConfig

metastoreConfig Property Map

The Hive Metastore configuration for this workload.

sparkHistoryServerConfig Property Map

The Spark History Server configuration for the workload.

ClusterVirtualClusterConfigAuxiliaryServicesConfigMetastoreConfig

DataprocMetastoreService string

Resource name of an existing Dataproc Metastore service.

DataprocMetastoreService string

Resource name of an existing Dataproc Metastore service.

dataprocMetastoreService String

Resource name of an existing Dataproc Metastore service.

dataprocMetastoreService string

Resource name of an existing Dataproc Metastore service.

dataproc_metastore_service str

Resource name of an existing Dataproc Metastore service.

dataprocMetastoreService String

Resource name of an existing Dataproc Metastore service.

ClusterVirtualClusterConfigAuxiliaryServicesConfigSparkHistoryServerConfig

DataprocCluster string

Resource name of an existing Dataproc Cluster to act as a Spark History Server for the workload.


DataprocCluster string

Resource name of an existing Dataproc Cluster to act as a Spark History Server for the workload.


dataprocCluster String

Resource name of an existing Dataproc Cluster to act as a Spark History Server for the workload.


dataprocCluster string

Resource name of an existing Dataproc Cluster to act as a Spark History Server for the workload.


dataproc_cluster str

Resource name of an existing Dataproc Cluster to act as a Spark History Server for the workload.


dataprocCluster String

Resource name of an existing Dataproc Cluster to act as a Spark History Server for the workload.


ClusterVirtualClusterConfigKubernetesClusterConfig

GkeClusterConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfig

The configuration for running the Dataproc cluster on GKE.

KubernetesSoftwareConfig ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfig

The software configuration for this Dataproc cluster running on Kubernetes.

KubernetesNamespace string

A namespace within the Kubernetes cluster to deploy into. If this namespace does not exist, it is created. If it exists, Dataproc verifies that another Dataproc VirtualCluster is not installed into it. If not specified, the name of the Dataproc Cluster is used.

GkeClusterConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfig

The configuration for running the Dataproc cluster on GKE.

KubernetesSoftwareConfig ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfig

The software configuration for this Dataproc cluster running on Kubernetes.

KubernetesNamespace string

A namespace within the Kubernetes cluster to deploy into. If this namespace does not exist, it is created. If it exists, Dataproc verifies that another Dataproc VirtualCluster is not installed into it. If not specified, the name of the Dataproc Cluster is used.

gkeClusterConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfig

The configuration for running the Dataproc cluster on GKE.

kubernetesSoftwareConfig ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfig

The software configuration for this Dataproc cluster running on Kubernetes.

kubernetesNamespace String

A namespace within the Kubernetes cluster to deploy into. If this namespace does not exist, it is created. If it exists, Dataproc verifies that another Dataproc VirtualCluster is not installed into it. If not specified, the name of the Dataproc Cluster is used.

gkeClusterConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfig

The configuration for running the Dataproc cluster on GKE.

kubernetesSoftwareConfig ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfig

The software configuration for this Dataproc cluster running on Kubernetes.

kubernetesNamespace string

A namespace within the Kubernetes cluster to deploy into. If this namespace does not exist, it is created. If it exists, Dataproc verifies that another Dataproc VirtualCluster is not installed into it. If not specified, the name of the Dataproc Cluster is used.

gke_cluster_config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfig

The configuration for running the Dataproc cluster on GKE.

kubernetes_software_config ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfig

The software configuration for this Dataproc cluster running on Kubernetes.

kubernetes_namespace str

A namespace within the Kubernetes cluster to deploy into. If this namespace does not exist, it is created. If it exists, Dataproc verifies that another Dataproc VirtualCluster is not installed into it. If not specified, the name of the Dataproc Cluster is used.

gkeClusterConfig Property Map

The configuration for running the Dataproc cluster on GKE.

kubernetesSoftwareConfig Property Map

The software configuration for this Dataproc cluster running on Kubernetes.

kubernetesNamespace String

A namespace within the Kubernetes cluster to deploy into. If this namespace does not exist, it is created. If it exists, Dataproc verifies that another Dataproc VirtualCluster is not installed into it. If not specified, the name of the Dataproc Cluster is used.

ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfig

GkeClusterTarget string

A target GKE cluster to deploy to. It must be in the same project and region as the Dataproc cluster (the GKE cluster can be zonal or regional)

NodePoolTargets List<ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTarget>

GKE node pools where workloads will be scheduled. At least one node pool must be assigned the DEFAULT GkeNodePoolTarget.Role. If a GkeNodePoolTarget is not specified, Dataproc constructs a DEFAULT GkeNodePoolTarget. Each role can be given to only one GkeNodePoolTarget. All node pools must have the same location settings.

GkeClusterTarget string

A target GKE cluster to deploy to. It must be in the same project and region as the Dataproc cluster (the GKE cluster can be zonal or regional)

NodePoolTargets []ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTarget

GKE node pools where workloads will be scheduled. At least one node pool must be assigned the DEFAULT GkeNodePoolTarget.Role. If a GkeNodePoolTarget is not specified, Dataproc constructs a DEFAULT GkeNodePoolTarget. Each role can be given to only one GkeNodePoolTarget. All node pools must have the same location settings.

gkeClusterTarget String

A target GKE cluster to deploy to. It must be in the same project and region as the Dataproc cluster (the GKE cluster can be zonal or regional)

nodePoolTargets List<ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTarget>

GKE node pools where workloads will be scheduled. At least one node pool must be assigned the DEFAULT GkeNodePoolTarget.Role. If a GkeNodePoolTarget is not specified, Dataproc constructs a DEFAULT GkeNodePoolTarget. Each role can be given to only one GkeNodePoolTarget. All node pools must have the same location settings.

gkeClusterTarget string

A target GKE cluster to deploy to. It must be in the same project and region as the Dataproc cluster (the GKE cluster can be zonal or regional)

nodePoolTargets ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTarget[]

GKE node pools where workloads will be scheduled. At least one node pool must be assigned the DEFAULT GkeNodePoolTarget.Role. If a GkeNodePoolTarget is not specified, Dataproc constructs a DEFAULT GkeNodePoolTarget. Each role can be given to only one GkeNodePoolTarget. All node pools must have the same location settings.

gke_cluster_target str

A target GKE cluster to deploy to. It must be in the same project and region as the Dataproc cluster (the GKE cluster can be zonal or regional)

node_pool_targets Sequence[ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTarget]

GKE node pools where workloads will be scheduled. At least one node pool must be assigned the DEFAULT GkeNodePoolTarget.Role. If a GkeNodePoolTarget is not specified, Dataproc constructs a DEFAULT GkeNodePoolTarget. Each role can be given to only one GkeNodePoolTarget. All node pools must have the same location settings.

gkeClusterTarget String

A target GKE cluster to deploy to. It must be in the same project and region as the Dataproc cluster (the GKE cluster can be zonal or regional)

nodePoolTargets List<Property Map>

GKE node pools where workloads will be scheduled. At least one node pool must be assigned the DEFAULT GkeNodePoolTarget.Role. If a GkeNodePoolTarget is not specified, Dataproc constructs a DEFAULT GkeNodePoolTarget. Each role can be given to only one GkeNodePoolTarget. All node pools must have the same location settings.

ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTarget

NodePool string

The target GKE node pool.

Roles List<string>

The roles associated with the GKE node pool. One of "DEFAULT", "CONTROLLER", "SPARK_DRIVER" or "SPARK_EXECUTOR".

NodePoolConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig

The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.

NodePool string

The target GKE node pool.

Roles []string

The roles associated with the GKE node pool. One of "DEFAULT", "CONTROLLER", "SPARK_DRIVER" or "SPARK_EXECUTOR".

NodePoolConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig

The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.

nodePool String

The target GKE node pool.

roles List<String>

The roles associated with the GKE node pool. One of "DEFAULT", "CONTROLLER", "SPARK_DRIVER" or "SPARK_EXECUTOR".

nodePoolConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig

The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.

nodePool string

The target GKE node pool.

roles string[]

The roles associated with the GKE node pool. One of "DEFAULT", "CONTROLLER", "SPARK_DRIVER" or "SPARK_EXECUTOR".

nodePoolConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig

The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.

node_pool str

The target GKE node pool.

roles Sequence[str]

The roles associated with the GKE node pool. One of "DEFAULT", "CONTROLLER", "SPARK_DRIVER" or "SPARK_EXECUTOR".

node_pool_config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig

The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.

nodePool String

The target GKE node pool.

roles List<String>

The roles associated with the GKE node pool. One of "DEFAULT", "CONTROLLER", "SPARK_DRIVER" or "SPARK_EXECUTOR".

nodePoolConfig Property Map

The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.

ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig

Locations List<string>

The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.


Autoscaling ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling

The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.

Config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig

The node pool configuration.

Locations []string

The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.


Autoscaling ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling

The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.

Config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig

The node pool configuration.

locations List<String>

The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.


autoscaling ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling

The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.

config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig

The node pool configuration.

locations string[]

The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.


autoscaling ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling

The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.

config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig

The node pool configuration.

locations Sequence[str]

The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.


autoscaling ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling

The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.

config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig

The node pool configuration.

locations List<String>

The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.


autoscaling Property Map

The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.

config Property Map

The node pool configuration.

ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling

MaxNodeCount int

The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.

MinNodeCount int

The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.

MaxNodeCount int

The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.

MinNodeCount int

The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.

maxNodeCount Integer

The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.

minNodeCount Integer

The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.

maxNodeCount number

The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.

minNodeCount number

The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.

max_node_count int

The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.

min_node_count int

The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.

maxNodeCount Number

The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.

minNodeCount Number

The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.

ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig

LocalSsdCount int

The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.

MachineType string

The name of a Compute Engine machine type to create for the node pool nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

MinCpuPlatform string

Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

Preemptible bool

Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).

Spot bool

Spot flag for enabling Spot VM, which is a rebrand of the existing preemptible flag.

LocalSsdCount int

The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.

MachineType string

The name of a Compute Engine machine type to create for the node pool nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

MinCpuPlatform string

Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

Preemptible bool

Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).

Spot bool

Spot flag for enabling Spot VM, which is a rebrand of the existing preemptible flag.

localSsdCount Integer

The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.

machineType String

The name of a Compute Engine machine type to create for the node pool nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

minCpuPlatform String

Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

preemptible Boolean

Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).

spot Boolean

Spot flag for enabling Spot VM, which is a rebrand of the existing preemptible flag.

localSsdCount number

The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.

machineType string

The name of a Compute Engine machine type to create for the node pool nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

minCpuPlatform string

Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

preemptible boolean

Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).

spot boolean

Spot flag for enabling Spot VM, which is a rebrand of the existing preemptible flag.

local_ssd_count int

The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.

machine_type str

The name of a Compute Engine machine type to create for the node pool nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

min_cpu_platform str

Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

preemptible bool

Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).

spot bool

Spot flag for enabling Spot VM, which is a rebrand of the existing preemptible flag.

localSsdCount Number

The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.

machineType String

The name of a Compute Engine machine type to create for the node pool nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

minCpuPlatform String

Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

preemptible Boolean

Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).

spot Boolean

Spot flag for enabling Spot VM, which is a rebrand of the existing preemptible flag.

ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfig

ComponentVersion Dictionary<string, string>

The components that should be installed in this Dataproc cluster. The key must be a string from the
KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified.

  • NOTE: component_version[SPARK] is mandatory to set, or the creation of the cluster will fail.
Properties Dictionary<string, string>

The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

ComponentVersion map[string]string

The components that should be installed in this Dataproc cluster. The key must be a string from the
KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified.

  • NOTE: component_version[SPARK] is mandatory to set, or the creation of the cluster will fail.
Properties map[string]string

The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

componentVersion Map<String,String>

The components that should be installed in this Dataproc cluster. The key must be a string from the
KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified.

  • NOTE: component_version[SPARK] is mandatory to set, or the creation of the cluster will fail.
properties Map<String,String>

The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

componentVersion {[key: string]: string}

The components that should be installed in this Dataproc cluster. The key must be a string from the
KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified.

  • NOTE: component_version[SPARK] is mandatory to set, or the creation of the cluster will fail.
properties {[key: string]: string}

The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

component_version Mapping[str, str]

The components that should be installed in this Dataproc cluster. The key must be a string from the
KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified.

  • NOTE: component_version[SPARK] is mandatory to set, or the creation of the cluster will fail.
properties Mapping[str, str]

The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

componentVersion Map<String>

The components that should be installed in this Dataproc cluster. The key must be a string from the
KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified.

  • NOTE: component_version[SPARK] is mandatory to set, or the creation of the cluster will fail.
properties Map<String>

The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

Import

This resource does not support import.

Package Details

Repository
Google Cloud (GCP) Classic pulumi/pulumi-gcp
License
Apache-2.0
Notes

This Pulumi package is based on the google-beta Terraform Provider.