gcp.dataproc.Cluster

Google Cloud Classic v6.67.0 published on Wednesday, Sep 27, 2023 by Pulumi

    Manages a Cloud Dataproc cluster resource within GCP.

    !> Warning: Due to limitations of the API, all arguments except labels, cluster_config.worker_config.num_instances, and cluster_config.preemptible_worker_config.num_instances are non-updatable. Changing any other argument will cause the whole cluster to be recreated!
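
    As a hedged TypeScript sketch of what this means in practice (the resource name and sizes below are illustrative, not from this reference): raising cluster_config.worker_config.num_instances on a later pulumi up resizes the cluster in place, while changing any other field, such as the machine type, recreates it.

    import * as gcp from "@pulumi/gcp";

    // Illustrative only: bumping numInstances (e.g. 2 -> 3) on a later update
    // resizes the cluster in place; editing machineType instead would force
    // the whole cluster to be recreated.
    const resizable = new gcp.dataproc.Cluster("resizable", {
        region: "us-central1",
        clusterConfig: {
            workerConfig: {
                numInstances: 3,          // updatable without recreation
                machineType: "e2-medium", // changing this recreates the cluster
            },
        },
    });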

    Example Usage

    Basic

    C#

    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Gcp = Pulumi.Gcp;
    
    return await Deployment.RunAsync(() => 
    {
        var simplecluster = new Gcp.Dataproc.Cluster("simplecluster", new()
        {
            Region = "us-central1",
        });
    
    });
    
    Go

    package main
    
    import (
    	"github.com/pulumi/pulumi-gcp/sdk/v6/go/gcp/dataproc"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := dataproc.NewCluster(ctx, "simplecluster", &dataproc.ClusterArgs{
    			Region: pulumi.String("us-central1"),
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    Java

    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.gcp.dataproc.Cluster;
    import com.pulumi.gcp.dataproc.ClusterArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var simplecluster = new Cluster("simplecluster", ClusterArgs.builder()        
                .region("us-central1")
                .build());
    
        }
    }
    
    Python

    import pulumi
    import pulumi_gcp as gcp
    
    simplecluster = gcp.dataproc.Cluster("simplecluster", region="us-central1")
    
    TypeScript

    import * as pulumi from "@pulumi/pulumi";
    import * as gcp from "@pulumi/gcp";
    
    const simplecluster = new gcp.dataproc.Cluster("simplecluster", {region: "us-central1"});
    
    YAML

    resources:
      simplecluster:
        type: gcp:dataproc:Cluster
        properties:
          region: us-central1
    

    Advanced

    C#

    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Gcp = Pulumi.Gcp;
    
    return await Deployment.RunAsync(() => 
    {
        var @default = new Gcp.ServiceAccount.Account("default", new()
        {
            AccountId = "service-account-id",
            DisplayName = "Service Account",
        });
    
        var mycluster = new Gcp.Dataproc.Cluster("mycluster", new()
        {
            Region = "us-central1",
            GracefulDecommissionTimeout = "120s",
            Labels = 
            {
                { "foo", "bar" },
            },
            ClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigArgs
            {
                StagingBucket = "dataproc-staging-bucket",
                MasterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigArgs
                {
                    NumInstances = 1,
                    MachineType = "e2-medium",
                    DiskConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigDiskConfigArgs
                    {
                        BootDiskType = "pd-ssd",
                        BootDiskSizeGb = 30,
                    },
                },
                WorkerConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigWorkerConfigArgs
                {
                    NumInstances = 2,
                    MachineType = "e2-medium",
                    MinCpuPlatform = "Intel Skylake",
                    DiskConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigWorkerConfigDiskConfigArgs
                    {
                        BootDiskSizeGb = 30,
                        NumLocalSsds = 1,
                    },
                },
                PreemptibleWorkerConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigPreemptibleWorkerConfigArgs
                {
                    NumInstances = 0,
                },
                SoftwareConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigSoftwareConfigArgs
                {
                    ImageVersion = "2.0.35-debian10",
                    OverrideProperties = 
                    {
                        { "dataproc:dataproc.allow.zero.workers", "true" },
                    },
                },
                GceClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigGceClusterConfigArgs
                {
                    Tags = new[]
                    {
                        "foo",
                        "bar",
                    },
                    ServiceAccount = @default.Email,
                    ServiceAccountScopes = new[]
                    {
                        "cloud-platform",
                    },
                },
                InitializationActions = new[]
                {
                    new Gcp.Dataproc.Inputs.ClusterClusterConfigInitializationActionArgs
                    {
                        Script = "gs://dataproc-initialization-actions/stackdriver/stackdriver.sh",
                        TimeoutSec = 500,
                    },
                },
            },
        });
    
    });
    
    Go

    package main
    
    import (
    	"github.com/pulumi/pulumi-gcp/sdk/v6/go/gcp/dataproc"
    	"github.com/pulumi/pulumi-gcp/sdk/v6/go/gcp/serviceAccount"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_default, err := serviceAccount.NewAccount(ctx, "default", &serviceAccount.AccountArgs{
    			AccountId:   pulumi.String("service-account-id"),
    			DisplayName: pulumi.String("Service Account"),
    		})
    		if err != nil {
    			return err
    		}
    		_, err = dataproc.NewCluster(ctx, "mycluster", &dataproc.ClusterArgs{
    			Region:                      pulumi.String("us-central1"),
    			GracefulDecommissionTimeout: pulumi.String("120s"),
    			Labels: pulumi.StringMap{
    				"foo": pulumi.String("bar"),
    			},
    			ClusterConfig: &dataproc.ClusterClusterConfigArgs{
    				StagingBucket: pulumi.String("dataproc-staging-bucket"),
    				MasterConfig: &dataproc.ClusterClusterConfigMasterConfigArgs{
    					NumInstances: pulumi.Int(1),
    					MachineType:  pulumi.String("e2-medium"),
    					DiskConfig: &dataproc.ClusterClusterConfigMasterConfigDiskConfigArgs{
    						BootDiskType:   pulumi.String("pd-ssd"),
    						BootDiskSizeGb: pulumi.Int(30),
    					},
    				},
    				WorkerConfig: &dataproc.ClusterClusterConfigWorkerConfigArgs{
    					NumInstances:   pulumi.Int(2),
    					MachineType:    pulumi.String("e2-medium"),
    					MinCpuPlatform: pulumi.String("Intel Skylake"),
    					DiskConfig: &dataproc.ClusterClusterConfigWorkerConfigDiskConfigArgs{
    						BootDiskSizeGb: pulumi.Int(30),
    						NumLocalSsds:   pulumi.Int(1),
    					},
    				},
    				PreemptibleWorkerConfig: &dataproc.ClusterClusterConfigPreemptibleWorkerConfigArgs{
    					NumInstances: pulumi.Int(0),
    				},
    				SoftwareConfig: &dataproc.ClusterClusterConfigSoftwareConfigArgs{
    					ImageVersion: pulumi.String("2.0.35-debian10"),
    					OverrideProperties: pulumi.StringMap{
    						"dataproc:dataproc.allow.zero.workers": pulumi.String("true"),
    					},
    				},
    				GceClusterConfig: &dataproc.ClusterClusterConfigGceClusterConfigArgs{
    					Tags: pulumi.StringArray{
    						pulumi.String("foo"),
    						pulumi.String("bar"),
    					},
    					ServiceAccount: _default.Email,
    					ServiceAccountScopes: pulumi.StringArray{
    						pulumi.String("cloud-platform"),
    					},
    				},
    				InitializationActions: dataproc.ClusterClusterConfigInitializationActionArray{
    					&dataproc.ClusterClusterConfigInitializationActionArgs{
    						Script:     pulumi.String("gs://dataproc-initialization-actions/stackdriver/stackdriver.sh"),
    						TimeoutSec: pulumi.Int(500),
    					},
    				},
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    Java

    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.gcp.serviceAccount.Account;
    import com.pulumi.gcp.serviceAccount.AccountArgs;
    import com.pulumi.gcp.dataproc.Cluster;
    import com.pulumi.gcp.dataproc.ClusterArgs;
    import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigArgs;
    import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigArgs;
    import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigDiskConfigArgs;
    import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigWorkerConfigArgs;
    import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigWorkerConfigDiskConfigArgs;
    import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigPreemptibleWorkerConfigArgs;
    import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigSoftwareConfigArgs;
    import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigGceClusterConfigArgs;
    import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigInitializationActionArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var default_ = new Account("default", AccountArgs.builder()        
                .accountId("service-account-id")
                .displayName("Service Account")
                .build());
    
            var mycluster = new Cluster("mycluster", ClusterArgs.builder()        
                .region("us-central1")
                .gracefulDecommissionTimeout("120s")
                .labels(Map.of("foo", "bar"))
                .clusterConfig(ClusterClusterConfigArgs.builder()
                    .stagingBucket("dataproc-staging-bucket")
                    .masterConfig(ClusterClusterConfigMasterConfigArgs.builder()
                        .numInstances(1)
                        .machineType("e2-medium")
                        .diskConfig(ClusterClusterConfigMasterConfigDiskConfigArgs.builder()
                            .bootDiskType("pd-ssd")
                            .bootDiskSizeGb(30)
                            .build())
                        .build())
                    .workerConfig(ClusterClusterConfigWorkerConfigArgs.builder()
                        .numInstances(2)
                        .machineType("e2-medium")
                        .minCpuPlatform("Intel Skylake")
                        .diskConfig(ClusterClusterConfigWorkerConfigDiskConfigArgs.builder()
                            .bootDiskSizeGb(30)
                            .numLocalSsds(1)
                            .build())
                        .build())
                    .preemptibleWorkerConfig(ClusterClusterConfigPreemptibleWorkerConfigArgs.builder()
                        .numInstances(0)
                        .build())
                    .softwareConfig(ClusterClusterConfigSoftwareConfigArgs.builder()
                        .imageVersion("2.0.35-debian10")
                        .overrideProperties(Map.of("dataproc:dataproc.allow.zero.workers", "true"))
                        .build())
                    .gceClusterConfig(ClusterClusterConfigGceClusterConfigArgs.builder()
                        .tags(                    
                            "foo",
                            "bar")
                        .serviceAccount(default_.email())
                        .serviceAccountScopes("cloud-platform")
                        .build())
                    .initializationActions(ClusterClusterConfigInitializationActionArgs.builder()
                        .script("gs://dataproc-initialization-actions/stackdriver/stackdriver.sh")
                        .timeoutSec(500)
                        .build())
                    .build())
                .build());
    
        }
    }
    
    Python

    import pulumi
    import pulumi_gcp as gcp
    
    default = gcp.service_account.Account("default",
        account_id="service-account-id",
        display_name="Service Account")
    mycluster = gcp.dataproc.Cluster("mycluster",
        region="us-central1",
        graceful_decommission_timeout="120s",
        labels={
            "foo": "bar",
        },
        cluster_config=gcp.dataproc.ClusterClusterConfigArgs(
            staging_bucket="dataproc-staging-bucket",
            master_config=gcp.dataproc.ClusterClusterConfigMasterConfigArgs(
                num_instances=1,
                machine_type="e2-medium",
                disk_config=gcp.dataproc.ClusterClusterConfigMasterConfigDiskConfigArgs(
                    boot_disk_type="pd-ssd",
                    boot_disk_size_gb=30,
                ),
            ),
            worker_config=gcp.dataproc.ClusterClusterConfigWorkerConfigArgs(
                num_instances=2,
                machine_type="e2-medium",
                min_cpu_platform="Intel Skylake",
                disk_config=gcp.dataproc.ClusterClusterConfigWorkerConfigDiskConfigArgs(
                    boot_disk_size_gb=30,
                    num_local_ssds=1,
                ),
            ),
            preemptible_worker_config=gcp.dataproc.ClusterClusterConfigPreemptibleWorkerConfigArgs(
                num_instances=0,
            ),
            software_config=gcp.dataproc.ClusterClusterConfigSoftwareConfigArgs(
                image_version="2.0.35-debian10",
                override_properties={
                    "dataproc:dataproc.allow.zero.workers": "true",
                },
            ),
            gce_cluster_config=gcp.dataproc.ClusterClusterConfigGceClusterConfigArgs(
                tags=[
                    "foo",
                    "bar",
                ],
                service_account=default.email,
                service_account_scopes=["cloud-platform"],
            ),
            initialization_actions=[gcp.dataproc.ClusterClusterConfigInitializationActionArgs(
                script="gs://dataproc-initialization-actions/stackdriver/stackdriver.sh",
                timeout_sec=500,
            )],
        ))
    
    TypeScript

    import * as pulumi from "@pulumi/pulumi";
    import * as gcp from "@pulumi/gcp";
    
    const _default = new gcp.serviceaccount.Account("default", {
        accountId: "service-account-id",
        displayName: "Service Account",
    });
    const mycluster = new gcp.dataproc.Cluster("mycluster", {
        region: "us-central1",
        gracefulDecommissionTimeout: "120s",
        labels: {
            foo: "bar",
        },
        clusterConfig: {
            stagingBucket: "dataproc-staging-bucket",
            masterConfig: {
                numInstances: 1,
                machineType: "e2-medium",
                diskConfig: {
                    bootDiskType: "pd-ssd",
                    bootDiskSizeGb: 30,
                },
            },
            workerConfig: {
                numInstances: 2,
                machineType: "e2-medium",
                minCpuPlatform: "Intel Skylake",
                diskConfig: {
                    bootDiskSizeGb: 30,
                    numLocalSsds: 1,
                },
            },
            preemptibleWorkerConfig: {
                numInstances: 0,
            },
            softwareConfig: {
                imageVersion: "2.0.35-debian10",
                overrideProperties: {
                    "dataproc:dataproc.allow.zero.workers": "true",
                },
            },
            gceClusterConfig: {
                tags: [
                    "foo",
                    "bar",
                ],
                serviceAccount: _default.email,
                serviceAccountScopes: ["cloud-platform"],
            },
            initializationActions: [{
                script: "gs://dataproc-initialization-actions/stackdriver/stackdriver.sh",
                timeoutSec: 500,
            }],
        },
    });
    
    YAML

    resources:
      default:
        type: gcp:serviceAccount:Account
        properties:
          accountId: service-account-id
          displayName: Service Account
      mycluster:
        type: gcp:dataproc:Cluster
        properties:
          region: us-central1
          gracefulDecommissionTimeout: 120s
          labels:
            foo: bar
          clusterConfig:
            stagingBucket: dataproc-staging-bucket
            masterConfig:
              numInstances: 1
              machineType: e2-medium
              diskConfig:
                bootDiskType: pd-ssd
                bootDiskSizeGb: 30
            workerConfig:
              numInstances: 2
              machineType: e2-medium
              minCpuPlatform: Intel Skylake
              diskConfig:
                bootDiskSizeGb: 30
                numLocalSsds: 1
            preemptibleWorkerConfig:
              numInstances: 0
            softwareConfig:
              imageVersion: 2.0.35-debian10
              overrideProperties:
                dataproc:dataproc.allow.zero.workers: 'true'
            gceClusterConfig:
              tags:
                - foo
                - bar
              serviceAccount: ${default.email}
              serviceAccountScopes:
                - cloud-platform
            initializationActions:
              - script: gs://dataproc-initialization-actions/stackdriver/stackdriver.sh
                timeoutSec: 500
    

    Using A GPU Accelerator

    C#

    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Gcp = Pulumi.Gcp;
    
    return await Deployment.RunAsync(() => 
    {
        var acceleratedCluster = new Gcp.Dataproc.Cluster("acceleratedCluster", new()
        {
            ClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigArgs
            {
                GceClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigGceClusterConfigArgs
                {
                    Zone = "us-central1-a",
                },
                MasterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigArgs
                {
                    Accelerators = new[]
                    {
                        new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigAcceleratorArgs
                        {
                            AcceleratorCount = 1,
                            AcceleratorType = "nvidia-tesla-k80",
                        },
                    },
                },
            },
            Region = "us-central1",
        });
    
    });
    
    Go

    package main
    
    import (
    	"github.com/pulumi/pulumi-gcp/sdk/v6/go/gcp/dataproc"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := dataproc.NewCluster(ctx, "acceleratedCluster", &dataproc.ClusterArgs{
    			ClusterConfig: &dataproc.ClusterClusterConfigArgs{
    				GceClusterConfig: &dataproc.ClusterClusterConfigGceClusterConfigArgs{
    					Zone: pulumi.String("us-central1-a"),
    				},
    				MasterConfig: &dataproc.ClusterClusterConfigMasterConfigArgs{
    					Accelerators: dataproc.ClusterClusterConfigMasterConfigAcceleratorArray{
    						&dataproc.ClusterClusterConfigMasterConfigAcceleratorArgs{
    							AcceleratorCount: pulumi.Int(1),
    							AcceleratorType:  pulumi.String("nvidia-tesla-k80"),
    						},
    					},
    				},
    			},
    			Region: pulumi.String("us-central1"),
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    Java

    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.gcp.dataproc.Cluster;
    import com.pulumi.gcp.dataproc.ClusterArgs;
    import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigArgs;
    import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigGceClusterConfigArgs;
    import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigArgs;
    import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigAcceleratorArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var acceleratedCluster = new Cluster("acceleratedCluster", ClusterArgs.builder()        
                .clusterConfig(ClusterClusterConfigArgs.builder()
                    .gceClusterConfig(ClusterClusterConfigGceClusterConfigArgs.builder()
                        .zone("us-central1-a")
                        .build())
                    .masterConfig(ClusterClusterConfigMasterConfigArgs.builder()
                        .accelerators(ClusterClusterConfigMasterConfigAcceleratorArgs.builder()
                            .acceleratorCount(1)
                            .acceleratorType("nvidia-tesla-k80")
                            .build())
                        .build())
                    .build())
                .region("us-central1")
                .build());
    
        }
    }
    
    Python

    import pulumi
    import pulumi_gcp as gcp
    
    accelerated_cluster = gcp.dataproc.Cluster("acceleratedCluster",
        cluster_config=gcp.dataproc.ClusterClusterConfigArgs(
            gce_cluster_config=gcp.dataproc.ClusterClusterConfigGceClusterConfigArgs(
                zone="us-central1-a",
            ),
            master_config=gcp.dataproc.ClusterClusterConfigMasterConfigArgs(
                accelerators=[gcp.dataproc.ClusterClusterConfigMasterConfigAcceleratorArgs(
                    accelerator_count=1,
                    accelerator_type="nvidia-tesla-k80",
                )],
            ),
        ),
        region="us-central1")
    
    TypeScript

    import * as pulumi from "@pulumi/pulumi";
    import * as gcp from "@pulumi/gcp";
    
    const acceleratedCluster = new gcp.dataproc.Cluster("acceleratedCluster", {
        clusterConfig: {
            gceClusterConfig: {
                zone: "us-central1-a",
            },
            masterConfig: {
                accelerators: [{
                    acceleratorCount: 1,
                    acceleratorType: "nvidia-tesla-k80",
                }],
            },
        },
        region: "us-central1",
    });
    
    YAML

    resources:
      acceleratedCluster:
        type: gcp:dataproc:Cluster
        properties:
          clusterConfig:
            gceClusterConfig:
              zone: us-central1-a
            masterConfig:
              accelerators:
                - acceleratorCount: 1
                  acceleratorType: nvidia-tesla-k80
          region: us-central1
    

    Create Cluster Resource

    TypeScript

    new Cluster(name: string, args?: ClusterArgs, opts?: CustomResourceOptions);

    Python

    @overload
    def Cluster(resource_name: str,
                opts: Optional[ResourceOptions] = None,
                cluster_config: Optional[ClusterClusterConfigArgs] = None,
                graceful_decommission_timeout: Optional[str] = None,
                labels: Optional[Mapping[str, str]] = None,
                name: Optional[str] = None,
                project: Optional[str] = None,
                region: Optional[str] = None,
                virtual_cluster_config: Optional[ClusterVirtualClusterConfigArgs] = None)
    @overload
    def Cluster(resource_name: str,
                args: Optional[ClusterArgs] = None,
                opts: Optional[ResourceOptions] = None)

    Go

    func NewCluster(ctx *Context, name string, args *ClusterArgs, opts ...ResourceOption) (*Cluster, error)

    C#

    public Cluster(string name, ClusterArgs? args = null, CustomResourceOptions? opts = null)

    Java

    public Cluster(String name, ClusterArgs args)
    public Cluster(String name, ClusterArgs args, CustomResourceOptions options)

    YAML

    type: gcp:dataproc:Cluster
    properties: # The arguments to resource properties.
    options: # Bag of options to control resource's behavior.
    
    
    name string
    The unique name of the resource.
    args ClusterArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    resource_name str
    The unique name of the resource.
    args ClusterArgs
    The arguments to resource properties.
    opts ResourceOptions
    Bag of options to control resource's behavior.
    ctx Context
    Context object for the current deployment.
    name string
    The unique name of the resource.
    args ClusterArgs
    The arguments to resource properties.
    opts ResourceOption
    Bag of options to control resource's behavior.
    name string
    The unique name of the resource.
    args ClusterArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    name String
    The unique name of the resource.
    args ClusterArgs
    The arguments to resource properties.
    options CustomResourceOptions
    Bag of options to control resource's behavior.

    Cluster Resource Properties

    To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

    Inputs

    The Cluster resource accepts the following input properties:

    ClusterConfig ClusterClusterConfig

    Allows you to configure various aspects of the cluster. Structure defined below.

    GracefulDecommissionTimeout string

    The timeout duration that allows graceful decommissioning when you change the number of worker nodes directly through a pulumi up.

    Labels Dictionary<string, string>

    The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some of its own, including goog-dataproc-cluster-name, which is the name of the cluster.

    Name string

    The name of the cluster, unique within the project and zone.


    Project string

    The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

    Region string

    The region in which the cluster and associated nodes will be created. Defaults to global.

    VirtualClusterConfig ClusterVirtualClusterConfig

    Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

    ClusterConfig ClusterClusterConfigArgs

    Allows you to configure various aspects of the cluster. Structure defined below.

    GracefulDecommissionTimeout string

    The timeout duration that allows graceful decommissioning when you change the number of worker nodes directly through a pulumi up.

    Labels map[string]string

    The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some of its own, including goog-dataproc-cluster-name, which is the name of the cluster.

    Name string

    The name of the cluster, unique within the project and zone.


    Project string

    The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

    Region string

    The region in which the cluster and associated nodes will be created. Defaults to global.

    VirtualClusterConfig ClusterVirtualClusterConfigArgs

    Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

    clusterConfig ClusterClusterConfig

    Allows you to configure various aspects of the cluster. Structure defined below.

    gracefulDecommissionTimeout String

    The timeout duration that allows graceful decommissioning when you change the number of worker nodes directly through a pulumi up.

    labels Map<String,String>

    The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some of its own, including goog-dataproc-cluster-name, which is the name of the cluster.

    name String

    The name of the cluster, unique within the project and zone.


    project String

    The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

    region String

    The region in which the cluster and associated nodes will be created. Defaults to global.

    virtualClusterConfig ClusterVirtualClusterConfig

    Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

    clusterConfig ClusterClusterConfig

    Allows you to configure various aspects of the cluster. Structure defined below.

    gracefulDecommissionTimeout string

    The timeout duration that allows graceful decommissioning when you change the number of worker nodes directly through a pulumi up.

    labels {[key: string]: string}

    The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some of its own, including goog-dataproc-cluster-name, which is the name of the cluster.

    name string

    The name of the cluster, unique within the project and zone.


    project string

    The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

    region string

    The region in which the cluster and associated nodes will be created. Defaults to global.

    virtualClusterConfig ClusterVirtualClusterConfig

    Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

    cluster_config ClusterClusterConfigArgs

    Allows you to configure various aspects of the cluster. Structure defined below.

    graceful_decommission_timeout str

    The timeout duration that allows graceful decommissioning when you change the number of worker nodes directly through a pulumi up.

    labels Mapping[str, str]

    The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some of its own, including goog-dataproc-cluster-name, which is the name of the cluster.

    name str

    The name of the cluster, unique within the project and zone.


    project str

    The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

    region str

    The region in which the cluster and associated nodes will be created. Defaults to global.

    virtual_cluster_config ClusterVirtualClusterConfigArgs

    Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

    clusterConfig Property Map

    Allows you to configure various aspects of the cluster. Structure defined below.

    gracefulDecommissionTimeout String

    The timeout duration that allows graceful decommissioning when you change the number of worker nodes directly through a pulumi up.

    labels Map<String>

    The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some of its own, including goog-dataproc-cluster-name, which is the name of the cluster.

    name String

    The name of the cluster, unique within the project and zone.


    project String

    The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

    region String

    The region in which the cluster and associated nodes will be created. Defaults to global.

    virtualClusterConfig Property Map

    Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

    Outputs

    All input properties are implicitly available as output properties. Additionally, the Cluster resource produces the following output properties:

    Id string

    The provider-assigned unique ID for this managed resource.

    Id string

    The provider-assigned unique ID for this managed resource.

    id String

    The provider-assigned unique ID for this managed resource.

    id string

    The provider-assigned unique ID for this managed resource.

    id str

    The provider-assigned unique ID for this managed resource.

    id String

    The provider-assigned unique ID for this managed resource.

    Look up Existing Cluster Resource

    Get an existing Cluster resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

    TypeScript

    public static get(name: string, id: Input<ID>, state?: ClusterState, opts?: CustomResourceOptions): Cluster

    Python

    @staticmethod
    def get(resource_name: str,
            id: str,
            opts: Optional[ResourceOptions] = None,
            cluster_config: Optional[ClusterClusterConfigArgs] = None,
            graceful_decommission_timeout: Optional[str] = None,
            labels: Optional[Mapping[str, str]] = None,
            name: Optional[str] = None,
            project: Optional[str] = None,
            region: Optional[str] = None,
            virtual_cluster_config: Optional[ClusterVirtualClusterConfigArgs] = None) -> Cluster

    Go

    func GetCluster(ctx *Context, name string, id IDInput, state *ClusterState, opts ...ResourceOption) (*Cluster, error)

    C#

    public static Cluster Get(string name, Input<string> id, ClusterState? state, CustomResourceOptions? opts = null)

    Java

    public static Cluster get(String name, Output<String> id, ClusterState state, CustomResourceOptions options)
    Resource lookup is not supported in YAML
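
    For example, a minimal TypeScript lookup might look like the sketch below; the ID format shown is an assumption for illustration, so substitute the actual provider-assigned ID from your stack (e.g. from pulumi stack export).

    import * as gcp from "@pulumi/gcp";

    // Read the state of an existing cluster into this program. The project,
    // region, and cluster name in the ID are hypothetical.
    const existing = gcp.dataproc.Cluster.get(
        "existing-cluster",
        "projects/my-project/regions/us-central1/clusters/mycluster");

    // All input properties are then available as outputs, e.g. its labels.
    export const existingClusterLabels = existing.labels;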
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    resource_name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    The following state arguments are supported:
    ClusterConfig ClusterClusterConfig

    Allows you to configure various aspects of the cluster. Structure defined below.

    GracefulDecommissionTimeout string

    The timeout duration that allows graceful decommissioning when you change the number of worker nodes directly through a pulumi up.

    Labels Dictionary<string, string>

    The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some of its own, including goog-dataproc-cluster-name, which is the name of the cluster.

    Name string

    The name of the cluster, unique within the project and zone.


    Project string

    The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

    Region string

    The region in which the cluster and associated nodes will be created. Defaults to global.

    VirtualClusterConfig ClusterVirtualClusterConfig

    Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

    ClusterConfig ClusterClusterConfigArgs

    Allows you to configure various aspects of the cluster. Structure defined below.

    GracefulDecommissionTimeout string

    The timeout duration that allows graceful decommissioning when you change the number of worker nodes directly through a pulumi up.

    Labels map[string]string

    The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some of its own, including goog-dataproc-cluster-name, which is the name of the cluster.

    Name string

    The name of the cluster, unique within the project and zone.


    Project string

    The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

    Region string

    The region in which the cluster and associated nodes will be created. Defaults to global.

    VirtualClusterConfig ClusterVirtualClusterConfigArgs

    Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

    clusterConfig ClusterClusterConfig

    Allows you to configure various aspects of the cluster. Structure defined below.

    gracefulDecommissionTimeout String

    The timeout duration that allows graceful decommissioning when you change the number of worker nodes directly through a pulumi up.

    labels Map<String,String>

    The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some of its own, including goog-dataproc-cluster-name, which is the name of the cluster.

    name String

    The name of the cluster, unique within the project and zone.


    project String

    The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

    region String

    The region in which the cluster and associated nodes will be created. Defaults to global.

    virtualClusterConfig ClusterVirtualClusterConfig

    Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

    clusterConfig ClusterClusterConfig

    Allows you to configure various aspects of the cluster. Structure defined below.

    gracefulDecommissionTimeout string

    The timeout duration that allows graceful decommissioning when you change the number of worker nodes directly through a pulumi up.

    labels {[key: string]: string}

    The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some of its own, including goog-dataproc-cluster-name, which is the name of the cluster.

    name string

    The name of the cluster, unique within the project and zone.


    project string

    The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

    region string

    The region in which the cluster and associated nodes will be created. Defaults to global.

    virtualClusterConfig ClusterVirtualClusterConfig

    Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

    cluster_config ClusterClusterConfigArgs

    Allows you to configure various aspects of the cluster. Structure defined below.

    graceful_decommission_timeout str

    The timeout duration that allows graceful decommissioning when you change the number of worker nodes directly through a pulumi up.

    labels Mapping[str, str]

    The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some of its own, including goog-dataproc-cluster-name, which is the name of the cluster.

    name str

    The name of the cluster, unique within the project and zone.


    project str

    The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

    region str

    The region in which the cluster and associated nodes will be created. Defaults to global.

    virtual_cluster_config ClusterVirtualClusterConfigArgs

    Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

    clusterConfig Property Map

    Allows you to configure various aspects of the cluster. Structure defined below.

    gracefulDecommissionTimeout String

    The timeout duration that allows graceful decommissioning when you change the number of worker nodes directly through a pulumi up.

    labels Map<String>

    The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some of its own, including goog-dataproc-cluster-name, which is the name of the cluster.

    name String

    The name of the cluster, unique within the project and zone.


    project String

    The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.

    region String

    The region in which the cluster and associated nodes will be created. Defaults to global.

    virtualClusterConfig Property Map

    Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.

    Supporting Types

    ClusterClusterConfig, ClusterClusterConfigArgs

    AutoscalingConfig ClusterClusterConfigAutoscalingConfig

    The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.

    Bucket string
    DataprocMetricConfig ClusterClusterConfigDataprocMetricConfig

    The config for Dataproc metrics, used to specify which cluster metric sources to collect. Structure defined below.

    EncryptionConfig ClusterClusterConfigEncryptionConfig

    The Customer managed encryption keys settings for the cluster. Structure defined below.

    EndpointConfig ClusterClusterConfigEndpointConfig

    The config settings for port access on the cluster. Structure defined below.

    GceClusterConfig ClusterClusterConfigGceClusterConfig

    Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.

    InitializationActions List<ClusterClusterConfigInitializationAction>

    Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.

    LifecycleConfig ClusterClusterConfigLifecycleConfig

    The settings for auto deletion cluster schedule. Structure defined below.

    MasterConfig ClusterClusterConfigMasterConfig

    The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.

    MetastoreConfig ClusterClusterConfigMetastoreConfig

    The config setting for metastore service with the cluster. Structure defined below.


    PreemptibleWorkerConfig ClusterClusterConfigPreemptibleWorkerConfig

    The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below.

    NOTE: preemptible_worker_config is an alias for the API's secondaryWorkerConfig. The name doesn't necessarily mean these workers are preemptible; it is kept for legacy/compatibility reasons.
    SecurityConfig ClusterClusterConfigSecurityConfig

    Security related configuration. Structure defined below.

    SoftwareConfig ClusterClusterConfigSoftwareConfig

    The config settings for software inside the cluster. Structure defined below.

    StagingBucket string

    The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

    TempBucket string

    The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.

    WorkerConfig ClusterClusterConfigWorkerConfig

    The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.

    AutoscalingConfig ClusterClusterConfigAutoscalingConfig

    The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.

    Bucket string
    DataprocMetricConfig ClusterClusterConfigDataprocMetricConfig

    The config for Dataproc metrics, used to specify which cluster metric sources to collect. Structure defined below.

    EncryptionConfig ClusterClusterConfigEncryptionConfig

    The Customer managed encryption keys settings for the cluster. Structure defined below.

    EndpointConfig ClusterClusterConfigEndpointConfig

    The config settings for port access on the cluster. Structure defined below.

    GceClusterConfig ClusterClusterConfigGceClusterConfig

    Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.

    InitializationActions []ClusterClusterConfigInitializationAction

    Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.

    LifecycleConfig ClusterClusterConfigLifecycleConfig

    The settings for auto deletion cluster schedule. Structure defined below.

    MasterConfig ClusterClusterConfigMasterConfig

    The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.

    MetastoreConfig ClusterClusterConfigMetastoreConfig

    The config setting for metastore service with the cluster. Structure defined below.


    PreemptibleWorkerConfig ClusterClusterConfigPreemptibleWorkerConfig

    The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below.

    NOTE: preemptible_worker_config is an alias for the API's secondaryWorkerConfig. The name doesn't necessarily mean these workers are preemptible; it is kept for legacy/compatibility reasons.
    SecurityConfig ClusterClusterConfigSecurityConfig

    Security related configuration. Structure defined below.

    SoftwareConfig ClusterClusterConfigSoftwareConfig

    The config settings for software inside the cluster. Structure defined below.

    StagingBucket string

    The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

    TempBucket string

    The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.

    WorkerConfig ClusterClusterConfigWorkerConfig

    The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.

    autoscalingConfig ClusterClusterConfigAutoscalingConfig

    The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.

    bucket String
    dataprocMetricConfig ClusterClusterConfigDataprocMetricConfig

    The config for Dataproc metrics, used to specify which cluster metric sources to collect. Structure defined below.

    encryptionConfig ClusterClusterConfigEncryptionConfig

    The Customer managed encryption keys settings for the cluster. Structure defined below.

    endpointConfig ClusterClusterConfigEndpointConfig

    The config settings for port access on the cluster. Structure defined below.

    gceClusterConfig ClusterClusterConfigGceClusterConfig

    Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.

    initializationActions List<ClusterClusterConfigInitializationAction>

    Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.

    lifecycleConfig ClusterClusterConfigLifecycleConfig

    The settings for auto deletion cluster schedule. Structure defined below.

    masterConfig ClusterClusterConfigMasterConfig

    The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.

    metastoreConfig ClusterClusterConfigMetastoreConfig

    The config setting for metastore service with the cluster. Structure defined below.


    preemptibleWorkerConfig ClusterClusterConfigPreemptibleWorkerConfig

    The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below.

    NOTE: preemptible_worker_config is an alias for the API's secondaryWorkerConfig. The name doesn't necessarily mean these workers are preemptible; it is kept for legacy/compatibility reasons.
    securityConfig ClusterClusterConfigSecurityConfig

    Security related configuration. Structure defined below.

    softwareConfig ClusterClusterConfigSoftwareConfig

    The config settings for software inside the cluster. Structure defined below.

    stagingBucket String

    The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

    tempBucket String

    The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.

    workerConfig ClusterClusterConfigWorkerConfig

    The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.

    autoscalingConfig ClusterClusterConfigAutoscalingConfig

    The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.

    bucket string
    dataprocMetricConfig ClusterClusterConfigDataprocMetricConfig

    The config for Dataproc metrics, used to specify which cluster metric sources to collect. Structure defined below.

    encryptionConfig ClusterClusterConfigEncryptionConfig

    The Customer managed encryption keys settings for the cluster. Structure defined below.

    endpointConfig ClusterClusterConfigEndpointConfig

    The config settings for port access on the cluster. Structure defined below.

    gceClusterConfig ClusterClusterConfigGceClusterConfig

    Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.

    initializationActions ClusterClusterConfigInitializationAction[]

    Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.

    lifecycleConfig ClusterClusterConfigLifecycleConfig

    The settings for auto deletion cluster schedule. Structure defined below.

    masterConfig ClusterClusterConfigMasterConfig

    The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.

    metastoreConfig ClusterClusterConfigMetastoreConfig

    The config setting for metastore service with the cluster. Structure defined below.


    preemptibleWorkerConfig ClusterClusterConfigPreemptibleWorkerConfig

    The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below.

    NOTE: preemptible_worker_config is an alias for the API's secondaryWorkerConfig. The name doesn't necessarily mean these workers are preemptible; it is kept for legacy/compatibility reasons.
    securityConfig ClusterClusterConfigSecurityConfig

    Security related configuration. Structure defined below.

    softwareConfig ClusterClusterConfigSoftwareConfig

    The config settings for software inside the cluster. Structure defined below.

    stagingBucket string

    The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

    tempBucket string

    The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.

    workerConfig ClusterClusterConfigWorkerConfig

    The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.

    autoscaling_config ClusterClusterConfigAutoscalingConfig

    The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.

    bucket str
    dataproc_metric_config ClusterClusterConfigDataprocMetricConfig

    The config for Dataproc metrics, used to specify which cluster metric sources to collect. Structure defined below.

    encryption_config ClusterClusterConfigEncryptionConfig

    The Customer managed encryption keys settings for the cluster. Structure defined below.

    endpoint_config ClusterClusterConfigEndpointConfig

    The config settings for port access on the cluster. Structure defined below.

    gce_cluster_config ClusterClusterConfigGceClusterConfig

    Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.

    initialization_actions Sequence[ClusterClusterConfigInitializationAction]

    Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.

    lifecycle_config ClusterClusterConfigLifecycleConfig

    The settings for auto deletion cluster schedule. Structure defined below.

    master_config ClusterClusterConfigMasterConfig

    The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.

    metastore_config ClusterClusterConfigMetastoreConfig

    The config setting for metastore service with the cluster. Structure defined below.


    preemptible_worker_config ClusterClusterConfigPreemptibleWorkerConfig

    The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below.

    NOTE: preemptible_worker_config is an alias for the API's secondaryWorkerConfig. The name doesn't necessarily mean these workers are preemptible; it is kept for legacy/compatibility reasons.
    security_config ClusterClusterConfigSecurityConfig

    Security related configuration. Structure defined below.

    software_config ClusterClusterConfigSoftwareConfig

    The config settings for software inside the cluster. Structure defined below.

    staging_bucket str

    The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

    temp_bucket str

    The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.

    worker_config ClusterClusterConfigWorkerConfig

    The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.

    autoscalingConfig Property Map

    The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.

    bucket String
    dataprocMetricConfig Property Map

    The Dataproc metrics config used to enable collection of Dataproc OSS metrics on the cluster. Structure defined below.

    encryptionConfig Property Map

    The customer-managed encryption key settings for the cluster. Structure defined below.

    endpointConfig Property Map

    The config settings for port access on the cluster. Structure defined below.

    gceClusterConfig Property Map

    Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.

    initializationActions List<Property Map>

    Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.

    lifecycleConfig Property Map

    The settings for the cluster's auto-deletion schedule. Structure defined below.

    masterConfig Property Map

    The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.

    metastoreConfig Property Map

    The config settings for the metastore service used by the cluster. Structure defined below.


    preemptibleWorkerConfig Property Map

    The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below.

    • NOTE: preemptible_worker_config is an alias for the API's secondaryWorkerConfig. The name doesn't necessarily mean it is preemptible and is named as such for legacy/compatibility reasons.
    securityConfig Property Map

    Security-related configuration. Structure defined below.

    softwareConfig Property Map

    The config settings for software inside the cluster. Structure defined below.

    stagingBucket String

    The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

    tempBucket String

    The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.

    workerConfig Property Map

    The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.

    ClusterClusterConfigAutoscalingConfig, ClusterClusterConfigAutoscalingConfigArgs

    PolicyUri string

    The autoscaling policy used by the cluster.

    Only resource names including projectId and location (region) are valid. Examples:

    • https://www.googleapis.com/compute/v1/projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]
    • projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]

    Note that the policy must be in the same project and Cloud Dataproc region.


    PolicyUri string

    The autoscaling policy used by the cluster.

    Only resource names including projectId and location (region) are valid. Examples:

    • https://www.googleapis.com/compute/v1/projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]
    • projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]

    Note that the policy must be in the same project and Cloud Dataproc region.


    policyUri String

    The autoscaling policy used by the cluster.

    Only resource names including projectId and location (region) are valid. Examples:

    • https://www.googleapis.com/compute/v1/projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]
    • projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]

    Note that the policy must be in the same project and Cloud Dataproc region.


    policyUri string

    The autoscaling policy used by the cluster.

    Only resource names including projectId and location (region) are valid. Examples:

    • https://www.googleapis.com/compute/v1/projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]
    • projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]

    Note that the policy must be in the same project and Cloud Dataproc region.


    policy_uri str

    The autoscaling policy used by the cluster.

    Only resource names including projectId and location (region) are valid. Examples:

    • https://www.googleapis.com/compute/v1/projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]
    • projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]

    Note that the policy must be in the same project and Cloud Dataproc region.


    policyUri String

    The autoscaling policy used by the cluster.

    Only resource names including projectId and location (region) are valid. Examples:

    • https://www.googleapis.com/compute/v1/projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]
    • projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]

    Note that the policy must be in the same project and Cloud Dataproc region.
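
    As a minimal sketch in the TypeScript SDK (the [projectId] and [policy_id] segments are placeholders for an existing autoscaling policy):

    import * as gcp from "@pulumi/gcp";

    const cluster = new gcp.dataproc.Cluster("autoscaling-example", {
        region: "us-central1",
        clusterConfig: {
            autoscalingConfig: {
                // Short-form resource name; the policy must live in the same
                // project and region as the cluster.
                policyUri: "projects/[projectId]/locations/us-central1/autoscalingPolicies/[policy_id]",
            },
        },
    });

    As noted above, if autoscaling_config is the only field set in cluster_config, detach the policy by setting policyUri to "" rather than removing the whole block.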


    ClusterClusterConfigDataprocMetricConfig, ClusterClusterConfigDataprocMetricConfigArgs

    metrics List<Property Map>

    Metric sources to enable.

    ClusterClusterConfigDataprocMetricConfigMetric, ClusterClusterConfigDataprocMetricConfigMetricArgs

    MetricSource string

    A source for the collection of Dataproc OSS metrics (see available OSS metrics).

    MetricOverrides List<string>

    One or more available OSS metrics (https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.


    MetricSource string

    A source for the collection of Dataproc OSS metrics (see available OSS metrics).

    MetricOverrides []string

    One or more available OSS metrics (https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.


    metricSource String

    A source for the collection of Dataproc OSS metrics (see available OSS metrics).

    metricOverrides List<String>

    One or more available OSS metrics (https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.


    metricSource string

    A source for the collection of Dataproc OSS metrics (see available OSS metrics).

    metricOverrides string[]

    One or more available OSS metrics (https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.


    metric_source str

    A source for the collection of Dataproc OSS metrics (see available OSS metrics).

    metric_overrides Sequence[str]

    One or more available OSS metrics (https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.


    metricSource String

    A source for the collection of Dataproc OSS metrics (see available OSS metrics).

    metricOverrides List<String>

    One or more available OSS metrics (https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.
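
    For illustration, a sketch enabling collection from one OSS metric source. The "SPARK" source and the override name are assumptions drawn from the available OSS metrics list linked above; substitute values from that list:

    import * as gcp from "@pulumi/gcp";

    const cluster = new gcp.dataproc.Cluster("metrics-example", {
        region: "us-central1",
        clusterConfig: {
            dataprocMetricConfig: {
                metrics: [{
                    metricSource: "SPARK",
                    // Optional: restrict collection to specific metrics from this source.
                    metricOverrides: ["spark:driver:DAGScheduler:job.allJobs"],
                }],
            },
        },
    });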


    ClusterClusterConfigEncryptionConfig, ClusterClusterConfigEncryptionConfigArgs

    KmsKeyName string

    The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.


    KmsKeyName string

    The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.


    kmsKeyName String

    The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.


    kmsKeyName string

    The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.


    kms_key_name str

    The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.


    kmsKeyName String

    The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.
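
    A minimal sketch, assuming a pre-existing Cloud KMS key (the key path below is a placeholder):

    import * as gcp from "@pulumi/gcp";

    const cluster = new gcp.dataproc.Cluster("cmek-example", {
        region: "us-central1",
        clusterConfig: {
            encryptionConfig: {
                // Placeholder key path; the Dataproc service agent must have
                // Encrypter/Decrypter permissions on this key.
                kmsKeyName: "projects/[projectId]/locations/us-central1/keyRings/[key-ring]/cryptoKeys/[key]",
            },
        },
    });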


    ClusterClusterConfigEndpointConfig, ClusterClusterConfigEndpointConfigArgs

    EnableHttpPortAccess bool

    The flag to enable HTTP access to specific ports on the cluster from external sources (aka Component Gateway). Defaults to false.

    HttpPorts Dictionary<string, object>

    The map of port descriptions to URLs. Will only be populated if enable_http_port_access is true.

    EnableHttpPortAccess bool

    The flag to enable HTTP access to specific ports on the cluster from external sources (aka Component Gateway). Defaults to false.

    HttpPorts map[string]interface{}

    The map of port descriptions to URLs. Will only be populated if enable_http_port_access is true.

    enableHttpPortAccess Boolean

    The flag to enable HTTP access to specific ports on the cluster from external sources (aka Component Gateway). Defaults to false.

    httpPorts Map<String,Object>

    The map of port descriptions to URLs. Will only be populated if enable_http_port_access is true.

    enableHttpPortAccess boolean

    The flag to enable HTTP access to specific ports on the cluster from external sources (aka Component Gateway). Defaults to false.

    httpPorts {[key: string]: any}

    The map of port descriptions to URLs. Will only be populated if enable_http_port_access is true.

    enable_http_port_access bool

    The flag to enable HTTP access to specific ports on the cluster from external sources (aka Component Gateway). Defaults to false.

    http_ports Mapping[str, Any]

    The map of port descriptions to URLs. Will only be populated if enable_http_port_access is true.

    enableHttpPortAccess Boolean

    The flag to enable HTTP access to specific ports on the cluster from external sources (aka Component Gateway). Defaults to false.

    httpPorts Map<Any>

    The map of port descriptions to URLs. Will only be populated if enable_http_port_access is true.
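
    A minimal sketch turning on the Component Gateway and exporting the resulting URL map:

    import * as gcp from "@pulumi/gcp";

    const cluster = new gcp.dataproc.Cluster("gateway-example", {
        region: "us-central1",
        clusterConfig: {
            endpointConfig: {
                enableHttpPortAccess: true,
            },
        },
    });

    // httpPorts is only populated when enableHttpPortAccess is true.
    export const componentGatewayUrls = cluster.clusterConfig.apply(c => c.endpointConfig?.httpPorts);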

    ClusterClusterConfigGceClusterConfig, ClusterClusterConfigGceClusterConfigArgs

    InternalIpOnly bool

    By default, clusters are not restricted to internal IP addresses, and will have ephemeral external IP addresses assigned to each instance. If set to true, all instances in the cluster will only have internal IP addresses. Note: Private Google Access (also known as privateIpGoogleAccess) must be enabled on the subnetwork that the cluster will be launched in.

    Metadata Dictionary<string, string>

    A map of the Compute Engine metadata entries to add to all instances (see Project and instance metadata).

    Network string

    The name or self_link of the Google Compute Engine network the cluster will be part of. Conflicts with subnetwork. If neither is specified, this defaults to the "default" network.

    NodeGroupAffinity ClusterClusterConfigGceClusterConfigNodeGroupAffinity

    Node Group Affinity for sole-tenant clusters.

    ReservationAffinity ClusterClusterConfigGceClusterConfigReservationAffinity

    Reservation Affinity for consuming a zonal reservation.

    ServiceAccount string

    The service account to be used by the Node VMs. If not specified, the "default" service account is used.

    ServiceAccountScopes List<string>

    The set of Google API scopes to be made available on all of the node VMs under the service_account specified. Both OAuth2 URLs and gcloud short names are supported. To allow full access to all Cloud APIs, use the cloud-platform scope. See a complete list of scopes here.

    ShieldedInstanceConfig ClusterClusterConfigGceClusterConfigShieldedInstanceConfig

    Shielded Instance Config for clusters using Compute Engine Shielded VMs.


    Subnetwork string

    The name or self_link of the Google Compute Engine subnetwork the cluster will be part of. Conflicts with network.

    Tags List<string>

    The list of instance tags applied to instances in the cluster. Tags are used to identify valid sources or targets for network firewalls.

    Zone string

    The GCP zone where your data is stored and used (i.e., where the master and worker nodes will be created). If region is set to 'global' (default) then zone is mandatory; otherwise GCP can use Auto Zone Placement to determine it automatically for you. Note: This setting additionally determines and restricts which computing resources are available for use with other configs such as cluster_config.master_config.machine_type and cluster_config.worker_config.machine_type.

    InternalIpOnly bool

    By default, clusters are not restricted to internal IP addresses, and will have ephemeral external IP addresses assigned to each instance. If set to true, all instances in the cluster will only have internal IP addresses. Note: Private Google Access (also known as privateIpGoogleAccess) must be enabled on the subnetwork that the cluster will be launched in.

    Metadata map[string]string

    A map of the Compute Engine metadata entries to add to all instances (see Project and instance metadata).

    Network string

    The name or self_link of the Google Compute Engine network the cluster will be part of. Conflicts with subnetwork. If neither is specified, this defaults to the "default" network.

    NodeGroupAffinity ClusterClusterConfigGceClusterConfigNodeGroupAffinity

    Node Group Affinity for sole-tenant clusters.

    ReservationAffinity ClusterClusterConfigGceClusterConfigReservationAffinity

    Reservation Affinity for consuming a zonal reservation.

    ServiceAccount string

    The service account to be used by the Node VMs. If not specified, the "default" service account is used.

    ServiceAccountScopes []string

    The set of Google API scopes to be made available on all of the node VMs under the service_account specified. Both OAuth2 URLs and gcloud short names are supported. To allow full access to all Cloud APIs, use the cloud-platform scope. See a complete list of scopes here.

    ShieldedInstanceConfig ClusterClusterConfigGceClusterConfigShieldedInstanceConfig

    Shielded Instance Config for clusters using Compute Engine Shielded VMs.


    Subnetwork string

    The name or self_link of the Google Compute Engine subnetwork the cluster will be part of. Conflicts with network.

    Tags []string

    The list of instance tags applied to instances in the cluster. Tags are used to identify valid sources or targets for network firewalls.

    Zone string

    The GCP zone where your data is stored and used (i.e., where the master and worker nodes will be created). If region is set to 'global' (default) then zone is mandatory; otherwise GCP can use Auto Zone Placement to determine it automatically for you. Note: This setting additionally determines and restricts which computing resources are available for use with other configs such as cluster_config.master_config.machine_type and cluster_config.worker_config.machine_type.

    internalIpOnly Boolean

    By default, clusters are not restricted to internal IP addresses, and will have ephemeral external IP addresses assigned to each instance. If set to true, all instances in the cluster will only have internal IP addresses. Note: Private Google Access (also known as privateIpGoogleAccess) must be enabled on the subnetwork that the cluster will be launched in.

    metadata Map<String,String>

    A map of the Compute Engine metadata entries to add to all instances (see Project and instance metadata).

    network String

    The name or self_link of the Google Compute Engine network the cluster will be part of. Conflicts with subnetwork. If neither is specified, this defaults to the "default" network.

    nodeGroupAffinity ClusterClusterConfigGceClusterConfigNodeGroupAffinity

    Node Group Affinity for sole-tenant clusters.

    reservationAffinity ClusterClusterConfigGceClusterConfigReservationAffinity

    Reservation Affinity for consuming a zonal reservation.

    serviceAccount String

    The service account to be used by the Node VMs. If not specified, the "default" service account is used.

    serviceAccountScopes List<String>

    The set of Google API scopes to be made available on all of the node VMs under the service_account specified. Both OAuth2 URLs and gcloud short names are supported. To allow full access to all Cloud APIs, use the cloud-platform scope. See a complete list of scopes here.

    shieldedInstanceConfig ClusterClusterConfigGceClusterConfigShieldedInstanceConfig

    Shielded Instance Config for clusters using Compute Engine Shielded VMs.


    subnetwork String

    The name or self_link of the Google Compute Engine subnetwork the cluster will be part of. Conflicts with network.

    tags List<String>

    The list of instance tags applied to instances in the cluster. Tags are used to identify valid sources or targets for network firewalls.

    zone String

    The GCP zone where your data is stored and used (i.e., where the master and worker nodes will be created). If region is set to 'global' (default) then zone is mandatory; otherwise GCP can use Auto Zone Placement to determine it automatically for you. Note: This setting additionally determines and restricts which computing resources are available for use with other configs such as cluster_config.master_config.machine_type and cluster_config.worker_config.machine_type.

    internalIpOnly boolean

    By default, clusters are not restricted to internal IP addresses, and will have ephemeral external IP addresses assigned to each instance. If set to true, all instances in the cluster will only have internal IP addresses. Note: Private Google Access (also known as privateIpGoogleAccess) must be enabled on the subnetwork that the cluster will be launched in.

    metadata {[key: string]: string}

    A map of the Compute Engine metadata entries to add to all instances (see Project and instance metadata).

    network string

    The name or self_link of the Google Compute Engine network the cluster will be part of. Conflicts with subnetwork. If neither is specified, this defaults to the "default" network.

    nodeGroupAffinity ClusterClusterConfigGceClusterConfigNodeGroupAffinity

    Node Group Affinity for sole-tenant clusters.

    reservationAffinity ClusterClusterConfigGceClusterConfigReservationAffinity

    Reservation Affinity for consuming a zonal reservation.

    serviceAccount string

    The service account to be used by the Node VMs. If not specified, the "default" service account is used.

    serviceAccountScopes string[]

    The set of Google API scopes to be made available on all of the node VMs under the service_account specified. Both OAuth2 URLs and gcloud short names are supported. To allow full access to all Cloud APIs, use the cloud-platform scope. See a complete list of scopes here.

    shieldedInstanceConfig ClusterClusterConfigGceClusterConfigShieldedInstanceConfig

    Shielded Instance Config for clusters using Compute Engine Shielded VMs.


    subnetwork string

    The name or self_link of the Google Compute Engine subnetwork the cluster will be part of. Conflicts with network.

    tags string[]

    The list of instance tags applied to instances in the cluster. Tags are used to identify valid sources or targets for network firewalls.

    zone string

    The GCP zone where your data is stored and used (i.e., where the master and worker nodes will be created). If region is set to 'global' (default) then zone is mandatory; otherwise GCP can use Auto Zone Placement to determine it automatically for you. Note: This setting additionally determines and restricts which computing resources are available for use with other configs such as cluster_config.master_config.machine_type and cluster_config.worker_config.machine_type.

    internal_ip_only bool

    By default, clusters are not restricted to internal IP addresses, and will have ephemeral external IP addresses assigned to each instance. If set to true, all instances in the cluster will only have internal IP addresses. Note: Private Google Access (also known as privateIpGoogleAccess) must be enabled on the subnetwork that the cluster will be launched in.

    metadata Mapping[str, str]

    A map of the Compute Engine metadata entries to add to all instances (see Project and instance metadata).

    network str

    The name or self_link of the Google Compute Engine network the cluster will be part of. Conflicts with subnetwork. If neither is specified, this defaults to the "default" network.

    node_group_affinity ClusterClusterConfigGceClusterConfigNodeGroupAffinity

    Node Group Affinity for sole-tenant clusters.

    reservation_affinity ClusterClusterConfigGceClusterConfigReservationAffinity

    Reservation Affinity for consuming a zonal reservation.

    service_account str

    The service account to be used by the Node VMs. If not specified, the "default" service account is used.

    service_account_scopes Sequence[str]

    The set of Google API scopes to be made available on all of the node VMs under the service_account specified. Both OAuth2 URLs and gcloud short names are supported. To allow full access to all Cloud APIs, use the cloud-platform scope. See a complete list of scopes here.

    shielded_instance_config ClusterClusterConfigGceClusterConfigShieldedInstanceConfig

    Shielded Instance Config for clusters using Compute Engine Shielded VMs.


    subnetwork str

    The name or self_link of the Google Compute Engine subnetwork the cluster will be part of. Conflicts with network.

    tags Sequence[str]

    The list of instance tags applied to instances in the cluster. Tags are used to identify valid sources or targets for network firewalls.

    zone str

    The GCP zone where your data is stored and used (i.e., where the master and worker nodes will be created). If region is set to 'global' (default) then zone is mandatory; otherwise GCP can use Auto Zone Placement to determine it automatically for you. Note: This setting additionally determines and restricts which computing resources are available for use with other configs such as cluster_config.master_config.machine_type and cluster_config.worker_config.machine_type.

    internalIpOnly Boolean

    By default, clusters are not restricted to internal IP addresses, and will have ephemeral external IP addresses assigned to each instance. If set to true, all instances in the cluster will only have internal IP addresses. Note: Private Google Access (also known as privateIpGoogleAccess) must be enabled on the subnetwork that the cluster will be launched in.

    metadata Map<String>

    A map of the Compute Engine metadata entries to add to all instances (see Project and instance metadata).

    network String

    The name or self_link of the Google Compute Engine network the cluster will be part of. Conflicts with subnetwork. If neither is specified, this defaults to the "default" network.

    nodeGroupAffinity Property Map

    Node Group Affinity for sole-tenant clusters.

    reservationAffinity Property Map

    Reservation Affinity for consuming a zonal reservation.

    serviceAccount String

    The service account to be used by the Node VMs. If not specified, the "default" service account is used.

    serviceAccountScopes List<String>

    The set of Google API scopes to be made available on all of the node VMs under the service_account specified. Both OAuth2 URLs and gcloud short names are supported. To allow full access to all Cloud APIs, use the cloud-platform scope. See a complete list of scopes here.

    shieldedInstanceConfig Property Map

    Shielded Instance Config for clusters using Compute Engine Shielded VMs.


    subnetwork String

    The name or self_link of the Google Compute Engine subnetwork the cluster will be part of. Conflicts with network.

    tags List<String>

    The list of instance tags applied to instances in the cluster. Tags are used to identify valid sources or targets for network firewalls.

    zone String

    The GCP zone where your data is stored and used (i.e., where the master and worker nodes will be created). If region is set to 'global' (default) then zone is mandatory; otherwise GCP can use Auto Zone Placement to determine it automatically for you. Note: This setting additionally determines and restricts which computing resources are available for use with other configs such as cluster_config.master_config.machine_type and cluster_config.worker_config.machine_type.
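
    Putting several of these fields together, a sketch of a VPC-internal cluster (the subnetwork and service account names are placeholders):

    import * as gcp from "@pulumi/gcp";

    const cluster = new gcp.dataproc.Cluster("gce-config-example", {
        region: "us-central1",
        clusterConfig: {
            gceClusterConfig: {
                zone: "us-central1-a",
                // Requires Private Google Access on the chosen subnetwork.
                internalIpOnly: true,
                subnetwork: "my-subnetwork",
                serviceAccount: "my-sa@my-project.iam.gserviceaccount.com",
                serviceAccountScopes: ["cloud-platform"],
                tags: ["dataproc"],
            },
        },
    });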

    ClusterClusterConfigGceClusterConfigNodeGroupAffinity, ClusterClusterConfigGceClusterConfigNodeGroupAffinityArgs

    NodeGroupUri string

    The URI of a sole-tenant node group resource that the cluster will be created on.

    NodeGroupUri string

    The URI of a sole-tenant node group resource that the cluster will be created on.

    nodeGroupUri String

    The URI of a sole-tenant node group resource that the cluster will be created on.

    nodeGroupUri string

    The URI of a sole-tenant node group resource that the cluster will be created on.

    node_group_uri str

    The URI of a sole-tenant node group resource that the cluster will be created on.

    nodeGroupUri String

    The URI of a sole-tenant node group resource that the cluster will be created on.

    ClusterClusterConfigGceClusterConfigReservationAffinity, ClusterClusterConfigGceClusterConfigReservationAffinityArgs

    ConsumeReservationType string

    Corresponds to the type of reservation consumption.

    Key string

    Corresponds to the label key of reservation resource.

    Values List<string>

    Corresponds to the label values of reservation resource.

    ConsumeReservationType string

    Corresponds to the type of reservation consumption.

    Key string

    Corresponds to the label key of reservation resource.

    Values []string

    Corresponds to the label values of reservation resource.

    consumeReservationType String

    Corresponds to the type of reservation consumption.

    key String

    Corresponds to the label key of reservation resource.

    values List<String>

    Corresponds to the label values of reservation resource.

    consumeReservationType string

    Corresponds to the type of reservation consumption.

    key string

    Corresponds to the label key of reservation resource.

    values string[]

    Corresponds to the label values of reservation resource.

    consume_reservation_type str

    Corresponds to the type of reservation consumption.

    key str

    Corresponds to the label key of reservation resource.

    values Sequence[str]

    Corresponds to the label values of reservation resource.

    consumeReservationType String

    Corresponds to the type of reservation consumption.

    key String

    Corresponds to the label key of reservation resource.

    values List<String>

    Corresponds to the label values of reservation resource.

    ClusterClusterConfigGceClusterConfigShieldedInstanceConfig, ClusterClusterConfigGceClusterConfigShieldedInstanceConfigArgs

    EnableIntegrityMonitoring bool

    Defines whether instances have integrity monitoring enabled.


    EnableSecureBoot bool

    Defines whether instances have Secure Boot enabled.

    EnableVtpm bool

    Defines whether instances have the vTPM enabled.

    EnableIntegrityMonitoring bool

    Defines whether instances have integrity monitoring enabled.


    EnableSecureBoot bool

    Defines whether instances have Secure Boot enabled.

    EnableVtpm bool

    Defines whether instances have the vTPM enabled.

    enableIntegrityMonitoring Boolean

    Defines whether instances have integrity monitoring enabled.


    enableSecureBoot Boolean

    Defines whether instances have Secure Boot enabled.

    enableVtpm Boolean

    Defines whether instances have the vTPM enabled.

    enableIntegrityMonitoring boolean

    Defines whether instances have integrity monitoring enabled.


    enableSecureBoot boolean

    Defines whether instances have Secure Boot enabled.

    enableVtpm boolean

    Defines whether instances have the vTPM enabled.

    enable_integrity_monitoring bool

    Defines whether instances have integrity monitoring enabled.


    enable_secure_boot bool

    Defines whether instances have Secure Boot enabled.

    enable_vtpm bool

    Defines whether instances have the vTPM enabled.

    enableIntegrityMonitoring Boolean

    Defines whether instances have integrity monitoring enabled.


    enableSecureBoot Boolean

    Defines whether instances have Secure Boot enabled.

    enableVtpm Boolean

    Defines whether instances have the vTPM enabled.
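
    A minimal sketch enabling all three Shielded VM options:

    import * as gcp from "@pulumi/gcp";

    const cluster = new gcp.dataproc.Cluster("shielded-example", {
        region: "us-central1",
        clusterConfig: {
            gceClusterConfig: {
                shieldedInstanceConfig: {
                    enableSecureBoot: true,
                    enableVtpm: true,
                    enableIntegrityMonitoring: true,
                },
            },
        },
    });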

    ClusterClusterConfigInitializationAction, ClusterClusterConfigInitializationActionArgs

    Script string

    The script to be executed during initialization of the cluster. The script must be a GCS file with a gs:// prefix.

    TimeoutSec int

    The maximum duration (in seconds) that the script is allowed to take to execute its action. GCP will default to a predetermined computed value if not set (currently 300).


    Script string

    The script to be executed during initialization of the cluster. The script must be a GCS file with a gs:// prefix.

    TimeoutSec int

    The maximum duration (in seconds) that the script is allowed to take to execute its action. GCP will default to a predetermined computed value if not set (currently 300).


    script String

    The script to be executed during initialization of the cluster. The script must be a GCS file with a gs:// prefix.

    timeoutSec Integer

    The maximum duration (in seconds) that the script is allowed to take to execute its action. GCP will default to a predetermined computed value if not set (currently 300).


    script string

    The script to be executed during initialization of the cluster. The script must be a GCS file with a gs:// prefix.

    timeoutSec number

    The maximum duration (in seconds) that the script is allowed to take to execute its action. GCP will default to a predetermined computed value if not set (currently 300).


    script str

    The script to be executed during initialization of the cluster. The script must be a GCS file with a gs:// prefix.

    timeout_sec int

    The maximum duration (in seconds) that the script is allowed to take to execute its action. GCP will default to a predetermined computed value if not set (currently 300).


    script String

    The script to be executed during initialization of the cluster. The script must be a GCS file with a gs:// prefix.

    timeoutSec Number

    The maximum duration (in seconds) that the script is allowed to take to execute its action. GCP will default to a predetermined computed value if not set (currently 300).
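
    A sketch running one startup script; the gs:// path is a placeholder for a script you have staged in GCS:

    import * as gcp from "@pulumi/gcp";

    const cluster = new gcp.dataproc.Cluster("init-example", {
        region: "us-central1",
        clusterConfig: {
            initializationActions: [{
                script: "gs://my-bucket/scripts/setup.sh", // placeholder path
                timeoutSec: 500,                           // overrides the 300s default
            }],
        },
    });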


    ClusterClusterConfigLifecycleConfig, ClusterClusterConfigLifecycleConfigArgs

    AutoDeleteTime string

    The time when the cluster will be auto-deleted. A timestamp in RFC3339 UTC "Zulu" format, accurate to nanoseconds. Example: "2014-10-02T15:01:23.045123456Z".


    IdleDeleteTtl string

    The duration to keep the cluster alive while idling (no jobs running). After this TTL, the cluster will be deleted. Valid range: [10m, 14d].

    IdleStartTime string
    AutoDeleteTime string

    The time when the cluster will be auto-deleted. A timestamp in RFC3339 UTC "Zulu" format, accurate to nanoseconds. Example: "2014-10-02T15:01:23.045123456Z".


    IdleDeleteTtl string

    The duration to keep the cluster alive while idling (no jobs running). After this TTL, the cluster will be deleted. Valid range: [10m, 14d].

    IdleStartTime string
    autoDeleteTime String

    The time when the cluster will be auto-deleted. A timestamp in RFC3339 UTC "Zulu" format, accurate to nanoseconds. Example: "2014-10-02T15:01:23.045123456Z".


    idleDeleteTtl String

    The duration to keep the cluster alive while idling (no jobs running). After this TTL, the cluster will be deleted. Valid range: [10m, 14d].

    idleStartTime String
    autoDeleteTime string

    The time when the cluster will be auto-deleted. A timestamp in RFC3339 UTC "Zulu" format, accurate to nanoseconds. Example: "2014-10-02T15:01:23.045123456Z".


    idleDeleteTtl string

    The duration to keep the cluster alive while idling (no jobs running). After this TTL, the cluster will be deleted. Valid range: [10m, 14d].

    idleStartTime string
    auto_delete_time str

    The time when the cluster will be auto-deleted. A timestamp in RFC3339 UTC "Zulu" format, accurate to nanoseconds. Example: "2014-10-02T15:01:23.045123456Z".


    idle_delete_ttl str

    The duration to keep the cluster alive while idling (no jobs running). After this TTL, the cluster will be deleted. Valid range: [10m, 14d].

    idle_start_time str
    autoDeleteTime String

    The time when the cluster will be auto-deleted. A timestamp in RFC3339 UTC "Zulu" format, accurate to nanoseconds. Example: "2014-10-02T15:01:23.045123456Z".


    idleDeleteTtl String

    The duration to keep the cluster alive while idling (no jobs running). After this TTL, the cluster will be deleted. Valid range: [10m, 14d].

    idleStartTime String
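
    A sketch combining both deletion triggers. The timestamp is a placeholder, the seconds-suffixed duration format is an assumption, and idleDeleteTtl must fall within [10m, 14d]:

    import * as gcp from "@pulumi/gcp";

    const cluster = new gcp.dataproc.Cluster("lifecycle-example", {
        region: "us-central1",
        clusterConfig: {
            lifecycleConfig: {
                idleDeleteTtl: "3600s",                     // delete after 1 hour idle
                autoDeleteTime: "2024-01-01T12:00:00.000Z", // placeholder RFC3339 timestamp
            },
        },
    });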

    ClusterClusterConfigMasterConfig, ClusterClusterConfigMasterConfigArgs

    Accelerators List<ClusterClusterConfigMasterConfigAccelerator>

    The Compute Engine accelerator (GPU) configuration for these instances. Can be specified multiple times.

    DiskConfig ClusterClusterConfigMasterConfigDiskConfig

    Disk Config

    ImageUri string

    The URI for the image to use for these instances. See the guide for more information.

    InstanceNames List<string>
    MachineType string

    The name of a Google Compute Engine machine type to create for the master. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    MinCpuPlatform string

    The name of a minimum generation of CPU family for the master. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    NumInstances int

    Specifies the number of master nodes to create. If not specified, GCP will default to a predetermined computed value (currently 1).

    Accelerators []ClusterClusterConfigMasterConfigAccelerator

    The Compute Engine accelerator (GPU) configuration for these instances. Can be specified multiple times.

    DiskConfig ClusterClusterConfigMasterConfigDiskConfig

    Disk Config

    ImageUri string

    The URI for the image to use for these instances. See the guide for more information.

    InstanceNames []string
    MachineType string

    The name of a Google Compute Engine machine type to create for the master. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    MinCpuPlatform string

    The name of a minimum generation of CPU family for the master. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    NumInstances int

    Specifies the number of master nodes to create. If not specified, GCP will default to a predetermined computed value (currently 1).

    accelerators List<ClusterClusterConfigMasterConfigAccelerator>

    The Compute Engine accelerator (GPU) configuration for these instances. Can be specified multiple times.

    diskConfig ClusterClusterConfigMasterConfigDiskConfig

    Disk Config

    imageUri String

    The URI for the image to use for these instances. See the guide for more information.

    instanceNames List<String>
    machineType String

    The name of a Google Compute Engine machine type to create for the master. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    minCpuPlatform String

    The name of a minimum generation of CPU family for the master. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    numInstances Integer

    Specifies the number of master nodes to create. If not specified, GCP will default to a predetermined computed value (currently 1).

    accelerators ClusterClusterConfigMasterConfigAccelerator[]

    The Compute Engine accelerator (GPU) configuration for these instances. Can be specified multiple times.

    diskConfig ClusterClusterConfigMasterConfigDiskConfig

    Disk Config

    imageUri string

    The URI for the image to use for these instances. See the guide for more information.

    instanceNames string[]
    machineType string

    The name of a Google Compute Engine machine type to create for the master. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    minCpuPlatform string

    The name of a minimum generation of CPU family for the master. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    numInstances number

    Specifies the number of master nodes to create. If not specified, GCP will default to a predetermined computed value (currently 1).

    accelerators Sequence[ClusterClusterConfigMasterConfigAccelerator]

    The Compute Engine accelerator (GPU) configuration for these instances. Can be specified multiple times.

    disk_config ClusterClusterConfigMasterConfigDiskConfig

    Disk Config

    image_uri str

    The URI for the image to use for these instances. See the guide for more information.

    instance_names Sequence[str]
    machine_type str

    The name of a Google Compute Engine machine type to create for the master. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    min_cpu_platform str

    The name of a minimum generation of CPU family for the master. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    num_instances int

    Specifies the number of master nodes to create. If not specified, GCP will default to a predetermined computed value (currently 1).

    accelerators List<Property Map>

    The Compute Engine accelerator (GPU) configuration for these instances. Can be specified multiple times.

    diskConfig Property Map

    Disk Config

    imageUri String

    The URI for the image to use for these instances. See the guide for more information.

    instanceNames List<String>
    machineType String

    The name of a Google Compute Engine machine type to create for the master. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    minCpuPlatform String

    The name of a minimum generation of CPU family for the master. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    numInstances Number

    Specifies the number of master nodes to create. If not specified, GCP will default to a predetermined computed value (currently 1).
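
    A sketch of a single-master configuration that sets the fields explicitly rather than relying on the computed defaults:

    import * as gcp from "@pulumi/gcp";

    const cluster = new gcp.dataproc.Cluster("master-example", {
        region: "us-central1",
        clusterConfig: {
            masterConfig: {
                numInstances: 1,              // the current GCP default
                machineType: "n1-standard-4", // also the current default, shown for explicitness
            },
        },
    });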

    ClusterClusterConfigMasterConfigAccelerator, ClusterClusterConfigMasterConfigAcceleratorArgs

    AcceleratorCount int

    The number of accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

    The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check accelerator availability by zone if you are trying to use accelerators in a given zone.




    AcceleratorType string

    The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

    AcceleratorCount int

    The number of accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

    The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check accelerator availability by zone if you are trying to use accelerators in a given zone.




    AcceleratorType string

    The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

    acceleratorCount Integer

    The number of accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

    The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check accelerator availability by zone if you are trying to use accelerators in a given zone.




    acceleratorType String

    The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

    acceleratorCount number

    The number of accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

    The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check accelerator availability by zone if you are trying to use accelerators in a given zone.




    acceleratorType string

    The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

    accelerator_count int

    The number of accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

    The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check accelerator availability by zone if you are trying to use accelerators in a given zone.




    accelerator_type str

    The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

    acceleratorCount Number

    The number of accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

    The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check accelerator availability by zone if you are trying to use accelerators in a given zone.




    acceleratorType String

    The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.
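
    Given the Auto Zone Placement caveat above, a sketch that pins the zone while attaching a GPU (whether nvidia-tesla-k80 is available in the chosen zone is an assumption to verify):

    import * as gcp from "@pulumi/gcp";

    const cluster = new gcp.dataproc.Cluster("gpu-example", {
        region: "us-central1",
        clusterConfig: {
            gceClusterConfig: {
                // Pin the zone so the accelerator's availability is deterministic.
                zone: "us-central1-a",
            },
            masterConfig: {
                accelerators: [{
                    acceleratorType: "nvidia-tesla-k80",
                    acceleratorCount: 1,
                }],
            },
        },
    });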

    ClusterClusterConfigMasterConfigDiskConfig, ClusterClusterConfigMasterConfigDiskConfigArgs

    BootDiskSizeGb int

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.


    BootDiskType string

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    NumLocalSsds int

    The number of local SSD disks that will be attached to each master cluster node. Defaults to 0.


    BootDiskSizeGb int

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.


    BootDiskType string

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    NumLocalSsds int

    The number of local SSD disks that will be attached to each master cluster node. Defaults to 0.


    bootDiskSizeGb Integer

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.


    bootDiskType String

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    numLocalSsds Integer

    The number of local SSD disks that will be attached to each master cluster node. Defaults to 0.


    bootDiskSizeGb number

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.


    bootDiskType string

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    numLocalSsds number

    The number of local SSD disks that will be attached to each master cluster node. Defaults to 0.


    boot_disk_size_gb int

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.


    boot_disk_type str

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    num_local_ssds int

    The number of local SSD disks that will be attached to each master cluster node. Defaults to 0.


    bootDiskSizeGb Number

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.


    bootDiskType String

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    numLocalSsds Number

    The number of local SSD disks that will be attached to each master cluster node. Defaults to 0.
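
    A sketch of a full disk configuration; the same three fields apply wherever this block appears (master, worker, or preemptible worker):

    import * as gcp from "@pulumi/gcp";

    const cluster = new gcp.dataproc.Cluster("disk-example", {
        region: "us-central1",
        clusterConfig: {
            masterConfig: {
                diskConfig: {
                    bootDiskSizeGb: 100,    // minimum 10, default currently 500
                    bootDiskType: "pd-ssd", // or "pd-standard" (the default)
                    numLocalSsds: 1,
                },
            },
        },
    });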


    ClusterClusterConfigMetastoreConfig, ClusterClusterConfigMetastoreConfigArgs

    DataprocMetastoreService string

    Resource name of an existing Dataproc Metastore service.

    Only resource names including projectId and location (region) are valid. Example:

    projects/[projectId]/locations/[dataproc_region]/services/[service-name]

    DataprocMetastoreService string

    Resource name of an existing Dataproc Metastore service.

    Only resource names including projectId and location (region) are valid. Example:

    projects/[projectId]/locations/[dataproc_region]/services/[service-name]

    dataprocMetastoreService String

    Resource name of an existing Dataproc Metastore service.

    Only resource names including projectId and location (region) are valid. Example:

    projects/[projectId]/locations/[dataproc_region]/services/[service-name]

    dataprocMetastoreService string

    Resource name of an existing Dataproc Metastore service.

    Only resource names including projectId and location (region) are valid. Example:

    projects/[projectId]/locations/[dataproc_region]/services/[service-name]

    dataproc_metastore_service str

    Resource name of an existing Dataproc Metastore service.

    Only resource names including projectId and location (region) are valid. Example:

    projects/[projectId]/locations/[dataproc_region]/services/[service-name]

    dataprocMetastoreService String

    Resource name of an existing Dataproc Metastore service.

    Only resource names including projectId and location (region) are valid. Example:

    projects/[projectId]/locations/[dataproc_region]/services/[service-name]
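
    A minimal sketch, assuming an existing Dataproc Metastore service (the bracketed segments are placeholders):

    import * as gcp from "@pulumi/gcp";

    const cluster = new gcp.dataproc.Cluster("metastore-example", {
        region: "us-central1",
        clusterConfig: {
            metastoreConfig: {
                // Must be in the same project and region as the cluster.
                dataprocMetastoreService: "projects/[projectId]/locations/us-central1/services/[service-name]",
            },
        },
    });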

    ClusterClusterConfigPreemptibleWorkerConfig, ClusterClusterConfigPreemptibleWorkerConfigArgs

    DiskConfig ClusterClusterConfigPreemptibleWorkerConfigDiskConfig

    Disk Config

    InstanceNames List<string>
    NumInstances int

    Specifies the number of preemptible nodes to create. Defaults to 0.

    Preemptibility string

    Specifies the preemptibility of the secondary workers. The default value is PREEMPTIBLE. Accepted values are:

    • PREEMPTIBILITY_UNSPECIFIED
    • NON_PREEMPTIBLE
    • PREEMPTIBLE
    DiskConfig ClusterClusterConfigPreemptibleWorkerConfigDiskConfig

    Disk Config

    InstanceNames []string
    NumInstances int

    Specifies the number of preemptible nodes to create. Defaults to 0.

    Preemptibility string

    Specifies the preemptibility of the secondary workers. The default value is PREEMPTIBLE. Accepted values are:

    • PREEMPTIBILITY_UNSPECIFIED
    • NON_PREEMPTIBLE
    • PREEMPTIBLE
    diskConfig ClusterClusterConfigPreemptibleWorkerConfigDiskConfig

    Disk Config

    instanceNames List<String>
    numInstances Integer

    Specifies the number of preemptible nodes to create. Defaults to 0.

    preemptibility String

    Specifies the preemptibility of the secondary workers. The default value is PREEMPTIBLE. Accepted values are:

    • PREEMPTIBILITY_UNSPECIFIED
    • NON_PREEMPTIBLE
    • PREEMPTIBLE
    diskConfig ClusterClusterConfigPreemptibleWorkerConfigDiskConfig

    Disk Config

    instanceNames string[]
    numInstances number

    Specifies the number of preemptible nodes to create. Defaults to 0.

    preemptibility string

    Specifies the preemptibility of the secondary workers. The default value is PREEMPTIBLE. Accepted values are:

    • PREEMPTIBILITY_UNSPECIFIED
    • NON_PREEMPTIBLE
    • PREEMPTIBLE
    disk_config ClusterClusterConfigPreemptibleWorkerConfigDiskConfig

    Disk Config

    instance_names Sequence[str]
    num_instances int

    Specifies the number of preemptible nodes to create. Defaults to 0.

    preemptibility str

    Specifies the preemptibility of the secondary workers. The default value is PREEMPTIBLE. Accepted values are:

    • PREEMPTIBILITY_UNSPECIFIED
    • NON_PREEMPTIBLE
    • PREEMPTIBLE
    diskConfig Property Map

    Disk Config

    instanceNames List<String>
    numInstances Number

    Specifies the number of preemptible nodes to create. Defaults to 0.

    preemptibility String

    Specifies the preemptibility of the secondary workers. The default value is PREEMPTIBLE. Accepted values are:

    • PREEMPTIBILITY_UNSPECIFIED
    • NON_PREEMPTIBLE
    • PREEMPTIBLE

    ClusterClusterConfigPreemptibleWorkerConfigDiskConfig, ClusterClusterConfigPreemptibleWorkerConfigDiskConfigArgs

    BootDiskSizeGb int

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

    BootDiskType string

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    NumLocalSsds int

    The number of local SSD disks attached to each preemptible worker node. Defaults to 0.


    BootDiskSizeGb int

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

    BootDiskType string

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    NumLocalSsds int

    The number of local SSD disks attached to each preemptible worker node. Defaults to 0.


    bootDiskSizeGb Integer

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

    bootDiskType String

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    numLocalSsds Integer

    The number of local SSD disks attached to each preemptible worker node. Defaults to 0.


    bootDiskSizeGb number

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

    bootDiskType string

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    numLocalSsds number

    The number of local SSD disks attached to each preemptible worker node. Defaults to 0.


    boot_disk_size_gb int

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

    boot_disk_type str

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    num_local_ssds int

    The number of local SSD disks attached to each preemptible worker node. Defaults to 0.


    bootDiskSizeGb Number

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

    bootDiskType String

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    numLocalSsds Number

    The number of local SSD disks attached to each preemptible worker node. Defaults to 0.
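
    Putting the two preceding structures together, a hedged TypeScript sketch of a secondary-worker pool; the instance count, preemptibility, and disk values are illustrative:

    import * as gcp from "@pulumi/gcp";

    // Hypothetical: two preemptible secondary workers with a small boot disk.
    const cluster = new gcp.dataproc.Cluster("preemptible-demo", {
        region: "us-central1",
        clusterConfig: {
            preemptibleWorkerConfig: {
                numInstances: 2,
                preemptibility: "PREEMPTIBLE",  // or NON_PREEMPTIBLE / PREEMPTIBILITY_UNSPECIFIED
                diskConfig: {
                    bootDiskSizeGb: 30,
                    bootDiskType: "pd-standard",
                },
            },
        },
    });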


    ClusterClusterConfigSecurityConfig, ClusterClusterConfigSecurityConfigArgs

    kerberosConfig Property Map

    Kerberos Configuration

    ClusterClusterConfigSecurityConfigKerberosConfig, ClusterClusterConfigSecurityConfigKerberosConfigArgs

    KmsKeyUri string

    The URI of the KMS key used to encrypt various sensitive files.

    RootPrincipalPasswordUri string

    The Cloud Storage URI of a KMS encrypted file containing the root principal password.

    CrossRealmTrustAdminServer string

    The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

    CrossRealmTrustKdc string

    The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

    CrossRealmTrustRealm string

    The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.

    CrossRealmTrustSharedPasswordUri string

    The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.

    EnableKerberos bool

    Flag to indicate whether to Kerberize the cluster.

    KdcDbKeyUri string

    The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.

    KeyPasswordUri string

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.

    KeystorePasswordUri string

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificate, the password is generated by Dataproc.

    KeystoreUri string

    The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

    Realm string

    The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.

    TgtLifetimeHours int

    The lifetime of the ticket granting ticket, in hours.

    TruststorePasswordUri string

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.

    TruststoreUri string

    The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.


    KmsKeyUri string

    The URI of the KMS key used to encrypt various sensitive files.

    RootPrincipalPasswordUri string

    The Cloud Storage URI of a KMS encrypted file containing the root principal password.

    CrossRealmTrustAdminServer string

    The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

    CrossRealmTrustKdc string

    The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

    CrossRealmTrustRealm string

    The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.

    CrossRealmTrustSharedPasswordUri string

    The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.

    EnableKerberos bool

    Flag to indicate whether to Kerberize the cluster.

    KdcDbKeyUri string

    The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.

    KeyPasswordUri string

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.

    KeystorePasswordUri string

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificate, the password is generated by Dataproc.

    KeystoreUri string

    The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

    Realm string

    The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.

    TgtLifetimeHours int

    The lifetime of the ticket granting ticket, in hours.

    TruststorePasswordUri string

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.

    TruststoreUri string

    The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.


    kmsKeyUri String

    The URI of the KMS key used to encrypt various sensitive files.

    rootPrincipalPasswordUri String

    The Cloud Storage URI of a KMS encrypted file containing the root principal password.

    crossRealmTrustAdminServer String

    The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

    crossRealmTrustKdc String

    The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

    crossRealmTrustRealm String

    The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.

    crossRealmTrustSharedPasswordUri String

    The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.

    enableKerberos Boolean

    Flag to indicate whether to Kerberize the cluster.

    kdcDbKeyUri String

    The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.

    keyPasswordUri String

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.

    keystorePasswordUri String

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificate, the password is generated by Dataproc.

    keystoreUri String

    The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

    realm String

    The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.

    tgtLifetimeHours Integer

    The lifetime of the ticket granting ticket, in hours.

    truststorePasswordUri String

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.

    truststoreUri String

    The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.


    kmsKeyUri string

    The URI of the KMS key used to encrypt various sensitive files.

    rootPrincipalPasswordUri string

    The Cloud Storage URI of a KMS encrypted file containing the root principal password.

    crossRealmTrustAdminServer string

    The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

    crossRealmTrustKdc string

    The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

    crossRealmTrustRealm string

    The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.

    crossRealmTrustSharedPasswordUri string

    The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.

    enableKerberos boolean

    Flag to indicate whether to Kerberize the cluster.

    kdcDbKeyUri string

    The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.

    keyPasswordUri string

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.

    keystorePasswordUri string

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificate, the password is generated by Dataproc.

    keystoreUri string

    The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

    realm string

    The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.

    tgtLifetimeHours number

    The lifetime of the ticket granting ticket, in hours.

    truststorePasswordUri string

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.

    truststoreUri string

    The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.


    kms_key_uri str

    The URI of the KMS key used to encrypt various sensitive files.

    root_principal_password_uri str

    The Cloud Storage URI of a KMS encrypted file containing the root principal password.

    cross_realm_trust_admin_server str

    The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

    cross_realm_trust_kdc str

    The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

    cross_realm_trust_realm str

    The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.

    cross_realm_trust_shared_password_uri str

    The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.

    enable_kerberos bool

    Flag to indicate whether to Kerberize the cluster.

    kdc_db_key_uri str

    The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.

    key_password_uri str

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.

    keystore_password_uri str

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificate, the password is generated by Dataproc.

    keystore_uri str

    The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

    realm str

    The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.

    tgt_lifetime_hours int

    The lifetime of the ticket granting ticket, in hours.

    truststore_password_uri str

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.

    truststore_uri str

    The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.


    kmsKeyUri String

    The URI of the KMS key used to encrypt various sensitive files.

    rootPrincipalPasswordUri String

    The Cloud Storage URI of a KMS encrypted file containing the root principal password.

    crossRealmTrustAdminServer String

    The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

    crossRealmTrustKdc String

    The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.

    crossRealmTrustRealm String

    The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.

    crossRealmTrustSharedPasswordUri String

    The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.

    enableKerberos Boolean

    Flag to indicate whether to Kerberize the cluster.

    kdcDbKeyUri String

    The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.

    keyPasswordUri String

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.

    keystorePasswordUri String

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificate, the password is generated by Dataproc.

    keystoreUri String

    The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.

    realm String

    The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.

    tgtLifetimeHours Number

    The lifetime of the ticket granting ticket, in hours.

    truststorePasswordUri String

    The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.

    truststoreUri String

    The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.
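
    A hedged TypeScript sketch of the minimum Kerberos wiring described above; the KMS key and the KMS-encrypted password object are placeholders that must already exist:

    import * as gcp from "@pulumi/gcp";

    // Hypothetical: Kerberize the cluster with a pre-created KMS key and an
    // encrypted root-principal password stored in Cloud Storage.
    const cluster = new gcp.dataproc.Cluster("kerberos-demo", {
        region: "us-central1",
        clusterConfig: {
            securityConfig: {
                kerberosConfig: {
                    enableKerberos: true,
                    kmsKeyUri: "projects/my-project/locations/global/keyRings/my-ring/cryptoKeys/my-key",
                    rootPrincipalPasswordUri: "gs://my-secrets/root-password.encrypted",
                },
            },
        },
    });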


    ClusterClusterConfigSoftwareConfig, ClusterClusterConfigSoftwareConfigArgs

    ImageVersion string

    The Cloud Dataproc image version to use for the cluster - this controls the set of software versions installed onto the nodes when you create clusters. If not specified, defaults to the latest version. For a list of valid versions, see Cloud Dataproc versions.

    OptionalComponents List<string>

    The set of optional components to activate on the cluster. See Available Optional Components.

    OverrideProperties Dictionary<string, string>

    A map of override and additional properties (key/value pairs) used to modify various aspects of the common configuration files used when creating a cluster. For a list of valid properties, please see Cluster properties.

    Properties Dictionary<string, object>

    The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

    ImageVersion string

    The Cloud Dataproc image version to use for the cluster - this controls the set of software versions installed onto the nodes when you create clusters. If not specified, defaults to the latest version. For a list of valid versions, see Cloud Dataproc versions.

    OptionalComponents []string

    The set of optional components to activate on the cluster. See Available Optional Components.

    OverrideProperties map[string]string

    A map of override and additional properties (key/value pairs) used to modify various aspects of the common configuration files used when creating a cluster. For a list of valid properties, please see Cluster properties.

    Properties map[string]interface{}

    The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

    imageVersion String

    The Cloud Dataproc image version to use for the cluster - this controls the set of software versions installed onto the nodes when you create clusters. If not specified, defaults to the latest version. For a list of valid versions, see Cloud Dataproc versions.

    optionalComponents List<String>

    The set of optional components to activate on the cluster. See Available Optional Components.

    overrideProperties Map<String,String>

    A map of override and additional properties (key/value pairs) used to modify various aspects of the common configuration files used when creating a cluster. For a list of valid properties, please see Cluster properties.

    properties Map<String,Object>

    The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

    imageVersion string

    The Cloud Dataproc image version to use for the cluster - this controls the set of software versions installed onto the nodes when you create clusters. If not specified, defaults to the latest version. For a list of valid versions, see Cloud Dataproc versions.

    optionalComponents string[]

    The set of optional components to activate on the cluster. See Available Optional Components.

    overrideProperties {[key: string]: string}

    A map of override and additional properties (key/value pairs) used to modify various aspects of the common configuration files used when creating a cluster. For a list of valid properties, please see Cluster properties.

    properties {[key: string]: any}

    The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

    image_version str

    The Cloud Dataproc image version to use for the cluster - this controls the set of software versions installed onto the nodes when you create clusters. If not specified, defaults to the latest version. For a list of valid versions, see Cloud Dataproc versions.

    optional_components Sequence[str]

    The set of optional components to activate on the cluster. See Available Optional Components.

    override_properties Mapping[str, str]

    A map of override and additional properties (key/value pairs) used to modify various aspects of the common configuration files used when creating a cluster. For a list of valid properties, please see Cluster properties.

    properties Mapping[str, Any]

    The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

    imageVersion String

    The Cloud Dataproc image version to use for the cluster - this controls the set of software versions installed onto the nodes when you create clusters. If not specified, defaults to the latest version. For a list of valid versions, see Cloud Dataproc versions.

    optionalComponents List<String>

    The set of optional components to activate on the cluster. See Available Optional Components.

    overrideProperties Map<String>

    A map of override and additional properties (key/value pairs) used to modify various aspects of the common configuration files used when creating a cluster. For a list of valid properties, please see Cluster properties.

    properties Map<Any>

    The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.
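
    A hedged TypeScript sketch of the software configuration; the image version, component names, and override value are illustrative examples, not recommendations:

    import * as gcp from "@pulumi/gcp";

    // Hypothetical: pin an image version, enable optional components, and
    // override a daemon property using the prefix:property key format.
    const cluster = new gcp.dataproc.Cluster("software-demo", {
        region: "us-central1",
        clusterConfig: {
            softwareConfig: {
                imageVersion: "2.0.35-debian10",             // example version string
                optionalComponents: ["JUPYTER", "ZEPPELIN"],
                overrideProperties: {
                    "spark:spark.executor.memory": "4g",     // example override
                },
            },
        },
    });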

    ClusterClusterConfigWorkerConfig, ClusterClusterConfigWorkerConfigArgs

    Accelerators List<ClusterClusterConfigWorkerConfigAccelerator>

    The Compute Engine accelerator configuration for these instances. Can be specified multiple times.

    DiskConfig ClusterClusterConfigWorkerConfigDiskConfig

    Disk Config

    ImageUri string

    The URI for the image to use for this worker. See the guide for more information.

    InstanceNames List<string>
    MachineType string

    The name of a Google Compute Engine machine type to create for the worker nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    MinCpuPlatform string

    The name of a minimum generation of CPU family for the worker nodes. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    NumInstances int

    Specifies the number of worker nodes to create. If not specified, GCP will default to a predetermined computed value (currently 2). There is currently a beta feature which allows you to run a Single Node Cluster. To take advantage of it, set "dataproc:dataproc.allow.zero.workers" = "true" in cluster_config.software_config.properties.

    Accelerators []ClusterClusterConfigWorkerConfigAccelerator

    The Compute Engine accelerator configuration for these instances. Can be specified multiple times.

    DiskConfig ClusterClusterConfigWorkerConfigDiskConfig

    Disk Config

    ImageUri string

    The URI for the image to use for this worker. See the guide for more information.

    InstanceNames []string
    MachineType string

    The name of a Google Compute Engine machine type to create for the worker nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    MinCpuPlatform string

    The name of a minimum generation of CPU family for the worker nodes. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    NumInstances int

    Specifies the number of worker nodes to create. If not specified, GCP will default to a predetermined computed value (currently 2). There is currently a beta feature which allows you to run a Single Node Cluster. To take advantage of it, set "dataproc:dataproc.allow.zero.workers" = "true" in cluster_config.software_config.properties.

    accelerators List<ClusterClusterConfigWorkerConfigAccelerator>

    The Compute Engine accelerator configuration for these instances. Can be specified multiple times.

    diskConfig ClusterClusterConfigWorkerConfigDiskConfig

    Disk Config

    imageUri String

    The URI for the image to use for this worker. See the guide for more information.

    instanceNames List<String>
    machineType String

    The name of a Google Compute Engine machine type to create for the worker nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    minCpuPlatform String

    The name of a minimum generation of CPU family for the worker nodes. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    numInstances Integer

    Specifies the number of worker nodes to create. If not specified, GCP will default to a predetermined computed value (currently 2). There is currently a beta feature which allows you to run a Single Node Cluster. To take advantage of it, set "dataproc:dataproc.allow.zero.workers" = "true" in cluster_config.software_config.properties.

    accelerators ClusterClusterConfigWorkerConfigAccelerator[]

    The Compute Engine accelerator configuration for these instances. Can be specified multiple times.

    diskConfig ClusterClusterConfigWorkerConfigDiskConfig

    Disk Config

    imageUri string

    The URI for the image to use for this worker. See the guide for more information.

    instanceNames string[]
    machineType string

    The name of a Google Compute Engine machine type to create for the worker nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    minCpuPlatform string

    The name of a minimum generation of CPU family for the worker nodes. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    numInstances number

    Specifies the number of worker nodes to create. If not specified, GCP will default to a predetermined computed value (currently 2). There is currently a beta feature which allows you to run a Single Node Cluster. To take advantage of it, set "dataproc:dataproc.allow.zero.workers" = "true" in cluster_config.software_config.properties.

    accelerators Sequence[ClusterClusterConfigWorkerConfigAccelerator]

    The Compute Engine accelerator configuration for these instances. Can be specified multiple times.

    disk_config ClusterClusterConfigWorkerConfigDiskConfig

    Disk Config

    image_uri str

    The URI for the image to use for this worker. See the guide for more information.

    instance_names Sequence[str]
    machine_type str

    The name of a Google Compute Engine machine type to create for the worker nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    min_cpu_platform str

    The name of a minimum generation of CPU family for the worker nodes. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    num_instances int

    Specifies the number of worker nodes to create. If not specified, GCP will default to a predetermined computed value (currently 2). There is currently a beta feature which allows you to run a Single Node Cluster. To take advantage of it, set "dataproc:dataproc.allow.zero.workers" = "true" in cluster_config.software_config.properties.

    accelerators List<Property Map>

    The Compute Engine accelerator configuration for these instances. Can be specified multiple times.

    diskConfig Property Map

    Disk Config

    imageUri String

    The URI for the image to use for this worker. See the guide for more information.

    instanceNames List<String>
    machineType String

    The name of a Google Compute Engine machine type to create for the worker nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    minCpuPlatform String

    The name of a minimum generation of CPU family for the worker nodes. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    numInstances Number

    Specifies the number of worker nodes to create. If not specified, GCP will default to a predetermined computed value (currently 2). There is currently a beta feature which allows you to run a Single Node Cluster. To take advantage of it, set "dataproc:dataproc.allow.zero.workers" = "true" in cluster_config.software_config.properties.
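
    As a hedged TypeScript sketch of the Single Node Cluster note above (values are illustrative): zero workers plus the corresponding override property.

    import * as gcp from "@pulumi/gcp";

    // Hypothetical: a Single Node Cluster. The override property below is what
    // permits numInstances to be zero.
    const cluster = new gcp.dataproc.Cluster("single-node-demo", {
        region: "us-central1",
        clusterConfig: {
            softwareConfig: {
                overrideProperties: {
                    "dataproc:dataproc.allow.zero.workers": "true",
                },
            },
            workerConfig: {
                numInstances: 0,
                machineType: "n1-standard-4",
            },
        },
    });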

    ClusterClusterConfigWorkerConfigAccelerator, ClusterClusterConfigWorkerConfigAcceleratorArgs

    AcceleratorCount int

    The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

    The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check accelerator availability by zone if you are trying to use accelerators in a given zone.


    AcceleratorType string

    The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

    AcceleratorCount int

    The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

    The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check accelerator availability by zone if you are trying to use accelerators in a given zone.


    AcceleratorType string

    The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

    acceleratorCount Integer

    The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

    The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check accelerator availability by zone if you are trying to use accelerators in a given zone.


    acceleratorType String

    The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

    acceleratorCount number

    The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

    The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check accelerator availability by zone if you are trying to use accelerators in a given zone.


    acceleratorType string

    The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

    accelerator_count int

    The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

    The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check accelerator availability by zone if you are trying to use accelerators in a given zone.


    accelerator_type str

    The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

    acceleratorCount Number

    The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.

    The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check accelerator availability by zone if you are trying to use accelerators in a given zone.


    acceleratorType String

    The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.
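
    A hedged TypeScript sketch of attaching an accelerator; the zone is pinned (via gceClusterConfig, documented earlier on this page) because, per the note above, Auto Zone Placement does not check accelerator availability:

    import * as gcp from "@pulumi/gcp";

    // Hypothetical: one K80 GPU per worker, with the zone pinned to one where
    // that accelerator type is known to be available.
    const cluster = new gcp.dataproc.Cluster("accelerator-demo", {
        region: "us-central1",
        clusterConfig: {
            gceClusterConfig: {
                zone: "us-central1-a",
            },
            workerConfig: {
                accelerators: [{
                    acceleratorType: "nvidia-tesla-k80",
                    acceleratorCount: 1,
                }],
            },
        },
    });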

    ClusterClusterConfigWorkerConfigDiskConfig, ClusterClusterConfigWorkerConfigDiskConfigArgs

    BootDiskSizeGb int

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

    BootDiskType string

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    NumLocalSsds int

    The number of local SSD disks attached to each worker cluster node. Defaults to 0.


    BootDiskSizeGb int

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

    BootDiskType string

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    NumLocalSsds int

    The number of local SSD disks attached to each worker cluster node. Defaults to 0.


    bootDiskSizeGb Integer

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

    bootDiskType String

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    numLocalSsds Integer

    The number of local SSD disks attached to each worker cluster node. Defaults to 0.


    bootDiskSizeGb number

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

    bootDiskType string

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    numLocalSsds number

    The number of local SSD disks attached to each worker cluster node. Defaults to 0.


    boot_disk_size_gb int

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

    boot_disk_type str

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    num_local_ssds int

    The number of local SSD disks attached to each worker cluster node. Defaults to 0.


    bootDiskSizeGb Number

    Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.

    bootDiskType String

    The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".

    numLocalSsds Number

    The number of local SSD disks attached to each worker cluster node. Defaults to 0.


    ClusterVirtualClusterConfig, ClusterVirtualClusterConfigArgs

    AuxiliaryServicesConfig ClusterVirtualClusterConfigAuxiliaryServicesConfig

    Configuration of auxiliary services used by this cluster. Structure defined below.

    KubernetesClusterConfig ClusterVirtualClusterConfigKubernetesClusterConfig

    The configuration for running the Dataproc cluster on Kubernetes. Structure defined below.


    StagingBucket string

    The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

    AuxiliaryServicesConfig ClusterVirtualClusterConfigAuxiliaryServicesConfig

    Configuration of auxiliary services used by this cluster. Structure defined below.

    KubernetesClusterConfig ClusterVirtualClusterConfigKubernetesClusterConfig

    The configuration for running the Dataproc cluster on Kubernetes. Structure defined below.


    StagingBucket string

    The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

    auxiliaryServicesConfig ClusterVirtualClusterConfigAuxiliaryServicesConfig

    Configuration of auxiliary services used by this cluster. Structure defined below.

    kubernetesClusterConfig ClusterVirtualClusterConfigKubernetesClusterConfig

    The configuration for running the Dataproc cluster on Kubernetes. Structure defined below.


    stagingBucket String

    The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

    auxiliaryServicesConfig ClusterVirtualClusterConfigAuxiliaryServicesConfig

    Configuration of auxiliary services used by this cluster. Structure defined below.

    kubernetesClusterConfig ClusterVirtualClusterConfigKubernetesClusterConfig

    The configuration for running the Dataproc cluster on Kubernetes. Structure defined below.


    stagingBucket string

    The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

    auxiliary_services_config ClusterVirtualClusterConfigAuxiliaryServicesConfig

    Configuration of auxiliary services used by this cluster. Structure defined below.

    kubernetes_cluster_config ClusterVirtualClusterConfigKubernetesClusterConfig

    The configuration for running the Dataproc cluster on Kubernetes. Structure defined below.


    staging_bucket str

    The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.

    auxiliaryServicesConfig Property Map

    Configuration of auxiliary services used by this cluster. Structure defined below.

    kubernetesClusterConfig Property Map

    The configuration for running the Dataproc cluster on Kubernetes. Structure defined below.


    stagingBucket String

    The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.
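
    A hedged TypeScript sketch of the virtual cluster shape; the GKE cluster target and component version are placeholders, and a real deployment also needs node pool targets (omitted here for brevity):

    import * as gcp from "@pulumi/gcp";

    // Hypothetical: run Dataproc on an existing GKE cluster.
    const cluster = new gcp.dataproc.Cluster("virtual-demo", {
        region: "us-central1",
        virtualClusterConfig: {
            stagingBucket: "my-dataproc-staging-bucket",
            kubernetesClusterConfig: {
                kubernetesNamespace: "dataproc",
                kubernetesSoftwareConfig: {
                    componentVersion: {
                        SPARK: "3.1-dataproc-7",  // placeholder version
                    },
                },
                gkeClusterConfig: {
                    gkeClusterTarget: "projects/my-project/locations/us-central1/clusters/my-gke",
                    // node pool targets omitted in this sketch
                },
            },
        },
    });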

    ClusterVirtualClusterConfigAuxiliaryServicesConfig, ClusterVirtualClusterConfigAuxiliaryServicesConfigArgs

    metastoreConfig Property Map

    The Hive Metastore configuration for this workload.

    sparkHistoryServerConfig Property Map

    The Spark History Server configuration for the workload.

    ClusterVirtualClusterConfigAuxiliaryServicesConfigMetastoreConfig, ClusterVirtualClusterConfigAuxiliaryServicesConfigMetastoreConfigArgs

    DataprocMetastoreService string

    Resource name of an existing Dataproc Metastore service.

    Only resource names that include the project ID and location (region) are valid. Example:

    projects/[projectId]/locations/[dataproc_region]/services/[service-name]

    DataprocMetastoreService string

    Resource name of an existing Dataproc Metastore service.

    Only resource names that include the project ID and location (region) are valid. Example:

    projects/[projectId]/locations/[dataproc_region]/services/[service-name]

    dataprocMetastoreService String

    Resource name of an existing Dataproc Metastore service.

    Only resource names that include the project ID and location (region) are valid. Example:

    projects/[projectId]/locations/[dataproc_region]/services/[service-name]

    dataprocMetastoreService string

    Resource name of an existing Dataproc Metastore service.

    Only resource names that include the project ID and location (region) are valid. Example:

    projects/[projectId]/locations/[dataproc_region]/services/[service-name]

    dataproc_metastore_service str

    Resource name of an existing Dataproc Metastore service.

    Only resource names that include the project ID and location (region) are valid. Example:

    projects/[projectId]/locations/[dataproc_region]/services/[service-name]

    dataprocMetastoreService String

    Resource name of an existing Dataproc Metastore service.

    Only resource names that include the project ID and location (region) are valid. Example:

    projects/[projectId]/locations/[dataproc_region]/services/[service-name]
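
    As a hedged illustration, the metastore config can be wired up as follows in TypeScript; the project, region, and service name are hypothetical placeholders following the resource-name format above.

    import * as gcp from "@pulumi/gcp";

    const auxiliaryServicesConfig: gcp.types.input.dataproc.ClusterVirtualClusterConfigAuxiliaryServicesConfig = {
        metastoreConfig: {
            // Hypothetical existing Dataproc Metastore service.
            dataprocMetastoreService: "projects/my-project/locations/us-central1/services/my-metastore",
        },
    };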

    ClusterVirtualClusterConfigAuxiliaryServicesConfigSparkHistoryServerConfig, ClusterVirtualClusterConfigAuxiliaryServicesConfigSparkHistoryServerConfigArgs

    DataprocCluster string

    Resource name of an existing Dataproc Cluster to act as a Spark History Server for the workload.


    DataprocCluster string

    Resource name of an existing Dataproc Cluster to act as a Spark History Server for the workload.


    dataprocCluster String

    Resource name of an existing Dataproc Cluster to act as a Spark History Server for the workload.


    dataprocCluster string

    Resource name of an existing Dataproc Cluster to act as a Spark History Server for the workload.


    dataproc_cluster str

    Resource name of an existing Dataproc Cluster to act as a Spark History Server for the workload.


    dataprocCluster String

    Resource name of an existing Dataproc Cluster to act as a Spark History Server for the workload.
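
    Similarly, a Spark History Server config points at an existing cluster by resource name; the value below is a hypothetical placeholder in a TypeScript sketch.

    import * as gcp from "@pulumi/gcp";

    const sparkHistoryServerConfig: gcp.types.input.dataproc.ClusterVirtualClusterConfigAuxiliaryServicesConfigSparkHistoryServerConfig = {
        // Hypothetical existing Dataproc cluster acting as the history server.
        dataprocCluster: "projects/my-project/regions/us-central1/clusters/my-history-server",
    };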


    ClusterVirtualClusterConfigKubernetesClusterConfig, ClusterVirtualClusterConfigKubernetesClusterConfigArgs

    GkeClusterConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfig

    The configuration for running the Dataproc cluster on GKE.

    KubernetesSoftwareConfig ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfig

    The software configuration for this Dataproc cluster running on Kubernetes.

    KubernetesNamespace string

    A namespace within the Kubernetes cluster to deploy into. If this namespace does not exist, it is created. If it exists, Dataproc verifies that another Dataproc VirtualCluster is not installed into it. If not specified, the name of the Dataproc Cluster is used.

    GkeClusterConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfig

    The configuration for running the Dataproc cluster on GKE.

    KubernetesSoftwareConfig ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfig

    The software configuration for this Dataproc cluster running on Kubernetes.

    KubernetesNamespace string

    A namespace within the Kubernetes cluster to deploy into. If this namespace does not exist, it is created. If it exists, Dataproc verifies that another Dataproc VirtualCluster is not installed into it. If not specified, the name of the Dataproc Cluster is used.

    gkeClusterConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfig

    The configuration for running the Dataproc cluster on GKE.

    kubernetesSoftwareConfig ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfig

    The software configuration for this Dataproc cluster running on Kubernetes.

    kubernetesNamespace String

    A namespace within the Kubernetes cluster to deploy into. If this namespace does not exist, it is created. If it exists, Dataproc verifies that another Dataproc VirtualCluster is not installed into it. If not specified, the name of the Dataproc Cluster is used.

    gkeClusterConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfig

    The configuration for running the Dataproc cluster on GKE.

    kubernetesSoftwareConfig ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfig

    The software configuration for this Dataproc cluster running on Kubernetes.

    kubernetesNamespace string

    A namespace within the Kubernetes cluster to deploy into. If this namespace does not exist, it is created. If it exists, Dataproc verifies that another Dataproc VirtualCluster is not installed into it. If not specified, the name of the Dataproc Cluster is used.

    gke_cluster_config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfig

    The configuration for running the Dataproc cluster on GKE.

    kubernetes_software_config ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfig

    The software configuration for this Dataproc cluster running on Kubernetes.

    kubernetes_namespace str

    A namespace within the Kubernetes cluster to deploy into. If this namespace does not exist, it is created. If it exists, Dataproc verifies that another Dataproc VirtualCluster is not installed into it. If not specified, the name of the Dataproc Cluster is used.

    gkeClusterConfig Property Map

    The configuration for running the Dataproc cluster on GKE.

    kubernetesSoftwareConfig Property Map

    The software configuration for this Dataproc cluster running on Kubernetes.

    kubernetesNamespace String

    A namespace within the Kubernetes cluster to deploy into. If this namespace does not exist, it is created. If it exists, Dataproc verifies that another Dataproc VirtualCluster is not installed into it. If not specified, the name of the Dataproc Cluster is used.
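
    Putting the three fields together, a minimal TypeScript sketch might look like the following; the namespace, GKE cluster name, and Spark version are hypothetical values, not defaults.

    import * as gcp from "@pulumi/gcp";

    const kubernetesClusterConfig: gcp.types.input.dataproc.ClusterVirtualClusterConfigKubernetesClusterConfig = {
        // Hypothetical namespace; defaults to the Dataproc cluster name if omitted.
        kubernetesNamespace: "dataproc-ns",
        gkeClusterConfig: {
            gkeClusterTarget: "projects/my-project/locations/us-central1/clusters/my-gke-cluster",
        },
        kubernetesSoftwareConfig: {
            // component_version[SPARK] is mandatory; the version is illustrative.
            componentVersion: { SPARK: "3.1-dataproc-7" },
        },
    };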

    ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfig, ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigArgs

    GkeClusterTarget string

    A target GKE cluster to deploy to. It must be in the same project and region as the Dataproc cluster (the GKE cluster can be zonal or regional)

    NodePoolTargets List<ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTarget>

    GKE node pools where workloads will be scheduled. At least one node pool must be assigned the DEFAULT GkeNodePoolTarget.Role. If a GkeNodePoolTarget is not specified, Dataproc constructs a DEFAULT GkeNodePoolTarget. Each role can be given to only one GkeNodePoolTarget. All node pools must have the same location settings.

    GkeClusterTarget string

    A target GKE cluster to deploy to. It must be in the same project and region as the Dataproc cluster (the GKE cluster can be zonal or regional)

    NodePoolTargets []ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTarget

    GKE node pools where workloads will be scheduled. At least one node pool must be assigned the DEFAULT GkeNodePoolTarget.Role. If a GkeNodePoolTarget is not specified, Dataproc constructs a DEFAULT GkeNodePoolTarget. Each role can be given to only one GkeNodePoolTarget. All node pools must have the same location settings.

    gkeClusterTarget String

    A target GKE cluster to deploy to. It must be in the same project and region as the Dataproc cluster (the GKE cluster can be zonal or regional)

    nodePoolTargets List<ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTarget>

    GKE node pools where workloads will be scheduled. At least one node pool must be assigned the DEFAULT GkeNodePoolTarget.Role. If a GkeNodePoolTarget is not specified, Dataproc constructs a DEFAULT GkeNodePoolTarget. Each role can be given to only one GkeNodePoolTarget. All node pools must have the same location settings.

    gkeClusterTarget string

    A target GKE cluster to deploy to. It must be in the same project and region as the Dataproc cluster (the GKE cluster can be zonal or regional)

    nodePoolTargets ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTarget[]

    GKE node pools where workloads will be scheduled. At least one node pool must be assigned the DEFAULT GkeNodePoolTarget.Role. If a GkeNodePoolTarget is not specified, Dataproc constructs a DEFAULT GkeNodePoolTarget. Each role can be given to only one GkeNodePoolTarget. All node pools must have the same location settings.

    gke_cluster_target str

    A target GKE cluster to deploy to. It must be in the same project and region as the Dataproc cluster (the GKE cluster can be zonal or regional)

    node_pool_targets Sequence[ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTarget]

    GKE node pools where workloads will be scheduled. At least one node pool must be assigned the DEFAULT GkeNodePoolTarget.Role. If a GkeNodePoolTarget is not specified, Dataproc constructs a DEFAULT GkeNodePoolTarget. Each role can be given to only one GkeNodePoolTarget. All node pools must have the same location settings.

    gkeClusterTarget String

    A target GKE cluster to deploy to. It must be in the same project and region as the Dataproc cluster (the GKE cluster can be zonal or regional)

    nodePoolTargets List<Property Map>

    GKE node pools where workloads will be scheduled. At least one node pool must be assigned the DEFAULT GkeNodePoolTarget.Role. If a GkeNodePoolTarget is not specified, Dataproc constructs a DEFAULT GkeNodePoolTarget. Each role can be given to only one GkeNodePoolTarget. All node pools must have the same location settings.
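
    A hedged TypeScript sketch of a GKE cluster config with one explicitly targeted node pool follows; all resource names are hypothetical, and the GKE cluster must share the Dataproc cluster's project and region.

    import * as gcp from "@pulumi/gcp";

    const gkeClusterConfig: gcp.types.input.dataproc.ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfig = {
        gkeClusterTarget: "projects/my-project/locations/us-central1/clusters/my-gke-cluster",
        nodePoolTargets: [{
            // At least one node pool must carry the DEFAULT role.
            nodePool: "projects/my-project/locations/us-central1/clusters/my-gke-cluster/nodePools/dataproc-pool",
            roles: ["DEFAULT"],
        }],
    };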

    ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTarget, ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetArgs

    NodePool string

    The target GKE node pool.

    Roles List<string>

    The roles associated with the GKE node pool. Each role must be one of "DEFAULT", "CONTROLLER", "SPARK_DRIVER", or "SPARK_EXECUTOR".

    NodePoolConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig

    The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.

    NodePool string

    The target GKE node pool.

    Roles []string

    The roles associated with the GKE node pool. Each role must be one of "DEFAULT", "CONTROLLER", "SPARK_DRIVER", or "SPARK_EXECUTOR".

    NodePoolConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig

    The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.

    nodePool String

    The target GKE node pool.

    roles List<String>

    The roles associated with the GKE node pool. Each role must be one of "DEFAULT", "CONTROLLER", "SPARK_DRIVER", or "SPARK_EXECUTOR".

    nodePoolConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig

    The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.

    nodePool string

    The target GKE node pool.

    roles string[]

    The roles associated with the GKE node pool. Each role must be one of "DEFAULT", "CONTROLLER", "SPARK_DRIVER", or "SPARK_EXECUTOR".

    nodePoolConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig

    The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.

    node_pool str

    The target GKE node pool.

    roles Sequence[str]

    The roles associated with the GKE node pool. Each role must be one of "DEFAULT", "CONTROLLER", "SPARK_DRIVER", or "SPARK_EXECUTOR".

    node_pool_config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig

    The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.

    nodePool String

    The target GKE node pool.

    roles List<String>

    The roles associated with the GKE node pool. Each role must be one of "DEFAULT", "CONTROLLER", "SPARK_DRIVER", or "SPARK_EXECUTOR".

    nodePoolConfig Property Map

    The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.

    ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig, ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigArgs

    Locations List<string>

    The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.


    Autoscaling ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling

    The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.

    Config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig

    The node pool configuration.

    Locations []string

    The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.


    Autoscaling ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling

    The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.

    Config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig

    The node pool configuration.

    locations List<String>

    The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.


    autoscaling ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling

    The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.

    config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig

    The node pool configuration.

    locations string[]

    The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.


    autoscaling ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling

    The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.

    config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig

    The node pool configuration.

    locations Sequence[str]

    The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.


    autoscaling ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling

    The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.

    config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig

    The node pool configuration.

    locations List<String>

    The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.


    autoscaling Property Map

    The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.

    config Property Map

    The node pool configuration.

    ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling, ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscalingArgs

    MaxNodeCount int

    The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.

    MinNodeCount int

    The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.

    MaxNodeCount int

    The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.

    MinNodeCount int

    The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.

    maxNodeCount Integer

    The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.

    minNodeCount Integer

    The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.

    maxNodeCount number

    The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.

    minNodeCount number

    The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.

    max_node_count int

    The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.

    min_node_count int

    The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.

    maxNodeCount Number

    The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.

    minNodeCount Number

    The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.
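
    In TypeScript the autoscaler block is just the two bounds; the counts below are arbitrary example values.

    import * as gcp from "@pulumi/gcp";

    const autoscaling: gcp.types.input.dataproc.ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling = {
        minNodeCount: 1, // must be >= 0 and <= maxNodeCount
        maxNodeCount: 5, // must be > 0 and >= minNodeCount
    };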

    ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig, ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfigArgs

    LocalSsdCount int

    The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.

    MachineType string

    The name of a Compute Engine machine type to create for the node pool's nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    MinCpuPlatform string

    Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    Preemptible bool

    Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).

    Spot bool

    Whether the nodes are created as Spot VM instances. Spot VMs are the newer rebrand of preemptible VMs.

    LocalSsdCount int

    The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.

    MachineType string

    The name of a Compute Engine machine type to create for the node pool's nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    MinCpuPlatform string

    Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    Preemptible bool

    Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).

    Spot bool

    Whether the nodes are created as Spot VM instances. Spot VMs are the newer rebrand of preemptible VMs.

    localSsdCount Integer

    The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.

    machineType String

    The name of a Compute Engine machine type to create for the node pool's nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    minCpuPlatform String

    Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    preemptible Boolean

    Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).

    spot Boolean

    Whether the nodes are created as Spot VM instances. Spot VMs are the newer rebrand of preemptible VMs.

    localSsdCount number

    The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.

    machineType string

    The name of a Compute Engine machine type to create for the node pool's nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    minCpuPlatform string

    Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    preemptible boolean

    Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).

    spot boolean

    Whether the nodes are created as Spot VM instances. Spot VMs are the newer rebrand of preemptible VMs.

    local_ssd_count int

    The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.

    machine_type str

    The name of a Compute Engine machine type to create for the node pool's nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    min_cpu_platform str

    Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    preemptible bool

    Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).

    spot bool

    Whether the nodes are created as Spot VM instances. Spot VMs are the newer rebrand of preemptible VMs.

    localSsdCount Number

    The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.

    machineType String

    The name of a Compute Engine machine type to create for the node pool's nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).

    minCpuPlatform String

    Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.

    preemptible Boolean

    Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).

    spot Boolean

    Whether the nodes are created as Spot VM instances. Spot VMs are the newer rebrand of preemptible VMs.
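
    Tying the node pool settings together, here is a hedged TypeScript sketch of a complete node pool config; the zone, machine type, and counts are illustrative values only.

    import * as gcp from "@pulumi/gcp";

    const nodePoolConfig: gcp.types.input.dataproc.ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig = {
        locations: ["us-central1-a"], // hypothetical zone for the pool's nodes
        autoscaling: { minNodeCount: 1, maxNodeCount: 5 },
        config: {
            machineType: "n1-standard-4",
            localSsdCount: 1,
            spot: true, // Spot VMs, the newer rebrand of preemptible
        },
    };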

    ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfig, ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfigArgs

    ComponentVersion Dictionary<string, string>

    The components that should be installed in this Dataproc cluster. The key must be a string from the
    KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified.

    NOTE: component_version[SPARK] must be set, or creation of the cluster will fail.

    Properties Dictionary<string, string>

    The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

    ComponentVersion map[string]string

    The components that should be installed in this Dataproc cluster. The key must be a string from the
    KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified.

    NOTE: component_version[SPARK] must be set, or creation of the cluster will fail.

    Properties map[string]string

    The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

    componentVersion Map<String,String>

    The components that should be installed in this Dataproc cluster. The key must be a string from the
    KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified.

    NOTE: component_version[SPARK] must be set, or creation of the cluster will fail.

    properties Map<String,String>

    The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

    componentVersion {[key: string]: string}

    The components that should be installed in this Dataproc cluster. The key must be a string from the
    KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified.

    NOTE: component_version[SPARK] must be set, or creation of the cluster will fail.

    properties {[key: string]: string}

    The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

    component_version Mapping[str, str]

    The components that should be installed in this Dataproc cluster. The key must be a string from the
    KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified.

    NOTE: component_version[SPARK] must be set, or creation of the cluster will fail.

    properties Mapping[str, str]

    The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.

    componentVersion Map<String>

    The components that should be installed in this Dataproc cluster. The key must be a string from the
    KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified.

    NOTE: component_version[SPARK] must be set, or creation of the cluster will fail.

    properties Map<String>

    The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.
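
    A hedged TypeScript sketch of the software config; the Spark component version and container image are hypothetical placeholders.

    import * as gcp from "@pulumi/gcp";

    const kubernetesSoftwareConfig: gcp.types.input.dataproc.ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfig = {
        // component_version[SPARK] must be set or cluster creation fails.
        componentVersion: { SPARK: "3.1-dataproc-7" },
        // Daemon config properties use prefix:property keys.
        properties: {
            "spark:spark.kubernetes.container.image": "gcr.io/my-project/my-spark-image",
        },
    };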

    Import

    This resource does not support import.

    Package Details

    Repository: Google Cloud (GCP) Classic pulumi/pulumi-gcp
    License: Apache-2.0
    Notes: This Pulumi package is based on the google-beta Terraform Provider.
