gcp.dataproc.Cluster
Manages a Cloud Dataproc cluster resource within GCP.
- API documentation
- How-to Guides
Warning: Due to limitations of the API, all arguments except labels, cluster_config.worker_config.num_instances, and cluster_config.preemptible_worker_config.num_instances are non-updatable. Changing any other argument forces recreation of the whole cluster!
Example Usage
Basic
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var simplecluster = new Gcp.Dataproc.Cluster("simplecluster", new()
{
Region = "us-central1",
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v6/go/gcp/dataproc"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataproc.NewCluster(ctx, "simplecluster", &dataproc.ClusterArgs{
Region: pulumi.String("us-central1"),
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.Cluster;
import com.pulumi.gcp.dataproc.ClusterArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var simplecluster = new Cluster("simplecluster", ClusterArgs.builder()
.region("us-central1")
.build());
}
}
import pulumi
import pulumi_gcp as gcp
simplecluster = gcp.dataproc.Cluster("simplecluster", region="us-central1")
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const simplecluster = new gcp.dataproc.Cluster("simplecluster", {region: "us-central1"});
resources:
simplecluster:
type: gcp:dataproc:Cluster
properties:
region: us-central1
Advanced
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var @default = new Gcp.ServiceAccount.Account("default", new()
{
AccountId = "service-account-id",
DisplayName = "Service Account",
});
var mycluster = new Gcp.Dataproc.Cluster("mycluster", new()
{
Region = "us-central1",
GracefulDecommissionTimeout = "120s",
Labels =
{
{ "foo", "bar" },
},
ClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigArgs
{
StagingBucket = "dataproc-staging-bucket",
MasterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigArgs
{
NumInstances = 1,
MachineType = "e2-medium",
DiskConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigDiskConfigArgs
{
BootDiskType = "pd-ssd",
BootDiskSizeGb = 30,
},
},
WorkerConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigWorkerConfigArgs
{
NumInstances = 2,
MachineType = "e2-medium",
MinCpuPlatform = "Intel Skylake",
DiskConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigWorkerConfigDiskConfigArgs
{
BootDiskSizeGb = 30,
NumLocalSsds = 1,
},
},
PreemptibleWorkerConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigPreemptibleWorkerConfigArgs
{
NumInstances = 0,
},
SoftwareConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigSoftwareConfigArgs
{
ImageVersion = "2.0.35-debian10",
OverrideProperties =
{
{ "dataproc:dataproc.allow.zero.workers", "true" },
},
},
GceClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigGceClusterConfigArgs
{
Tags = new[]
{
"foo",
"bar",
},
ServiceAccount = @default.Email,
ServiceAccountScopes = new[]
{
"cloud-platform",
},
},
InitializationActions = new[]
{
new Gcp.Dataproc.Inputs.ClusterClusterConfigInitializationActionArgs
{
Script = "gs://dataproc-initialization-actions/stackdriver/stackdriver.sh",
TimeoutSec = 500,
},
},
},
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v6/go/gcp/dataproc"
"github.com/pulumi/pulumi-gcp/sdk/v6/go/gcp/serviceAccount"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
		_default, err := serviceAccount.NewAccount(ctx, "default", &serviceAccount.AccountArgs{
AccountId: pulumi.String("service-account-id"),
DisplayName: pulumi.String("Service Account"),
})
if err != nil {
return err
}
_, err = dataproc.NewCluster(ctx, "mycluster", &dataproc.ClusterArgs{
Region: pulumi.String("us-central1"),
GracefulDecommissionTimeout: pulumi.String("120s"),
Labels: pulumi.StringMap{
"foo": pulumi.String("bar"),
},
ClusterConfig: &dataproc.ClusterClusterConfigArgs{
StagingBucket: pulumi.String("dataproc-staging-bucket"),
MasterConfig: &dataproc.ClusterClusterConfigMasterConfigArgs{
NumInstances: pulumi.Int(1),
MachineType: pulumi.String("e2-medium"),
DiskConfig: &dataproc.ClusterClusterConfigMasterConfigDiskConfigArgs{
BootDiskType: pulumi.String("pd-ssd"),
BootDiskSizeGb: pulumi.Int(30),
},
},
WorkerConfig: &dataproc.ClusterClusterConfigWorkerConfigArgs{
NumInstances: pulumi.Int(2),
MachineType: pulumi.String("e2-medium"),
MinCpuPlatform: pulumi.String("Intel Skylake"),
DiskConfig: &dataproc.ClusterClusterConfigWorkerConfigDiskConfigArgs{
BootDiskSizeGb: pulumi.Int(30),
NumLocalSsds: pulumi.Int(1),
},
},
PreemptibleWorkerConfig: &dataproc.ClusterClusterConfigPreemptibleWorkerConfigArgs{
NumInstances: pulumi.Int(0),
},
SoftwareConfig: &dataproc.ClusterClusterConfigSoftwareConfigArgs{
ImageVersion: pulumi.String("2.0.35-debian10"),
OverrideProperties: pulumi.StringMap{
"dataproc:dataproc.allow.zero.workers": pulumi.String("true"),
},
},
GceClusterConfig: &dataproc.ClusterClusterConfigGceClusterConfigArgs{
Tags: pulumi.StringArray{
pulumi.String("foo"),
pulumi.String("bar"),
},
ServiceAccount: _default.Email,
ServiceAccountScopes: pulumi.StringArray{
pulumi.String("cloud-platform"),
},
},
InitializationActions: dataproc.ClusterClusterConfigInitializationActionArray{
&dataproc.ClusterClusterConfigInitializationActionArgs{
Script: pulumi.String("gs://dataproc-initialization-actions/stackdriver/stackdriver.sh"),
TimeoutSec: pulumi.Int(500),
},
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.serviceAccount.Account;
import com.pulumi.gcp.serviceAccount.AccountArgs;
import com.pulumi.gcp.dataproc.Cluster;
import com.pulumi.gcp.dataproc.ClusterArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigDiskConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigWorkerConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigWorkerConfigDiskConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigPreemptibleWorkerConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigSoftwareConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigGceClusterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigInitializationActionArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var default_ = new Account("default", AccountArgs.builder()
.accountId("service-account-id")
.displayName("Service Account")
.build());
var mycluster = new Cluster("mycluster", ClusterArgs.builder()
.region("us-central1")
.gracefulDecommissionTimeout("120s")
.labels(Map.of("foo", "bar"))
.clusterConfig(ClusterClusterConfigArgs.builder()
.stagingBucket("dataproc-staging-bucket")
.masterConfig(ClusterClusterConfigMasterConfigArgs.builder()
.numInstances(1)
.machineType("e2-medium")
.diskConfig(ClusterClusterConfigMasterConfigDiskConfigArgs.builder()
.bootDiskType("pd-ssd")
.bootDiskSizeGb(30)
.build())
.build())
.workerConfig(ClusterClusterConfigWorkerConfigArgs.builder()
.numInstances(2)
.machineType("e2-medium")
.minCpuPlatform("Intel Skylake")
.diskConfig(ClusterClusterConfigWorkerConfigDiskConfigArgs.builder()
.bootDiskSizeGb(30)
.numLocalSsds(1)
.build())
.build())
.preemptibleWorkerConfig(ClusterClusterConfigPreemptibleWorkerConfigArgs.builder()
.numInstances(0)
.build())
.softwareConfig(ClusterClusterConfigSoftwareConfigArgs.builder()
.imageVersion("2.0.35-debian10")
.overrideProperties(Map.of("dataproc:dataproc.allow.zero.workers", "true"))
.build())
.gceClusterConfig(ClusterClusterConfigGceClusterConfigArgs.builder()
.tags(
"foo",
"bar")
.serviceAccount(default_.email())
.serviceAccountScopes("cloud-platform")
.build())
.initializationActions(ClusterClusterConfigInitializationActionArgs.builder()
.script("gs://dataproc-initialization-actions/stackdriver/stackdriver.sh")
.timeoutSec(500)
.build())
.build())
.build());
}
}
import pulumi
import pulumi_gcp as gcp
default = gcp.service_account.Account("default",
account_id="service-account-id",
display_name="Service Account")
mycluster = gcp.dataproc.Cluster("mycluster",
region="us-central1",
graceful_decommission_timeout="120s",
labels={
"foo": "bar",
},
cluster_config=gcp.dataproc.ClusterClusterConfigArgs(
staging_bucket="dataproc-staging-bucket",
master_config=gcp.dataproc.ClusterClusterConfigMasterConfigArgs(
num_instances=1,
machine_type="e2-medium",
disk_config=gcp.dataproc.ClusterClusterConfigMasterConfigDiskConfigArgs(
boot_disk_type="pd-ssd",
boot_disk_size_gb=30,
),
),
worker_config=gcp.dataproc.ClusterClusterConfigWorkerConfigArgs(
num_instances=2,
machine_type="e2-medium",
min_cpu_platform="Intel Skylake",
disk_config=gcp.dataproc.ClusterClusterConfigWorkerConfigDiskConfigArgs(
boot_disk_size_gb=30,
num_local_ssds=1,
),
),
preemptible_worker_config=gcp.dataproc.ClusterClusterConfigPreemptibleWorkerConfigArgs(
num_instances=0,
),
software_config=gcp.dataproc.ClusterClusterConfigSoftwareConfigArgs(
image_version="2.0.35-debian10",
override_properties={
"dataproc:dataproc.allow.zero.workers": "true",
},
),
gce_cluster_config=gcp.dataproc.ClusterClusterConfigGceClusterConfigArgs(
tags=[
"foo",
"bar",
],
service_account=default.email,
service_account_scopes=["cloud-platform"],
),
initialization_actions=[gcp.dataproc.ClusterClusterConfigInitializationActionArgs(
script="gs://dataproc-initialization-actions/stackdriver/stackdriver.sh",
timeout_sec=500,
)],
))
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const _default = new gcp.serviceaccount.Account("default", {
accountId: "service-account-id",
displayName: "Service Account",
});
const mycluster = new gcp.dataproc.Cluster("mycluster", {
region: "us-central1",
gracefulDecommissionTimeout: "120s",
labels: {
foo: "bar",
},
clusterConfig: {
stagingBucket: "dataproc-staging-bucket",
masterConfig: {
numInstances: 1,
machineType: "e2-medium",
diskConfig: {
bootDiskType: "pd-ssd",
bootDiskSizeGb: 30,
},
},
workerConfig: {
numInstances: 2,
machineType: "e2-medium",
minCpuPlatform: "Intel Skylake",
diskConfig: {
bootDiskSizeGb: 30,
numLocalSsds: 1,
},
},
preemptibleWorkerConfig: {
numInstances: 0,
},
softwareConfig: {
imageVersion: "2.0.35-debian10",
overrideProperties: {
"dataproc:dataproc.allow.zero.workers": "true",
},
},
gceClusterConfig: {
tags: [
"foo",
"bar",
],
serviceAccount: _default.email,
serviceAccountScopes: ["cloud-platform"],
},
initializationActions: [{
script: "gs://dataproc-initialization-actions/stackdriver/stackdriver.sh",
timeoutSec: 500,
}],
},
});
resources:
default:
type: gcp:serviceAccount:Account
properties:
accountId: service-account-id
displayName: Service Account
mycluster:
type: gcp:dataproc:Cluster
properties:
region: us-central1
gracefulDecommissionTimeout: 120s
labels:
foo: bar
clusterConfig:
stagingBucket: dataproc-staging-bucket
masterConfig:
numInstances: 1
machineType: e2-medium
diskConfig:
bootDiskType: pd-ssd
bootDiskSizeGb: 30
workerConfig:
numInstances: 2
machineType: e2-medium
minCpuPlatform: Intel Skylake
diskConfig:
bootDiskSizeGb: 30
numLocalSsds: 1
preemptibleWorkerConfig:
numInstances: 0
softwareConfig:
imageVersion: 2.0.35-debian10
overrideProperties:
dataproc:dataproc.allow.zero.workers: 'true'
gceClusterConfig:
tags:
- foo
- bar
serviceAccount: ${default.email}
serviceAccountScopes:
- cloud-platform
initializationActions:
- script: gs://dataproc-initialization-actions/stackdriver/stackdriver.sh
timeoutSec: 500
Using A GPU Accelerator
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;
return await Deployment.RunAsync(() =>
{
var acceleratedCluster = new Gcp.Dataproc.Cluster("acceleratedCluster", new()
{
ClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigArgs
{
GceClusterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigGceClusterConfigArgs
{
Zone = "us-central1-a",
},
MasterConfig = new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigArgs
{
Accelerators = new[]
{
new Gcp.Dataproc.Inputs.ClusterClusterConfigMasterConfigAcceleratorArgs
{
AcceleratorCount = 1,
AcceleratorType = "nvidia-tesla-k80",
},
},
},
},
Region = "us-central1",
});
});
package main
import (
"github.com/pulumi/pulumi-gcp/sdk/v6/go/gcp/dataproc"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := dataproc.NewCluster(ctx, "acceleratedCluster", &dataproc.ClusterArgs{
ClusterConfig: &dataproc.ClusterClusterConfigArgs{
GceClusterConfig: &dataproc.ClusterClusterConfigGceClusterConfigArgs{
Zone: pulumi.String("us-central1-a"),
},
MasterConfig: &dataproc.ClusterClusterConfigMasterConfigArgs{
Accelerators: dataproc.ClusterClusterConfigMasterConfigAcceleratorArray{
&dataproc.ClusterClusterConfigMasterConfigAcceleratorArgs{
AcceleratorCount: pulumi.Int(1),
AcceleratorType: pulumi.String("nvidia-tesla-k80"),
},
},
},
},
Region: pulumi.String("us-central1"),
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.dataproc.Cluster;
import com.pulumi.gcp.dataproc.ClusterArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigGceClusterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigArgs;
import com.pulumi.gcp.dataproc.inputs.ClusterClusterConfigMasterConfigAcceleratorArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var acceleratedCluster = new Cluster("acceleratedCluster", ClusterArgs.builder()
.clusterConfig(ClusterClusterConfigArgs.builder()
.gceClusterConfig(ClusterClusterConfigGceClusterConfigArgs.builder()
.zone("us-central1-a")
.build())
.masterConfig(ClusterClusterConfigMasterConfigArgs.builder()
.accelerators(ClusterClusterConfigMasterConfigAcceleratorArgs.builder()
.acceleratorCount(1)
.acceleratorType("nvidia-tesla-k80")
.build())
.build())
.build())
.region("us-central1")
.build());
}
}
import pulumi
import pulumi_gcp as gcp
accelerated_cluster = gcp.dataproc.Cluster("acceleratedCluster",
cluster_config=gcp.dataproc.ClusterClusterConfigArgs(
gce_cluster_config=gcp.dataproc.ClusterClusterConfigGceClusterConfigArgs(
zone="us-central1-a",
),
master_config=gcp.dataproc.ClusterClusterConfigMasterConfigArgs(
accelerators=[gcp.dataproc.ClusterClusterConfigMasterConfigAcceleratorArgs(
accelerator_count=1,
accelerator_type="nvidia-tesla-k80",
)],
),
),
region="us-central1")
import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";
const acceleratedCluster = new gcp.dataproc.Cluster("acceleratedCluster", {
clusterConfig: {
gceClusterConfig: {
zone: "us-central1-a",
},
masterConfig: {
accelerators: [{
acceleratorCount: 1,
acceleratorType: "nvidia-tesla-k80",
}],
},
},
region: "us-central1",
});
resources:
acceleratedCluster:
type: gcp:dataproc:Cluster
properties:
clusterConfig:
gceClusterConfig:
zone: us-central1-a
masterConfig:
accelerators:
- acceleratorCount: 1
acceleratorType: nvidia-tesla-k80
region: us-central1
Create Cluster Resource
new Cluster(name: string, args?: ClusterArgs, opts?: CustomResourceOptions);
@overload
def Cluster(resource_name: str,
opts: Optional[ResourceOptions] = None,
cluster_config: Optional[ClusterClusterConfigArgs] = None,
graceful_decommission_timeout: Optional[str] = None,
labels: Optional[Mapping[str, str]] = None,
name: Optional[str] = None,
project: Optional[str] = None,
region: Optional[str] = None,
virtual_cluster_config: Optional[ClusterVirtualClusterConfigArgs] = None)
@overload
def Cluster(resource_name: str,
args: Optional[ClusterArgs] = None,
opts: Optional[ResourceOptions] = None)
func NewCluster(ctx *Context, name string, args *ClusterArgs, opts ...ResourceOption) (*Cluster, error)
public Cluster(string name, ClusterArgs? args = null, CustomResourceOptions? opts = null)
public Cluster(String name, ClusterArgs args)
public Cluster(String name, ClusterArgs args, CustomResourceOptions options)
type: gcp:dataproc:Cluster
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args ClusterArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args ClusterArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args ClusterArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args ClusterArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args ClusterArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
Cluster Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
The Cluster resource accepts the following input properties:
- ClusterConfig ClusterClusterConfig: Allows you to configure various aspects of the cluster. Structure defined below.
- GracefulDecommissionTimeout string: The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply.
- Labels Dictionary<string, string>: The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself, including goog-dataproc-cluster-name, which is the name of the cluster.
- Name string: The name of the cluster, unique within the project and zone.
- Project string: The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.
- Region string: The region in which the cluster and associated nodes will be created. Defaults to global.
- VirtualClusterConfig ClusterVirtualClusterConfig: Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.
- ClusterConfig ClusterClusterConfigArgs: Allows you to configure various aspects of the cluster. Structure defined below.
- GracefulDecommissionTimeout string: The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply.
- Labels map[string]string: The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself, including goog-dataproc-cluster-name, which is the name of the cluster.
- Name string: The name of the cluster, unique within the project and zone.
- Project string: The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.
- Region string: The region in which the cluster and associated nodes will be created. Defaults to global.
- VirtualClusterConfig ClusterVirtualClusterConfigArgs: Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.
- clusterConfig ClusterClusterConfig: Allows you to configure various aspects of the cluster. Structure defined below.
- gracefulDecommissionTimeout String: The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply.
- labels Map<String,String>: The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself, including goog-dataproc-cluster-name, which is the name of the cluster.
- name String: The name of the cluster, unique within the project and zone.
- project String: The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.
- region String: The region in which the cluster and associated nodes will be created. Defaults to global.
- virtualClusterConfig ClusterVirtualClusterConfig: Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.
- clusterConfig ClusterClusterConfig: Allows you to configure various aspects of the cluster. Structure defined below.
- gracefulDecommissionTimeout string: The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply.
- labels {[key: string]: string}: The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself, including goog-dataproc-cluster-name, which is the name of the cluster.
- name string: The name of the cluster, unique within the project and zone.
- project string: The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.
- region string: The region in which the cluster and associated nodes will be created. Defaults to global.
- virtualClusterConfig ClusterVirtualClusterConfig: Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.
- cluster_config ClusterClusterConfigArgs: Allows you to configure various aspects of the cluster. Structure defined below.
- graceful_decommission_timeout str: The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply.
- labels Mapping[str, str]: The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself, including goog-dataproc-cluster-name, which is the name of the cluster.
- name str: The name of the cluster, unique within the project and zone.
- project str: The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.
- region str: The region in which the cluster and associated nodes will be created. Defaults to global.
- virtual_cluster_config ClusterVirtualClusterConfigArgs: Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.
- clusterConfig Property Map: Allows you to configure various aspects of the cluster. Structure defined below.
- gracefulDecommissionTimeout String: The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply.
- labels Map<String>: The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself, including goog-dataproc-cluster-name, which is the name of the cluster.
- name String: The name of the cluster, unique within the project and zone.
- project String: The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.
- region String: The region in which the cluster and associated nodes will be created. Defaults to global.
- virtualClusterConfig Property Map: Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.
Outputs
All input properties are implicitly available as output properties. Additionally, the Cluster resource produces the following output properties:
- Id string
The provider-assigned unique ID for this managed resource.
- Id string
The provider-assigned unique ID for this managed resource.
- id String
The provider-assigned unique ID for this managed resource.
- id string
The provider-assigned unique ID for this managed resource.
- id str
The provider-assigned unique ID for this managed resource.
- id String
The provider-assigned unique ID for this managed resource.
Look up Existing Cluster Resource
Get an existing Cluster resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: ClusterState, opts?: CustomResourceOptions): Cluster
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
cluster_config: Optional[ClusterClusterConfigArgs] = None,
graceful_decommission_timeout: Optional[str] = None,
labels: Optional[Mapping[str, str]] = None,
name: Optional[str] = None,
project: Optional[str] = None,
region: Optional[str] = None,
virtual_cluster_config: Optional[ClusterVirtualClusterConfigArgs] = None) -> Cluster
func GetCluster(ctx *Context, name string, id IDInput, state *ClusterState, opts ...ResourceOption) (*Cluster, error)
public static Cluster Get(string name, Input<string> id, ClusterState? state, CustomResourceOptions? opts = null)
public static Cluster get(String name, Output<String> id, ClusterState state, CustomResourceOptions options)
Resource lookup is not supported in YAML
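As an illustration, a minimal TypeScript sketch of looking up an existing cluster's state; the resource ID format used here is an assumption, so use the ID reported by your provider/stack:
import * as gcp from "@pulumi/gcp";
// Look up an existing Dataproc cluster by logical name and provider-assigned ID.
// The ID value below is a hypothetical placeholder.
const existing = gcp.dataproc.Cluster.get("existing-cluster",
    "projects/my-project/regions/us-central1/clusters/mycluster");
// The looked-up resource exposes the same output properties as a newly created one.
export const existingLabels = existing.labels;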
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- ClusterConfig ClusterClusterConfig: Allows you to configure various aspects of the cluster. Structure defined below.
- GracefulDecommissionTimeout string: The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply.
- Labels Dictionary<string, string>: The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself, including goog-dataproc-cluster-name, which is the name of the cluster.
- Name string: The name of the cluster, unique within the project and zone.
- Project string: The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.
- Region string: The region in which the cluster and associated nodes will be created. Defaults to global.
- VirtualClusterConfig ClusterVirtualClusterConfig: Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.
- ClusterConfig ClusterClusterConfigArgs: Allows you to configure various aspects of the cluster. Structure defined below.
- GracefulDecommissionTimeout string: The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply.
- Labels map[string]string: The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself, including goog-dataproc-cluster-name, which is the name of the cluster.
- Name string: The name of the cluster, unique within the project and zone.
- Project string: The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.
- Region string: The region in which the cluster and associated nodes will be created. Defaults to global.
- VirtualClusterConfig ClusterVirtualClusterConfigArgs: Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.
- clusterConfig ClusterClusterConfig: Allows you to configure various aspects of the cluster. Structure defined below.
- gracefulDecommissionTimeout String: The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply.
- labels Map<String,String>: The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself, including goog-dataproc-cluster-name, which is the name of the cluster.
- name String: The name of the cluster, unique within the project and zone.
- project String: The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.
- region String: The region in which the cluster and associated nodes will be created. Defaults to global.
- virtualClusterConfig ClusterVirtualClusterConfig: Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.
- clusterConfig ClusterClusterConfig: Allows you to configure various aspects of the cluster. Structure defined below.
- gracefulDecommissionTimeout string: The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply.
- labels {[key: string]: string}: The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself, including goog-dataproc-cluster-name, which is the name of the cluster.
- name string: The name of the cluster, unique within the project and zone.
- project string: The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.
- region string: The region in which the cluster and associated nodes will be created. Defaults to global.
- virtualClusterConfig ClusterVirtualClusterConfig: Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.
- cluster_config ClusterClusterConfigArgs: Allows you to configure various aspects of the cluster. Structure defined below.
- graceful_decommission_timeout str: The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply.
- labels Mapping[str, str]: The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself, including goog-dataproc-cluster-name, which is the name of the cluster.
- name str: The name of the cluster, unique within the project and zone.
- project str: The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.
- region str: The region in which the cluster and associated nodes will be created. Defaults to global.
- virtual_cluster_config ClusterVirtualClusterConfigArgs: Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.
- clusterConfig Property Map: Allows you to configure various aspects of the cluster. Structure defined below.
- gracefulDecommissionTimeout String: The timeout duration which allows graceful decommissioning when you change the number of worker nodes directly through a terraform apply.
- labels Map<String>: The list of labels (key/value pairs) to be applied to instances in the cluster. GCP generates some itself, including goog-dataproc-cluster-name, which is the name of the cluster.
- name String: The name of the cluster, unique within the project and zone.
- project String: The ID of the project in which the cluster will exist. If it is not provided, the provider project is used.
- region String: The region in which the cluster and associated nodes will be created. Defaults to global.
- virtualClusterConfig Property Map: Allows you to configure a virtual Dataproc on GKE cluster. Structure defined below.
Supporting Types
ClusterClusterConfig, ClusterClusterConfigArgs
- AutoscalingConfig ClusterClusterConfigAutoscalingConfig: The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.
- Bucket string
- DataprocMetricConfig ClusterClusterConfigDataprocMetricConfig: The Dataproc OSS metrics configuration for the cluster. Structure defined below.
- EncryptionConfig ClusterClusterConfigEncryptionConfig: The Customer managed encryption keys settings for the cluster. Structure defined below.
- EndpointConfig ClusterClusterConfigEndpointConfig: The config settings for port access on the cluster. Structure defined below.
- GceClusterConfig ClusterClusterConfigGceClusterConfig: Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.
- InitializationActions List<ClusterClusterConfigInitializationAction>: Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.
- LifecycleConfig ClusterClusterConfigLifecycleConfig: The settings for auto deletion cluster schedule. Structure defined below.
- MasterConfig ClusterClusterConfigMasterConfig: The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.
- MetastoreConfig ClusterClusterConfigMetastoreConfig: The config setting for metastore service with the cluster. Structure defined below.
- PreemptibleWorkerConfig ClusterClusterConfigPreemptibleWorkerConfig: The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below. NOTE: preemptible_worker_config is an alias for the API's secondaryWorkerConfig. The name doesn't necessarily mean it is preemptible and is named as such for legacy/compatibility reasons.
- SecurityConfig ClusterClusterConfigSecurityConfig: Security related configuration. Structure defined below.
- SoftwareConfig ClusterClusterConfigSoftwareConfig: The config settings for software inside the cluster. Structure defined below.
- StagingBucket string: The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.
- TempBucket string: The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.
- WorkerConfig ClusterClusterConfigWorkerConfig: The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.
- AutoscalingConfig ClusterClusterConfigAutoscalingConfig: The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.
- Bucket string
- DataprocMetricConfig ClusterClusterConfigDataprocMetricConfig: The Dataproc OSS metrics configuration for the cluster. Structure defined below.
- EncryptionConfig ClusterClusterConfigEncryptionConfig: The Customer managed encryption keys settings for the cluster. Structure defined below.
- EndpointConfig ClusterClusterConfigEndpointConfig: The config settings for port access on the cluster. Structure defined below.
- GceClusterConfig ClusterClusterConfigGceClusterConfig: Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.
- InitializationActions []ClusterClusterConfigInitializationAction: Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.
- LifecycleConfig ClusterClusterConfigLifecycleConfig: The settings for auto deletion cluster schedule. Structure defined below.
- MasterConfig ClusterClusterConfigMasterConfig: The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.
- MetastoreConfig ClusterClusterConfigMetastoreConfig: The config setting for metastore service with the cluster. Structure defined below.
- PreemptibleWorkerConfig ClusterClusterConfigPreemptibleWorkerConfig: The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below. NOTE: preemptible_worker_config is an alias for the API's secondaryWorkerConfig. The name doesn't necessarily mean it is preemptible and is named as such for legacy/compatibility reasons.
- SecurityConfig ClusterClusterConfigSecurityConfig: Security related configuration. Structure defined below.
- SoftwareConfig ClusterClusterConfigSoftwareConfig: The config settings for software inside the cluster. Structure defined below.
- StagingBucket string: The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.
- TempBucket string: The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.
- WorkerConfig ClusterClusterConfigWorkerConfig: The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.
- autoscalingConfig ClusterClusterConfigAutoscalingConfig: The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.
- bucket String
- dataprocMetricConfig ClusterClusterConfigDataprocMetricConfig: The Dataproc OSS metrics configuration for the cluster. Structure defined below.
- encryptionConfig ClusterClusterConfigEncryptionConfig: The Customer managed encryption keys settings for the cluster. Structure defined below.
- endpointConfig ClusterClusterConfigEndpointConfig: The config settings for port access on the cluster. Structure defined below.
- gceClusterConfig ClusterClusterConfigGceClusterConfig: Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.
- initializationActions List<ClusterClusterConfigInitializationAction>: Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.
- lifecycleConfig ClusterClusterConfigLifecycleConfig: The settings for auto deletion cluster schedule. Structure defined below.
- masterConfig ClusterClusterConfigMasterConfig: The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.
- metastoreConfig ClusterClusterConfigMetastoreConfig: The config setting for metastore service with the cluster. Structure defined below.
- preemptibleWorkerConfig ClusterClusterConfigPreemptibleWorkerConfig: The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below. NOTE: preemptible_worker_config is an alias for the API's secondaryWorkerConfig. The name doesn't necessarily mean it is preemptible and is named as such for legacy/compatibility reasons.
- securityConfig ClusterClusterConfigSecurityConfig: Security related configuration. Structure defined below.
- softwareConfig ClusterClusterConfigSoftwareConfig: The config settings for software inside the cluster. Structure defined below.
- stagingBucket String: The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.
- tempBucket String: The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.
- workerConfig ClusterClusterConfigWorkerConfig: The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.
- autoscalingConfig ClusterClusterConfigAutoscalingConfig: The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.
- bucket string
- dataprocMetricConfig ClusterClusterConfigDataprocMetricConfig: The Dataproc OSS metrics configuration for the cluster. Structure defined below.
- encryptionConfig ClusterClusterConfigEncryptionConfig: The Customer managed encryption keys settings for the cluster. Structure defined below.
- endpointConfig ClusterClusterConfigEndpointConfig: The config settings for port access on the cluster. Structure defined below.
- gceClusterConfig ClusterClusterConfigGceClusterConfig: Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.
- initializationActions ClusterClusterConfigInitializationAction[]: Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.
- lifecycleConfig ClusterClusterConfigLifecycleConfig: The settings for auto deletion cluster schedule. Structure defined below.
- masterConfig ClusterClusterConfigMasterConfig: The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.
- metastoreConfig ClusterClusterConfigMetastoreConfig: The config setting for metastore service with the cluster. Structure defined below.
- preemptibleWorkerConfig ClusterClusterConfigPreemptibleWorkerConfig: The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below. NOTE: preemptible_worker_config is an alias for the API's secondaryWorkerConfig. The name doesn't necessarily mean it is preemptible and is named as such for legacy/compatibility reasons.
- securityConfig ClusterClusterConfigSecurityConfig: Security related configuration. Structure defined below.
- softwareConfig ClusterClusterConfigSoftwareConfig: The config settings for software inside the cluster. Structure defined below.
- stagingBucket string: The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.
- tempBucket string: The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.
- workerConfig ClusterClusterConfigWorkerConfig: The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.
- autoscaling_config ClusterClusterConfigAutoscalingConfig: The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.
- bucket str
- dataproc_metric_config ClusterClusterConfigDataprocMetricConfig: The Dataproc OSS metrics configuration for the cluster. Structure defined below.
- encryption_config ClusterClusterConfigEncryptionConfig: The Customer managed encryption keys settings for the cluster. Structure defined below.
- endpoint_config ClusterClusterConfigEndpointConfig: The config settings for port access on the cluster. Structure defined below.
- gce_cluster_config ClusterClusterConfigGceClusterConfig: Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.
- initialization_actions Sequence[ClusterClusterConfigInitializationAction]: Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.
- lifecycle_config ClusterClusterConfigLifecycleConfig: The settings for auto deletion cluster schedule. Structure defined below.
- master_config ClusterClusterConfigMasterConfig: The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.
- metastore_config ClusterClusterConfigMetastoreConfig: The config setting for metastore service with the cluster. Structure defined below.
- preemptible_worker_config ClusterClusterConfigPreemptibleWorkerConfig: The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below. NOTE: preemptible_worker_config is an alias for the api's secondaryWorkerConfig. The name doesn't necessarily mean it is preemptible and is named as such for legacy/compatibility reasons.
- security_config ClusterClusterConfigSecurityConfig: Security related configuration. Structure defined below.
- software_config ClusterClusterConfigSoftwareConfig: The config settings for software inside the cluster. Structure defined below.
- staging_bucket str: The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.
- temp_bucket str: The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.
- worker_config ClusterClusterConfigWorkerConfig: The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.
- autoscalingConfig Property Map: The autoscaling policy config associated with the cluster. Note that once set, if autoscaling_config is the only field set in cluster_config, it can only be removed by setting policy_uri = "", rather than removing the whole block. Structure defined below.
- bucket String
- dataprocMetricConfig Property Map: The Dataproc OSS metrics configuration for the cluster. Structure defined below.
- encryptionConfig Property Map: The Customer managed encryption keys settings for the cluster. Structure defined below.
- endpointConfig Property Map: The config settings for port access on the cluster. Structure defined below.
- gceClusterConfig Property Map: Common config settings for resources of Google Compute Engine cluster instances, applicable to all instances in the cluster. Structure defined below.
- initializationActions List<Property Map>: Commands to execute on each node after config is completed. You can specify multiple versions of these. Structure defined below.
- lifecycleConfig Property Map: The settings for auto deletion cluster schedule. Structure defined below.
- masterConfig Property Map: The Google Compute Engine config settings for the master instances in a cluster. Structure defined below.
- metastoreConfig Property Map: The config setting for metastore service with the cluster. Structure defined below.
- preemptibleWorkerConfig Property Map: The Google Compute Engine config settings for the additional instances in a cluster. Structure defined below. NOTE: preemptible_worker_config is an alias for the API's secondaryWorkerConfig. The name doesn't necessarily mean it is preemptible and is named as such for legacy/compatibility reasons.
- securityConfig Property Map: Security related configuration. Structure defined below.
- softwareConfig Property Map: The config settings for software inside the cluster. Structure defined below.
- stagingBucket String: The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.
- tempBucket String: The Cloud Storage temp bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. Note: If you don't explicitly specify a temp_bucket then GCP will auto create / assign one for you.
- workerConfig Property Map: The Google Compute Engine config settings for the worker instances in a cluster. Structure defined below.
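For instance, a minimal TypeScript sketch that pins the staging and temp buckets instead of relying on the auto-created ones; the bucket names are hypothetical and must already exist in the project:
import * as gcp from "@pulumi/gcp";
// Use pre-created Cloud Storage buckets for staging and ephemeral job data.
const bucketCluster = new gcp.dataproc.Cluster("bucket-cluster", {
    region: "us-central1",
    clusterConfig: {
        stagingBucket: "my-dataproc-staging-bucket",
        tempBucket: "my-dataproc-temp-bucket",
    },
});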
ClusterClusterConfigAutoscalingConfig, ClusterClusterConfigAutoscalingConfigArgs
- PolicyUri string: The autoscaling policy used by the cluster. Only resource names including project ID and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] or projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]. Note that the policy must be in the same project and Cloud Dataproc region.
- PolicyUri string: The autoscaling policy used by the cluster. Only resource names including project ID and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] or projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]. Note that the policy must be in the same project and Cloud Dataproc region.
- policyUri String: The autoscaling policy used by the cluster. Only resource names including project ID and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] or projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]. Note that the policy must be in the same project and Cloud Dataproc region.
- policyUri string: The autoscaling policy used by the cluster. Only resource names including project ID and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] or projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]. Note that the policy must be in the same project and Cloud Dataproc region.
- policy_uri str: The autoscaling policy used by the cluster. Only resource names including project ID and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] or projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]. Note that the policy must be in the same project and Cloud Dataproc region.
- policyUri String: The autoscaling policy used by the cluster. Only resource names including project ID and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] or projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]. Note that the policy must be in the same project and Cloud Dataproc region.
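For example, a minimal TypeScript sketch that attaches an existing autoscaling policy by resource name; the project and policy IDs are placeholders:
import * as gcp from "@pulumi/gcp";
const autoscalingCluster = new gcp.dataproc.Cluster("autoscaling-cluster", {
    region: "us-central1",
    clusterConfig: {
        autoscalingConfig: {
            policyUri: "projects/my-project/locations/us-central1/autoscalingPolicies/my-policy",
        },
    },
});
// Per the note above, detach the policy later by setting policyUri to "" rather than
// removing the autoscalingConfig block.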
ClusterClusterConfigDataprocMetricConfig, ClusterClusterConfigDataprocMetricConfigArgs
- Metrics List<ClusterClusterConfigDataprocMetricConfigMetric>: Metrics sources to enable.
- Metrics []ClusterClusterConfigDataprocMetricConfigMetric: Metrics sources to enable.
- metrics List<ClusterClusterConfigDataprocMetricConfigMetric>: Metrics sources to enable.
- metrics ClusterClusterConfigDataprocMetricConfigMetric[]: Metrics sources to enable.
- metrics Sequence[ClusterClusterConfigDataprocMetricConfigMetric]: Metrics sources to enable.
- metrics List<Property Map>: Metrics sources to enable.
ClusterClusterConfigDataprocMetricConfigMetric, ClusterClusterConfigDataprocMetricConfigMetricArgs
- MetricSource string: A source for the collection of Dataproc OSS metrics (see available OSS metrics).
- MetricOverrides List<string>: One or more available OSS metrics (https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.
- MetricSource string: A source for the collection of Dataproc OSS metrics (see available OSS metrics).
- MetricOverrides []string: One or more available OSS metrics (https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.
- metricSource String: A source for the collection of Dataproc OSS metrics (see available OSS metrics).
- metricOverrides List<String>: One or more available OSS metrics (https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.
- metricSource string: A source for the collection of Dataproc OSS metrics (see available OSS metrics).
- metricOverrides string[]: One or more available OSS metrics (https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.
- metric_source str: A source for the collection of Dataproc OSS metrics (see available OSS metrics).
- metric_overrides Sequence[str]: One or more available OSS metrics (https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.
- metricSource String: A source for the collection of Dataproc OSS metrics (see available OSS metrics).
- metricOverrides List<String>: One or more available OSS metrics (https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) to collect for the metric source.
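As a sketch, enabling collection of a Spark OSS metric in TypeScript; the metric source and override names are illustrative values drawn from the public available OSS metrics list, not from this page:
import * as gcp from "@pulumi/gcp";
const metricsCluster = new gcp.dataproc.Cluster("metrics-cluster", {
    region: "us-central1",
    clusterConfig: {
        dataprocMetricConfig: {
            metrics: [{
                metricSource: "SPARK",
                metricOverrides: ["spark:driver:DAGScheduler:job.allJobs"],
            }],
        },
    },
});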
ClusterClusterConfigEncryptionConfig, ClusterClusterConfigEncryptionConfigArgs
- KmsKeyName string: The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.
- KmsKeyName string: The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.
- kmsKeyName String: The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.
- kmsKeyName string: The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.
- kms_key_name str: The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.
- kmsKeyName String: The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.
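A minimal TypeScript sketch of supplying a customer-managed key; the KMS key name is a hypothetical placeholder, and the Dataproc service agent must be granted access to it:
import * as gcp from "@pulumi/gcp";
const cmekCluster = new gcp.dataproc.Cluster("cmek-cluster", {
    region: "us-central1",
    clusterConfig: {
        encryptionConfig: {
            kmsKeyName: "projects/my-project/locations/us-central1/keyRings/my-ring/cryptoKeys/my-key",
        },
    },
});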
ClusterClusterConfigEndpointConfig, ClusterClusterConfigEndpointConfigArgs
- enableHttpPortAccess (bool): The flag to enable http access to specific ports on the cluster from external sources (aka Component Gateway). Defaults to false.
- httpPorts (Map&lt;string, any&gt;)
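A minimal Python sketch that turns on the Component Gateway; only the flag is set here, and httpPorts is read back from the created cluster rather than configured.
import pulumi_gcp as gcp

# Sketch: expose the cluster's web UIs through the Component Gateway.
gateway_cluster = gcp.dataproc.Cluster("gateway-cluster",
    region="us-central1",
    cluster_config={
        "endpoint_config": {
            "enable_http_port_access": True,
        },
    })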
ClusterClusterConfigGceClusterConfig, ClusterClusterConfigGceClusterConfigArgs
- internalIpOnly (bool): By default, clusters are not restricted to internal IP addresses, and will have ephemeral external IP addresses assigned to each instance. If set to true, all instances in the cluster will only have internal IP addresses. Note: Private Google Access (also known as privateIpGoogleAccess) must be enabled on the subnetwork that the cluster will be launched in.
- metadata (Map&lt;string, string&gt;): A map of the Compute Engine metadata entries to add to all instances (see Project and instance metadata).
- network (string): The name or self_link of the Google Compute Engine network the cluster will be part of. Conflicts with subnetwork. If neither is specified, this defaults to the "default" network.
- nodeGroupAffinity (ClusterClusterConfigGceClusterConfigNodeGroupAffinity): Node Group Affinity for sole-tenant clusters.
- reservationAffinity (ClusterClusterConfigGceClusterConfigReservationAffinity): Reservation Affinity for consuming zonal reservation.
- serviceAccount (string): The service account to be used by the Node VMs. If not specified, the "default" service account is used.
- serviceAccountScopes (List&lt;string&gt;): The set of Google API scopes to be made available on all of the node VMs under the service_account specified. Both OAuth2 URLs and gcloud short names are supported. To allow full access to all Cloud APIs, use the cloud-platform scope. See a complete list of scopes here.
- shieldedInstanceConfig (ClusterClusterConfigGceClusterConfigShieldedInstanceConfig): Shielded Instance Config for clusters using Compute Engine Shielded VMs.
- subnetwork (string): The name or self_link of the Google Compute Engine subnetwork the cluster will be part of. Conflicts with network.
- tags (List&lt;string&gt;): The list of instance tags applied to instances in the cluster. Tags are used to identify valid sources or targets for network firewalls.
- zone (string): The GCP zone where your data is stored and used (i.e. where the master and the worker nodes will be created in). If region is set to 'global' (default), then zone is mandatory; otherwise GCP is able to make use of Auto Zone Placement to determine this automatically for you. Note: This setting additionally determines and restricts which computing resources are available for use with other configs such as cluster_config.master_config.machine_type and cluster_config.worker_config.machine_type.
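Pulling several of these fields together, here is a minimal Python sketch of a VPC-internal cluster; the subnetwork self_link, service account email, tag, and metadata values are placeholders.
import pulumi_gcp as gcp

# Sketch: an internal-only cluster on a custom subnetwork with a dedicated service account.
# internal_ip_only requires Private Google Access on the chosen subnetwork.
private_cluster = gcp.dataproc.Cluster("private-cluster",
    region="us-central1",
    cluster_config={
        "gce_cluster_config": {
            "zone": "us-central1-a",
            "subnetwork": "projects/my-project/regions/us-central1/subnetworks/my-subnet",
            "internal_ip_only": True,
            "service_account": "dataproc-sa@my-project.iam.gserviceaccount.com",
            "service_account_scopes": ["cloud-platform"],
            "tags": ["dataproc-node"],
            "metadata": {"enable-oslogin": "true"},
        },
    })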
ClusterClusterConfigGceClusterConfigNodeGroupAffinity, ClusterClusterConfigGceClusterConfigNodeGroupAffinityArgs
- nodeGroupUri (string): The URI of a sole-tenant node group resource that the cluster will be created on.
ClusterClusterConfigGceClusterConfigReservationAffinity, ClusterClusterConfigGceClusterConfigReservationAffinityArgs
- consumeReservationType (string): Corresponds to the type of reservation consumption.
- key (string): Corresponds to the label key of reservation resource.
- values (List&lt;string&gt;): Corresponds to the label values of reservation resource.
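A hedged Python sketch of consuming a specific zonal reservation; the reservation name is a placeholder, and the SPECIFIC_RESERVATION value and compute.googleapis.com/reservation-name key follow general Compute Engine reservation-affinity conventions rather than anything stated above.
import pulumi_gcp as gcp

# Sketch: pin cluster VMs to a named Compute Engine reservation (values are illustrative).
reserved_cluster = gcp.dataproc.Cluster("reserved-cluster",
    region="us-central1",
    cluster_config={
        "gce_cluster_config": {
            "zone": "us-central1-a",
            "reservation_affinity": {
                "consume_reservation_type": "SPECIFIC_RESERVATION",
                "key": "compute.googleapis.com/reservation-name",
                "values": ["my-reservation"],
            },
        },
    })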
ClusterClusterConfigGceClusterConfigShieldedInstanceConfig, ClusterClusterConfigGceClusterConfigShieldedInstanceConfigArgs
- enableIntegrityMonitoring (bool): Defines whether instances have integrity monitoring enabled.
- enableSecureBoot (bool): Defines whether instances have Secure Boot enabled.
- enableVtpm (bool): Defines whether instances have the vTPM enabled.
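A minimal Python sketch enabling all three Shielded VM options on the cluster's instances.
import pulumi_gcp as gcp

# Sketch: run cluster nodes as Shielded VMs with Secure Boot, vTPM, and integrity monitoring.
shielded_cluster = gcp.dataproc.Cluster("shielded-cluster",
    region="us-central1",
    cluster_config={
        "gce_cluster_config": {
            "shielded_instance_config": {
                "enable_secure_boot": True,
                "enable_vtpm": True,
                "enable_integrity_monitoring": True,
            },
        },
    })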
ClusterClusterConfigInitializationAction, ClusterClusterConfigInitializationActionArgs
- script (string): The script to be executed during initialization of the cluster. The script must be a GCS file with a gs:// prefix.
- timeoutSec (int): The maximum duration (in seconds) which script is allowed to take to execute its action. GCP will default to a predetermined computed value if not set (currently 300).
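A minimal Python sketch of an initialization action; the gs:// script path is a placeholder.
import pulumi_gcp as gcp

# Sketch: run a startup script from Cloud Storage during cluster creation.
init_cluster = gcp.dataproc.Cluster("init-cluster",
    region="us-central1",
    cluster_config={
        "initialization_actions": [{
            "script": "gs://my-bucket/scripts/install-deps.sh",
            "timeout_sec": 500,
        }],
    })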
ClusterClusterConfigLifecycleConfig, ClusterClusterConfigLifecycleConfigArgs
- autoDeleteTime (string): The time when cluster will be auto-deleted. A timestamp in RFC3339 UTC "Zulu" format, accurate to nanoseconds. Example: "2014-10-02T15:01:23.045123456Z".
- idleDeleteTtl (string): The duration to keep the cluster alive while idling (no jobs running). After this TTL, the cluster will be deleted. Valid range: [10m, 14d].
- idleStartTime (string)
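A minimal Python sketch of an auto-deleting cluster; the idle TTL and the deletion timestamp are illustrative.
import pulumi_gcp as gcp

# Sketch: delete the cluster after an hour of idleness, or at a fixed time, whichever comes first.
ephemeral_cluster = gcp.dataproc.Cluster("ephemeral-cluster",
    region="us-central1",
    cluster_config={
        "lifecycle_config": {
            "idle_delete_ttl": "3600s",
            "auto_delete_time": "2030-01-01T00:00:00.000Z",
        },
    })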
ClusterClusterConfigMasterConfig, ClusterClusterConfigMasterConfigArgs
- accelerators (List&lt;ClusterClusterConfigMasterConfigAccelerator&gt;): The Compute Engine accelerator (GPU) configuration for these instances. Can be specified multiple times.
- diskConfig (ClusterClusterConfigMasterConfigDiskConfig): Disk Config.
- imageUri (string): The URI for the image to use for this node. See the guide for more information.
- instanceNames (List&lt;string&gt;)
- machineType (string): The name of a Google Compute Engine machine type to create for the master. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).
- minCpuPlatform (string): The name of a minimum generation of CPU family for the master. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.
- numInstances (int): Specifies the number of master nodes to create. If not specified, GCP will default to a predetermined computed value (currently 1).
ClusterClusterConfigMasterConfigAccelerator, ClusterClusterConfigMasterConfigAcceleratorArgs
- acceleratorCount (int): The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.
- acceleratorType (string): The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.

The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check accelerator availability by zone if you are trying to use accelerators in a given zone.
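Combining the two types above, a minimal Python sketch of a master node with one GPU attached; the zone is pinned explicitly because of the Auto Zone Placement caveat, and the machine and accelerator choices are illustrative.
import pulumi_gcp as gcp

# Sketch: a single master with one attached GPU; pin the zone so the accelerator is available there.
gpu_cluster = gcp.dataproc.Cluster("gpu-cluster",
    region="us-central1",
    cluster_config={
        "gce_cluster_config": {"zone": "us-central1-a"},
        "master_config": {
            "num_instances": 1,
            "machine_type": "n1-standard-4",
            "accelerators": [{
                "accelerator_type": "nvidia-tesla-k80",
                "accelerator_count": 1,
            }],
        },
    })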
ClusterClusterConfigMasterConfigDiskConfig, ClusterClusterConfigMasterConfigDiskConfigArgs
- bootDiskSizeGb (int): Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.
- bootDiskType (string): The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".
- numLocalSsds (int): The amount of local SSD disks that will be attached to each master cluster node. Defaults to 0.
ClusterClusterConfigMetastoreConfig, ClusterClusterConfigMetastoreConfigArgs
- dataprocMetastoreService (string): Resource name of an existing Dataproc Metastore service. Only resource names including project ID and location (region) are valid. Example: projects/[projectId]/locations/[dataproc_region]/services/[service-name]
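A minimal Python sketch attaching the cluster to an existing Dataproc Metastore service; the service resource name is a placeholder.
import pulumi_gcp as gcp

# Sketch: use an existing Dataproc Metastore service as the cluster's Hive metastore.
metastore_cluster = gcp.dataproc.Cluster("metastore-cluster",
    region="us-central1",
    cluster_config={
        "metastore_config": {
            "dataproc_metastore_service": "projects/my-project/locations/us-central1/services/my-metastore",
        },
    })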
ClusterClusterConfigPreemptibleWorkerConfig, ClusterClusterConfigPreemptibleWorkerConfigArgs
- diskConfig (ClusterClusterConfigPreemptibleWorkerConfigDiskConfig): Disk Config.
- instanceNames (List&lt;string&gt;)
- numInstances (int): Specifies the number of preemptible nodes to create. Defaults to 0.
- preemptibility (string): Specifies the preemptibility of the secondary workers. The default value is PREEMPTIBLE. Accepted values are: PREEMPTIBILITY_UNSPECIFIED, NON_PREEMPTIBLE, PREEMPTIBLE.
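A minimal Python sketch adding secondary (preemptible) workers with a small boot disk; the counts and sizes are illustrative.
import pulumi_gcp as gcp

# Sketch: two preemptible secondary workers with 30 GB standard boot disks.
secondary_cluster = gcp.dataproc.Cluster("secondary-cluster",
    region="us-central1",
    cluster_config={
        "preemptible_worker_config": {
            "num_instances": 2,
            "preemptibility": "PREEMPTIBLE",
            "disk_config": {
                "boot_disk_size_gb": 30,
                "boot_disk_type": "pd-standard",
            },
        },
    })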
ClusterClusterConfigPreemptibleWorkerConfigDiskConfig, ClusterClusterConfigPreemptibleWorkerConfigDiskConfigArgs
- bootDiskSizeGb (int): Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.
- bootDiskType (string): The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".
- numLocalSsds (int): The amount of local SSD disks that will be attached to each preemptible worker node. Defaults to 0.
ClusterClusterConfigSecurityConfig, ClusterClusterConfigSecurityConfigArgs
- kerberosConfig (ClusterClusterConfigSecurityConfigKerberosConfig): Kerberos Configuration.
ClusterClusterConfigSecurityConfigKerberosConfig, ClusterClusterConfigSecurityConfigKerberosConfigArgs
- kmsKeyUri (string): The URI of the KMS key used to encrypt various sensitive files.
- rootPrincipalPasswordUri (string): The Cloud Storage URI of a KMS encrypted file containing the root principal password.
- crossRealmTrustAdminServer (string): The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.
- crossRealmTrustKdc (string): The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.
- crossRealmTrustRealm (string): The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.
- crossRealmTrustSharedPasswordUri (string): The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.
- enableKerberos (bool): Flag to indicate whether to Kerberize the cluster.
- kdcDbKeyUri (string): The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.
- keyPasswordUri (string): The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.
- keystorePasswordUri (string): The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificate, the password is generated by Dataproc.
- keystoreUri (string): The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.
- realm (string): The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.
- tgtLifetimeHours (int): The lifetime of the ticket granting ticket, in hours.
- truststorePasswordUri (string): The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.
- truststoreUri (string): The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.
- Kms
Key stringUri The URI of the KMS key used to encrypt various sensitive files.
- Root
Principal stringPassword Uri The Cloud Storage URI of a KMS encrypted file containing the root principal password.
- Cross
Realm stringTrust Admin Server The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.
- Cross
Realm stringTrust Kdc The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.
- Cross
Realm stringTrust Realm The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.
- string
The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.
- Enable
Kerberos bool Flag to indicate whether to Kerberize the cluster.
- Kdc
Db stringKey Uri The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.
- Key
Password stringUri The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.
- Keystore
Password stringUri The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificated, the password is generated by Dataproc.
- Keystore
Uri string The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.
- Realm string
The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.
- Tgt
Lifetime intHours The lifetime of the ticket granting ticket, in hours.
- Truststore
Password stringUri The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.
- Truststore
Uri string The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.
- kms
Key StringUri The URI of the KMS key used to encrypt various sensitive files.
- root
Principal StringPassword Uri The Cloud Storage URI of a KMS encrypted file containing the root principal password.
- cross
Realm StringTrust Admin Server The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.
- cross
Realm StringTrust Kdc The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.
- cross
Realm StringTrust Realm The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.
- String
The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.
- enable
Kerberos Boolean Flag to indicate whether to Kerberize the cluster.
- kdc
Db StringKey Uri The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.
- key
Password StringUri The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.
- keystore
Password StringUri The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificated, the password is generated by Dataproc.
- keystore
Uri String The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.
- realm String
The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.
- tgt
Lifetime IntegerHours The lifetime of the ticket granting ticket, in hours.
- truststore
Password StringUri The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.
- truststore
Uri String The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.
- kms
Key stringUri The URI of the KMS key used to encrypt various sensitive files.
- root
Principal stringPassword Uri The Cloud Storage URI of a KMS encrypted file containing the root principal password.
- cross
Realm stringTrust Admin Server The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.
- cross
Realm stringTrust Kdc The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.
- cross
Realm stringTrust Realm The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.
- string
The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.
- enable
Kerberos boolean Flag to indicate whether to Kerberize the cluster.
- kdc
Db stringKey Uri The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.
- key
Password stringUri The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.
- keystore
Password stringUri The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificated, the password is generated by Dataproc.
- keystore
Uri string The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.
- realm string
The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.
- tgt
Lifetime numberHours The lifetime of the ticket granting ticket, in hours.
- truststore
Password stringUri The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.
- truststore
Uri string The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.
- kms_
key_ struri The URI of the KMS key used to encrypt various sensitive files.
- root_
principal_ strpassword_ uri The Cloud Storage URI of a KMS encrypted file containing the root principal password.
- cross_
realm_ strtrust_ admin_ server The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.
- cross_
realm_ strtrust_ kdc The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.
- cross_
realm_ strtrust_ realm The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.
- str
The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.
- enable_
kerberos bool Flag to indicate whether to Kerberize the cluster.
- kdc_
db_ strkey_ uri The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.
- key_
password_ struri The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.
- keystore_
password_ struri The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificated, the password is generated by Dataproc.
- keystore_
uri str The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.
- realm str
The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.
- tgt_
lifetime_ inthours The lifetime of the ticket granting ticket, in hours.
- truststore_
password_ struri The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.
- truststore_
uri str The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.
- kms
Key StringUri The URI of the KMS key used to encrypt various sensitive files.
- root
Principal StringPassword Uri The Cloud Storage URI of a KMS encrypted file containing the root principal password.
- cross
Realm StringTrust Admin Server The admin server (IP or hostname) for the remote trusted realm in a cross realm trust relationship.
- cross
Realm StringTrust Kdc The KDC (IP or hostname) for the remote trusted realm in a cross realm trust relationship.
- cross
Realm StringTrust Realm The remote realm the Dataproc on-cluster KDC will trust, should the user enable cross realm trust.
- String
The Cloud Storage URI of a KMS encrypted file containing the shared password between the on-cluster Kerberos realm and the remote trusted realm, in a cross realm trust relationship.
- enable
Kerberos Boolean Flag to indicate whether to Kerberize the cluster.
- kdc
Db StringKey Uri The Cloud Storage URI of a KMS encrypted file containing the master key of the KDC database.
- key
Password StringUri The Cloud Storage URI of a KMS encrypted file containing the password to the user provided key. For the self-signed certificate, this password is generated by Dataproc.
- keystore
Password StringUri The Cloud Storage URI of a KMS encrypted file containing the password to the user provided keystore. For the self-signed certificated, the password is generated by Dataproc.
- keystore
Uri String The Cloud Storage URI of the keystore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.
- realm String
The name of the on-cluster Kerberos realm. If not specified, the uppercased domain of hostnames will be the realm.
- tgt
Lifetime NumberHours The lifetime of the ticket granting ticket, in hours.
- truststore
Password StringUri The Cloud Storage URI of a KMS encrypted file containing the password to the user provided truststore. For the self-signed certificate, this password is generated by Dataproc.
- truststore
Uri String The Cloud Storage URI of the truststore file used for SSL encryption. If not provided, Dataproc will provide a self-signed certificate.
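As an illustration only, the TypeScript sketch below Kerberizes a cluster; the KMS key and the encrypted root-principal password file URI are placeholders for resources you would already manage, not values this resource creates.
import * as gcp from "@pulumi/gcp";

// Sketch: enable Kerberos on a cluster. The KMS key and the encrypted
// root-principal password file are assumed to exist already.
const kerberizedCluster = new gcp.dataproc.Cluster("kerberized-cluster", {
    region: "us-central1",
    clusterConfig: {
        securityConfig: {
            kerberosConfig: {
                enableKerberos: true,
                kmsKeyUri: "projects/my-project/locations/global/keyRings/my-ring/cryptoKeys/my-key",
                rootPrincipalPasswordUri: "gs://my-secrets-bucket/root-password.encrypted",
            },
        },
    },
});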
ClusterClusterConfigSoftwareConfig, ClusterClusterConfigSoftwareConfigArgs
- image_version (string) - The Cloud Dataproc image version to use for the cluster. This controls the set of software versions installed onto the nodes when you create the cluster. If not specified, defaults to the latest version. For a list of valid versions see Cloud Dataproc versions.
- optional_components (list(string)) - The set of optional components to activate on the cluster. See Available Optional Components.
- override_properties (map(string, string)) - A list of override and additional properties (key/value pairs) used to modify various aspects of the common configuration files used when creating a cluster. For a list of valid properties please see Cluster properties.
- properties (map(string, any)) - The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.
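For example, the following TypeScript sketch pins an image version, enables two optional components, and overrides a single cluster property; the specific values are illustrative.
import * as gcp from "@pulumi/gcp";

// Sketch: control the software installed on the cluster nodes.
const tunedCluster = new gcp.dataproc.Cluster("tuned-cluster", {
    region: "us-central1",
    clusterConfig: {
        softwareConfig: {
            imageVersion: "2.0.35-debian10",
            optionalComponents: ["JUPYTER", "ZEPPELIN"],
            overrideProperties: {
                "dataproc:dataproc.allow.zero.workers": "true",
            },
        },
    },
});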
ClusterClusterConfigWorkerConfig, ClusterClusterConfigWorkerConfigArgs
- accelerators (list(ClusterClusterConfigWorkerConfigAccelerator)) - The Compute Engine accelerator configuration for these instances. Can be specified multiple times.
- disk_config (ClusterClusterConfigWorkerConfigDiskConfig) - Disk configuration for the worker nodes. Structure defined below.
- image_uri (string) - The URI for the image to use for this worker. See the guide for more information.
- instance_names (list(string))
- machine_type (string) - The name of a Google Compute Engine machine type to create for the worker nodes. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).
- min_cpu_platform (string) - The name of a minimum generation of CPU family for the worker nodes. If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.
- num_instances (int) - Specifies the number of worker nodes to create. If not specified, GCP will default to a predetermined computed value (currently 2). There is currently a beta feature which allows you to run a Single Node Cluster. To take advantage of it, set "dataproc:dataproc.allow.zero.workers" = "true" in cluster_config.software_config.properties.
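As a short TypeScript sketch, the worker group below asks for three workers on a pinned minimum CPU platform; the machine type and instance count are only examples.
import * as gcp from "@pulumi/gcp";

// Sketch: size the primary worker group.
const workerSizedCluster = new gcp.dataproc.Cluster("worker-sized-cluster", {
    region: "us-central1",
    clusterConfig: {
        workerConfig: {
            numInstances: 3,
            machineType: "e2-standard-4",
            minCpuPlatform: "Intel Skylake",
        },
    },
});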
ClusterClusterConfigWorkerConfigAccelerator, ClusterClusterConfigWorkerConfigAcceleratorArgs
- accelerator_count (int) - The number of the accelerator cards of this type exposed to this instance. Often restricted to one of 1, 2, 4, or 8.
- accelerator_type (string) - The short name of the accelerator type to expose to this instance. For example, nvidia-tesla-k80.
The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check accelerator availability by zone if you are trying to use accelerators in a given zone.
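The TypeScript sketch below attaches one GPU to each worker and pins the zone so Auto Zone Placement cannot land where that accelerator is unavailable; the zone and accelerator type are placeholders to adapt.
import * as gcp from "@pulumi/gcp";

// Sketch: GPU workers. Pinning the zone avoids the 400 "accelerator not
// found" error described above when Auto Zone Placement picks a zone
// without the requested accelerator.
const gpuCluster = new gcp.dataproc.Cluster("gpu-cluster", {
    region: "us-central1",
    clusterConfig: {
        gceClusterConfig: {
            zone: "us-central1-a",
        },
        workerConfig: {
            numInstances: 2,
            accelerators: [{
                acceleratorType: "nvidia-tesla-k80",
                acceleratorCount: 1,
            }],
        },
    },
});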
ClusterClusterConfigWorkerConfigDiskConfig, ClusterClusterConfigWorkerConfigDiskConfigArgs
- boot_disk_size_gb (int) - Size of the primary disk attached to each node, specified in GB. The primary disk contains the boot volume and system libraries, and the smallest allowed disk size is 10GB. GCP will default to a predetermined computed value if not set (currently 500GB). Note: If SSDs are not attached, it also contains the HDFS data blocks and Hadoop working directories.
- boot_disk_type (string) - The disk type of the primary disk attached to each node. One of "pd-ssd" or "pd-standard". Defaults to "pd-standard".
- num_local_ssds (int) - The number of local SSD disks that will be attached to each worker cluster node. Defaults to 0.
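For instance, a TypeScript sketch that gives each worker an SSD boot disk plus one local SSD; the sizes are illustrative.
import * as gcp from "@pulumi/gcp";

// Sketch: worker disk layout.
const ssdCluster = new gcp.dataproc.Cluster("ssd-cluster", {
    region: "us-central1",
    clusterConfig: {
        workerConfig: {
            numInstances: 2,
            diskConfig: {
                bootDiskType: "pd-ssd",
                bootDiskSizeGb: 100,
                numLocalSsds: 1,
            },
        },
    },
});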
ClusterVirtualClusterConfig, ClusterVirtualClusterConfigArgs
- auxiliary_services_config (ClusterVirtualClusterConfigAuxiliaryServicesConfig) - Configuration of auxiliary services used by this cluster. Structure defined below.
- kubernetes_cluster_config (ClusterVirtualClusterConfigKubernetesClusterConfig) - The configuration for running the Dataproc cluster on Kubernetes. Structure defined below.
- staging_bucket (string) - The Cloud Storage staging bucket used to stage files, such as Hadoop jars, between client machines and the cluster. Note: If you don't explicitly specify a staging_bucket then GCP will auto create / assign one for you. However, you are not guaranteed an auto-generated bucket which is solely dedicated to your cluster; it may be shared with other clusters in the same region/zone also choosing to use the auto generation option.
ClusterVirtualClusterConfigAuxiliaryServicesConfig, ClusterVirtualClusterConfigAuxiliaryServicesConfigArgs
- metastore_config (ClusterVirtualClusterConfigAuxiliaryServicesConfigMetastoreConfig) - The Hive Metastore configuration for this workload.
- spark_history_server_config (ClusterVirtualClusterConfigAuxiliaryServicesConfigSparkHistoryServerConfig) - The Spark History Server configuration for the workload.
ClusterVirtualClusterConfigAuxiliaryServicesConfigMetastoreConfig, ClusterVirtualClusterConfigAuxiliaryServicesConfigMetastoreConfigArgs
- dataproc_metastore_service (string) - Resource name of an existing Dataproc Metastore service. Only resource names that include the project ID and location (region) are valid. Example: projects/[projectId]/locations/[dataproc_region]/services/[service-name]
ClusterVirtualClusterConfigAuxiliaryServicesConfigSparkHistoryServerConfig, ClusterVirtualClusterConfigAuxiliaryServicesConfigSparkHistoryServerConfigArgs
- dataproc_cluster (string) - Resource name of an existing Dataproc cluster to act as a Spark History Server for the workload.
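For illustration, the TypeScript sketch below wires a virtual cluster to an existing Dataproc Metastore service and an existing history-server cluster; both resource names are placeholders, and a kubernetes_cluster_config (described below) is still required for a complete virtual cluster.
import * as gcp from "@pulumi/gcp";

// Sketch: auxiliary services for a Dataproc virtual cluster. Both resource
// names are placeholders for resources that already exist. A
// kubernetesClusterConfig is omitted here; see the fuller sketch below.
const vcWithAuxServices = new gcp.dataproc.Cluster("vc-with-aux-services", {
    region: "us-central1",
    virtualClusterConfig: {
        auxiliaryServicesConfig: {
            metastoreConfig: {
                dataprocMetastoreService: "projects/my-project/locations/us-central1/services/my-metastore",
            },
            sparkHistoryServerConfig: {
                dataprocCluster: "projects/my-project/regions/us-central1/clusters/my-history-cluster",
            },
        },
    },
});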
ClusterVirtualClusterConfigKubernetesClusterConfig, ClusterVirtualClusterConfigKubernetesClusterConfigArgs
- gke_cluster_config (ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfig) - The configuration for running the Dataproc cluster on GKE.
- kubernetes_software_config (ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfig) - The software configuration for this Dataproc cluster running on Kubernetes.
- kubernetes_namespace (string) - A namespace within the Kubernetes cluster to deploy into. If this namespace does not exist, it is created. If it exists, Dataproc verifies that another Dataproc VirtualCluster is not installed into it. If not specified, the name of the Dataproc Cluster is used.
ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfig, ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigArgs
- gke_cluster_target (string) - A target GKE cluster to deploy to. It must be in the same project and region as the Dataproc cluster (the GKE cluster can be zonal or regional).
- node_pool_targets (list(ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTarget)) - GKE node pools where workloads will be scheduled. At least one node pool must be assigned the DEFAULT GkeNodePoolTarget.Role. If a GkeNodePoolTarget is not specified, Dataproc constructs a DEFAULT GkeNodePoolTarget. Each role can be given to only one GkeNodePoolTarget. All node pools must have the same location settings.
ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTarget, ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetArgs
- NodePool string
The target GKE node pool.
- Roles List<string>
The roles associated with the GKE node pool. One of "DEFAULT", "CONTROLLER", "SPARK_DRIVER" or "SPARK_EXECUTOR".
- NodePoolConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig
The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.
- NodePool string
The target GKE node pool.
- Roles []string
The roles associated with the GKE node pool. One of "DEFAULT", "CONTROLLER", "SPARK_DRIVER" or "SPARK_EXECUTOR".
- NodePoolConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig
The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.
- nodePool String
The target GKE node pool.
- roles List<String>
The roles associated with the GKE node pool. One of "DEFAULT", "CONTROLLER", "SPARK_DRIVER" or "SPARK_EXECUTOR".
- nodePoolConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig
The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.
- nodePool string
The target GKE node pool.
- roles string[]
The roles associated with the GKE node pool. One of "DEFAULT", "CONTROLLER", "SPARK_DRIVER" or "SPARK_EXECUTOR".
- nodePoolConfig ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig
The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.
- node_pool str
The target GKE node pool.
- roles Sequence[str]
The roles associated with the GKE node pool. One of "DEFAULT", "CONTROLLER", "SPARK_DRIVER" or "SPARK_EXECUTOR".
- node_pool_config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig
The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.
- nodePool String
The target GKE node pool.
- roles List<String>
The roles associated with the GKE node pool. One of "DEFAULT", "CONTROLLER", "SPARK_DRIVER" or "SPARK_EXECUTOR".
- nodePoolConfig Property Map
The configuration for the GKE node pool. If specified, Dataproc attempts to create a node pool with the specified shape. If one with the same name already exists, it is verified against all specified fields. If a field differs, the virtual cluster creation will fail.
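Because each of the four roles may be assigned to only one GkeNodePoolTarget, spreading roles across pools looks roughly like the following TypeScript fragment; the node pool paths are placeholders.
// Fragment only: each role appears on exactly one target, one target holds DEFAULT,
// and all pools must share the same location settings.
const nodePoolTargets = [
    {
        nodePool: "projects/my-project/locations/us-central1/clusters/my-gke-cluster/nodePools/dp-control",
        roles: ["DEFAULT", "CONTROLLER"],
    },
    {
        nodePool: "projects/my-project/locations/us-central1/clusters/my-gke-cluster/nodePools/dp-spark",
        roles: ["SPARK_DRIVER", "SPARK_EXECUTOR"],
    },
];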
ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig, ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigArgs
- Locations List<string>
The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.
- Autoscaling ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling
The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.
- Config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig
The node pool configuration.
- Locations []string
The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.
- Autoscaling ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling
The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.
- Config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig
The node pool configuration.
- locations List<String>
The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.
- autoscaling ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling
The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.
- config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig
The node pool configuration.
- locations string[]
The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.
- autoscaling ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling
The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.
- config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig
The node pool configuration.
- locations Sequence[str]
The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.
- autoscaling ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling
The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.
- config ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig
The node pool configuration.
- locations List<String>
The list of Compute Engine zones where node pool nodes associated with a Dataproc on GKE virtual cluster will be located.
- autoscaling Property Map
The autoscaler configuration for this node pool. The autoscaler is enabled only when a valid configuration is present.
- config Property Map
The node pool configuration.
ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling, ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscalingArgs
- MaxNodeCount int
The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.
- MinNodeCount int
The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.
- MaxNodeCount int
The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.
- MinNodeCount int
The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.
- maxNodeCount Integer
The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.
- minNodeCount Integer
The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.
- maxNodeCount number
The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.
- minNodeCount number
The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.
- max_node_count int
The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.
- min_node_count int
The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.
- maxNodeCount Number
The maximum number of nodes in the node pool. Must be >= minNodeCount, and must be > 0.
- minNodeCount Number
The minimum number of nodes in the node pool. Must be >= 0 and <= maxNodeCount.
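As a small illustration of the constraints above, an autoscaling fragment in TypeScript might look like this; the counts are arbitrary examples.
// Fragment only: the autoscaler is enabled because a valid configuration is present.
const autoscaling = {
    minNodeCount: 1, // must be >= 0 and <= maxNodeCount
    maxNodeCount: 6, // must be > 0 and >= minNodeCount
};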
ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig, ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfigArgs
- LocalSsdCount int
The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.
- MachineType string
The name of a Compute Engine machine type. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).
- MinCpuPlatform string
Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.
- Preemptible bool
Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).
- Spot bool
Spot flag for enabling Spot VM, which is a rebrand of the existing preemptible flag.
- LocalSsdCount int
The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.
- MachineType string
The name of a Compute Engine machine type. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).
- MinCpuPlatform string
Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.
- Preemptible bool
Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).
- Spot bool
Spot flag for enabling Spot VM, which is a rebrand of the existing preemptible flag.
- localSsdCount Integer
The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.
- machineType String
The name of a Compute Engine machine type. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).
- minCpuPlatform String
Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.
- preemptible Boolean
Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).
- spot Boolean
Spot flag for enabling Spot VM, which is a rebrand of the existing preemptible flag.
- localSsdCount number
The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.
- machineType string
The name of a Compute Engine machine type. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).
- minCpuPlatform string
Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.
- preemptible boolean
Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).
- spot boolean
Spot flag for enabling Spot VM, which is a rebrand of the existing preemptible flag.
- local_ssd_count int
The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.
- machine_type str
The name of a Compute Engine machine type. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).
- min_cpu_platform str
Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.
- preemptible bool
Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).
- spot bool
Spot flag for enabling Spot VM, which is a rebrand of the existing preemptible flag.
- localSsdCount Number
The number of local SSD disks to attach to the node, which is limited by the maximum number of disks allowable per zone.
- machineType String
The name of a Compute Engine machine type. If not specified, GCP will default to a predetermined computed value (currently n1-standard-4).
- minCpuPlatform String
Minimum CPU platform to be used by this instance. The instance may be scheduled on the specified or a newer CPU platform. Specify the friendly names of CPU platforms, such as "Intel Haswell" or "Intel Sandy Bridge". If not specified, GCP will default to a predetermined computed value for each zone. See the guide for details about which CPU families are available (and defaulted) for each zone.
- preemptible Boolean
Whether the nodes are created as preemptible VM instances. Preemptible nodes cannot be used in a node pool with the CONTROLLER role or in the DEFAULT node pool if the CONTROLLER role is not assigned (the DEFAULT node pool will assume the CONTROLLER role).
- spot Boolean
Spot flag for enabling Spot VM, which is a rebrand of the existing preemptible flag.
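Putting the node pool shape together, a config fragment in TypeScript could look like the sketch below; the machine type, SSD count, and CPU platform are illustrative values, and spot or preemptible nodes are not usable for a pool that carries the CONTROLLER role.
// Fragment only: values are placeholders, not recommendations.
const nodePoolMachineConfig = {
    machineType: "n1-standard-4",    // GCP picks a default if omitted
    localSsdCount: 1,                // limited by the per-zone maximum
    minCpuPlatform: "Intel Haswell", // friendly CPU platform name
    spot: true,                      // rebrand of the preemptible flag; avoid for CONTROLLER pools
};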
ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfig, ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfigArgs
- ComponentVersion Dictionary<string, string>
The components that should be installed in this Dataproc cluster. The key must be a string from the KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified. NOTE: component_version[SPARK] is mandatory to set, or the creation of the cluster will fail.
- Properties Dictionary<string, string>
The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.
- ComponentVersion map[string]string
The components that should be installed in this Dataproc cluster. The key must be a string from the KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified. NOTE: component_version[SPARK] is mandatory to set, or the creation of the cluster will fail.
- Properties map[string]string
The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.
- componentVersion Map<String,String>
The components that should be installed in this Dataproc cluster. The key must be a string from the KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified. NOTE: component_version[SPARK] is mandatory to set, or the creation of the cluster will fail.
- properties Map<String,String>
The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.
- componentVersion {[key: string]: string}
The components that should be installed in this Dataproc cluster. The key must be a string from the KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified. NOTE: component_version[SPARK] is mandatory to set, or the creation of the cluster will fail.
- properties {[key: string]: string}
The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.
- component_version Mapping[str, str]
The components that should be installed in this Dataproc cluster. The key must be a string from the KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified. NOTE: component_version[SPARK] is mandatory to set, or the creation of the cluster will fail.
- properties Mapping[str, str]
The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.
- componentVersion Map<String>
The components that should be installed in this Dataproc cluster. The key must be a string from the KubernetesComponent enumeration. The value is the version of the software to be installed. At least one entry must be specified. NOTE: component_version[SPARK] is mandatory to set, or the creation of the cluster will fail.
- properties Map<String>
The properties to set on daemon config files. Property keys are specified in prefix:property format, for example spark:spark.kubernetes.container.image.
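Because component_version[SPARK] is mandatory, a kubernetesSoftwareConfig fragment in TypeScript looks roughly like the following; the Spark version string and container image are illustrative placeholders.
// Fragment only: cluster creation fails if componentVersion has no SPARK entry.
const kubernetesSoftwareConfig = {
    componentVersion: {
        SPARK: "3.1-dataproc-7", // placeholder version string
    },
    properties: {
        // Keys use the prefix:property format.
        "spark:spark.kubernetes.container.image": "gcr.io/my-project/my-spark-image",
    },
};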
Import
This resource does not support import.
Package Details
- Repository
- Google Cloud (GCP) Classic pulumi/pulumi-gcp
- License
- Apache-2.0
- Notes
This Pulumi package is based on the
google-beta
Terraform Provider.