Cluster

Viewing docs for Databricks v0.4.0 (older version), published Mar 9, 2026 by Pulumi

    Import

    A cluster can be imported using its cluster ID:

     $ pulumi import databricks:index/cluster:Cluster this <cluster-id>
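
    After running pulumi import, add a matching Cluster declaration to your program so that subsequent pulumi up runs manage the imported resource. A minimal Python sketch (the resource name this and every property value below are placeholders; they must mirror the real cluster's configuration or the next update will show a diff):

    import pulumi
    import pulumi_databricks as databricks

    # Declaration corresponding to the imported cluster. All property values
    # are placeholders and must match the actual cluster configuration.
    this = databricks.Cluster(
        "this",
        spark_version="<spark-version>",
        node_type_id="<node-type-id>",
        num_workers=1,
        opts=pulumi.ResourceOptions(protect=True),  # optional: guard against accidental deletion
    )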
    

    Create Cluster Resource

    Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.

    Constructor syntax

    new Cluster(name: string, args: ClusterArgs, opts?: CustomResourceOptions);
    @overload
    def Cluster(resource_name: str,
                args: ClusterArgs,
                opts: Optional[ResourceOptions] = None)
    
    @overload
    def Cluster(resource_name: str,
                opts: Optional[ResourceOptions] = None,
                spark_version: Optional[str] = None,
                gcp_attributes: Optional[ClusterGcpAttributesArgs] = None,
                spark_env_vars: Optional[Mapping[str, Any]] = None,
                azure_attributes: Optional[ClusterAzureAttributesArgs] = None,
                cluster_id: Optional[str] = None,
                cluster_log_conf: Optional[ClusterClusterLogConfArgs] = None,
                cluster_name: Optional[str] = None,
                custom_tags: Optional[Mapping[str, Any]] = None,
                data_security_mode: Optional[str] = None,
                docker_image: Optional[ClusterDockerImageArgs] = None,
                driver_instance_pool_id: Optional[str] = None,
                driver_node_type_id: Optional[str] = None,
                idempotency_token: Optional[str] = None,
                aws_attributes: Optional[ClusterAwsAttributesArgs] = None,
                enable_local_disk_encryption: Optional[bool] = None,
                enable_elastic_disk: Optional[bool] = None,
                init_scripts: Optional[Sequence[ClusterInitScriptArgs]] = None,
                instance_pool_id: Optional[str] = None,
                is_pinned: Optional[bool] = None,
                libraries: Optional[Sequence[ClusterLibraryArgs]] = None,
                node_type_id: Optional[str] = None,
                num_workers: Optional[int] = None,
                policy_id: Optional[str] = None,
                single_user_name: Optional[str] = None,
                spark_conf: Optional[Mapping[str, Any]] = None,
                autoscale: Optional[ClusterAutoscaleArgs] = None,
                autotermination_minutes: Optional[int] = None,
                ssh_public_keys: Optional[Sequence[str]] = None)
    func NewCluster(ctx *Context, name string, args ClusterArgs, opts ...ResourceOption) (*Cluster, error)
    public Cluster(string name, ClusterArgs args, CustomResourceOptions? opts = null)
    public Cluster(String name, ClusterArgs args)
    public Cluster(String name, ClusterArgs args, CustomResourceOptions options)
    
    type: databricks:Cluster
    properties: # The arguments to resource properties.
    options: # Bag of options to control resource's behavior.
    
    

    Parameters

    name string
    The unique name of the resource.
    args ClusterArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    resource_name str
    The unique name of the resource.
    args ClusterArgs
    The arguments to resource properties.
    opts ResourceOptions
    Bag of options to control resource's behavior.
    ctx Context
    Context object for the current deployment.
    name string
    The unique name of the resource.
    args ClusterArgs
    The arguments to resource properties.
    opts ResourceOption
    Bag of options to control resource's behavior.
    name string
    The unique name of the resource.
    args ClusterArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    name String
    The unique name of the resource.
    args ClusterArgs
    The arguments to resource properties.
    options CustomResourceOptions
    Bag of options to control resource's behavior.

    Constructor example

    The following reference example uses placeholder values for all input properties.

    var clusterResource = new Databricks.Cluster("clusterResource", new()
    {
        SparkVersion = "string",
        GcpAttributes = new Databricks.Inputs.ClusterGcpAttributesArgs
        {
            Availability = "string",
            BootDiskSize = 0,
            GoogleServiceAccount = "string",
            UsePreemptibleExecutors = false,
            ZoneId = "string",
        },
        SparkEnvVars = 
        {
            { "string", "any" },
        },
        AzureAttributes = new Databricks.Inputs.ClusterAzureAttributesArgs
        {
            Availability = "string",
            FirstOnDemand = 0,
            SpotBidMaxPrice = 0,
        },
        ClusterId = "string",
        ClusterLogConf = new Databricks.Inputs.ClusterClusterLogConfArgs
        {
            Dbfs = new Databricks.Inputs.ClusterClusterLogConfDbfsArgs
            {
                Destination = "string",
            },
            S3 = new Databricks.Inputs.ClusterClusterLogConfS3Args
            {
                Destination = "string",
                CannedAcl = "string",
                EnableEncryption = false,
                EncryptionType = "string",
                Endpoint = "string",
                KmsKey = "string",
                Region = "string",
            },
        },
        ClusterName = "string",
        CustomTags = 
        {
            { "string", "any" },
        },
        DataSecurityMode = "string",
        DockerImage = new Databricks.Inputs.ClusterDockerImageArgs
        {
            Url = "string",
            BasicAuth = new Databricks.Inputs.ClusterDockerImageBasicAuthArgs
            {
                Password = "string",
                Username = "string",
            },
        },
        DriverInstancePoolId = "string",
        DriverNodeTypeId = "string",
        IdempotencyToken = "string",
        AwsAttributes = new Databricks.Inputs.ClusterAwsAttributesArgs
        {
            Availability = "string",
            EbsVolumeCount = 0,
            EbsVolumeSize = 0,
            EbsVolumeType = "string",
            FirstOnDemand = 0,
            InstanceProfileArn = "string",
            SpotBidPricePercent = 0,
            ZoneId = "string",
        },
        EnableLocalDiskEncryption = false,
        EnableElasticDisk = false,
        InitScripts = new[]
        {
            new Databricks.Inputs.ClusterInitScriptArgs
            {
                Dbfs = new Databricks.Inputs.ClusterInitScriptDbfsArgs
                {
                    Destination = "string",
                },
                File = new Databricks.Inputs.ClusterInitScriptFileArgs
                {
                    Destination = "string",
                },
                S3 = new Databricks.Inputs.ClusterInitScriptS3Args
                {
                    Destination = "string",
                    CannedAcl = "string",
                    EnableEncryption = false,
                    EncryptionType = "string",
                    Endpoint = "string",
                    KmsKey = "string",
                    Region = "string",
                },
            },
        },
        InstancePoolId = "string",
        IsPinned = false,
        Libraries = new[]
        {
            new Databricks.Inputs.ClusterLibraryArgs
            {
                Cran = new Databricks.Inputs.ClusterLibraryCranArgs
                {
                    Package = "string",
                    Repo = "string",
                },
                Egg = "string",
                Jar = "string",
                Maven = new Databricks.Inputs.ClusterLibraryMavenArgs
                {
                    Coordinates = "string",
                    Exclusions = new[]
                    {
                        "string",
                    },
                    Repo = "string",
                },
                Pypi = new Databricks.Inputs.ClusterLibraryPypiArgs
                {
                    Package = "string",
                    Repo = "string",
                },
                Whl = "string",
            },
        },
        NodeTypeId = "string",
        NumWorkers = 0,
        PolicyId = "string",
        SingleUserName = "string",
        SparkConf = 
        {
            { "string", "any" },
        },
        Autoscale = new Databricks.Inputs.ClusterAutoscaleArgs
        {
            MaxWorkers = 0,
            MinWorkers = 0,
        },
        AutoterminationMinutes = 0,
        SshPublicKeys = new[]
        {
            "string",
        },
    });
    
    example, err := databricks.NewCluster(ctx, "clusterResource", &databricks.ClusterArgs{
    	SparkVersion: pulumi.String("string"),
    	GcpAttributes: &databricks.ClusterGcpAttributesArgs{
    		Availability:            pulumi.String("string"),
    		BootDiskSize:            pulumi.Int(0),
    		GoogleServiceAccount:    pulumi.String("string"),
    		UsePreemptibleExecutors: pulumi.Bool(false),
    		ZoneId:                  pulumi.String("string"),
    	},
    	SparkEnvVars: pulumi.Map{
    		"string": pulumi.Any("any"),
    	},
    	AzureAttributes: &databricks.ClusterAzureAttributesArgs{
    		Availability:    pulumi.String("string"),
    		FirstOnDemand:   pulumi.Int(0),
    		SpotBidMaxPrice: pulumi.Float64(0),
    	},
    	ClusterId: pulumi.String("string"),
    	ClusterLogConf: &databricks.ClusterClusterLogConfArgs{
    		Dbfs: &databricks.ClusterClusterLogConfDbfsArgs{
    			Destination: pulumi.String("string"),
    		},
    		S3: &databricks.ClusterClusterLogConfS3Args{
    			Destination:      pulumi.String("string"),
    			CannedAcl:        pulumi.String("string"),
    			EnableEncryption: pulumi.Bool(false),
    			EncryptionType:   pulumi.String("string"),
    			Endpoint:         pulumi.String("string"),
    			KmsKey:           pulumi.String("string"),
    			Region:           pulumi.String("string"),
    		},
    	},
    	ClusterName: pulumi.String("string"),
    	CustomTags: pulumi.Map{
    		"string": pulumi.Any("any"),
    	},
    	DataSecurityMode: pulumi.String("string"),
    	DockerImage: &databricks.ClusterDockerImageArgs{
    		Url: pulumi.String("string"),
    		BasicAuth: &databricks.ClusterDockerImageBasicAuthArgs{
    			Password: pulumi.String("string"),
    			Username: pulumi.String("string"),
    		},
    	},
    	DriverInstancePoolId: pulumi.String("string"),
    	DriverNodeTypeId:     pulumi.String("string"),
    	IdempotencyToken:     pulumi.String("string"),
    	AwsAttributes: &databricks.ClusterAwsAttributesArgs{
    		Availability:        pulumi.String("string"),
    		EbsVolumeCount:      pulumi.Int(0),
    		EbsVolumeSize:       pulumi.Int(0),
    		EbsVolumeType:       pulumi.String("string"),
    		FirstOnDemand:       pulumi.Int(0),
    		InstanceProfileArn:  pulumi.String("string"),
    		SpotBidPricePercent: pulumi.Int(0),
    		ZoneId:              pulumi.String("string"),
    	},
    	EnableLocalDiskEncryption: pulumi.Bool(false),
    	EnableElasticDisk:         pulumi.Bool(false),
    	InitScripts: databricks.ClusterInitScriptArray{
    		&databricks.ClusterInitScriptArgs{
    			Dbfs: &databricks.ClusterInitScriptDbfsArgs{
    				Destination: pulumi.String("string"),
    			},
    			File: &databricks.ClusterInitScriptFileArgs{
    				Destination: pulumi.String("string"),
    			},
    			S3: &databricks.ClusterInitScriptS3Args{
    				Destination:      pulumi.String("string"),
    				CannedAcl:        pulumi.String("string"),
    				EnableEncryption: pulumi.Bool(false),
    				EncryptionType:   pulumi.String("string"),
    				Endpoint:         pulumi.String("string"),
    				KmsKey:           pulumi.String("string"),
    				Region:           pulumi.String("string"),
    			},
    		},
    	},
    	InstancePoolId: pulumi.String("string"),
    	IsPinned:       pulumi.Bool(false),
    	Libraries: databricks.ClusterLibraryArray{
    		&databricks.ClusterLibraryArgs{
    			Cran: &databricks.ClusterLibraryCranArgs{
    				Package: pulumi.String("string"),
    				Repo:    pulumi.String("string"),
    			},
    			Egg: pulumi.String("string"),
    			Jar: pulumi.String("string"),
    			Maven: &databricks.ClusterLibraryMavenArgs{
    				Coordinates: pulumi.String("string"),
    				Exclusions: pulumi.StringArray{
    					pulumi.String("string"),
    				},
    				Repo: pulumi.String("string"),
    			},
    			Pypi: &databricks.ClusterLibraryPypiArgs{
    				Package: pulumi.String("string"),
    				Repo:    pulumi.String("string"),
    			},
    			Whl: pulumi.String("string"),
    		},
    	},
    	NodeTypeId:     pulumi.String("string"),
    	NumWorkers:     pulumi.Int(0),
    	PolicyId:       pulumi.String("string"),
    	SingleUserName: pulumi.String("string"),
    	SparkConf: pulumi.Map{
    		"string": pulumi.Any("any"),
    	},
    	Autoscale: &databricks.ClusterAutoscaleArgs{
    		MaxWorkers: pulumi.Int(0),
    		MinWorkers: pulumi.Int(0),
    	},
    	AutoterminationMinutes: pulumi.Int(0),
    	SshPublicKeys: pulumi.StringArray{
    		pulumi.String("string"),
    	},
    })
    
    var clusterResource = new Cluster("clusterResource", ClusterArgs.builder()
        .sparkVersion("string")
        .gcpAttributes(ClusterGcpAttributesArgs.builder()
            .availability("string")
            .bootDiskSize(0)
            .googleServiceAccount("string")
            .usePreemptibleExecutors(false)
            .zoneId("string")
            .build())
        .sparkEnvVars(Map.of("string", "any"))
        .azureAttributes(ClusterAzureAttributesArgs.builder()
            .availability("string")
            .firstOnDemand(0)
            .spotBidMaxPrice(0.0)
            .build())
        .clusterId("string")
        .clusterLogConf(ClusterClusterLogConfArgs.builder()
            .dbfs(ClusterClusterLogConfDbfsArgs.builder()
                .destination("string")
                .build())
            .s3(ClusterClusterLogConfS3Args.builder()
                .destination("string")
                .cannedAcl("string")
                .enableEncryption(false)
                .encryptionType("string")
                .endpoint("string")
                .kmsKey("string")
                .region("string")
                .build())
            .build())
        .clusterName("string")
        .customTags(Map.of("string", "any"))
        .dataSecurityMode("string")
        .dockerImage(ClusterDockerImageArgs.builder()
            .url("string")
            .basicAuth(ClusterDockerImageBasicAuthArgs.builder()
                .password("string")
                .username("string")
                .build())
            .build())
        .driverInstancePoolId("string")
        .driverNodeTypeId("string")
        .idempotencyToken("string")
        .awsAttributes(ClusterAwsAttributesArgs.builder()
            .availability("string")
            .ebsVolumeCount(0)
            .ebsVolumeSize(0)
            .ebsVolumeType("string")
            .firstOnDemand(0)
            .instanceProfileArn("string")
            .spotBidPricePercent(0)
            .zoneId("string")
            .build())
        .enableLocalDiskEncryption(false)
        .enableElasticDisk(false)
        .initScripts(ClusterInitScriptArgs.builder()
            .dbfs(ClusterInitScriptDbfsArgs.builder()
                .destination("string")
                .build())
            .file(ClusterInitScriptFileArgs.builder()
                .destination("string")
                .build())
            .s3(ClusterInitScriptS3Args.builder()
                .destination("string")
                .cannedAcl("string")
                .enableEncryption(false)
                .encryptionType("string")
                .endpoint("string")
                .kmsKey("string")
                .region("string")
                .build())
            .build())
        .instancePoolId("string")
        .isPinned(false)
        .libraries(ClusterLibraryArgs.builder()
            .cran(ClusterLibraryCranArgs.builder()
                .package_("string")
                .repo("string")
                .build())
            .egg("string")
            .jar("string")
            .maven(ClusterLibraryMavenArgs.builder()
                .coordinates("string")
                .exclusions("string")
                .repo("string")
                .build())
            .pypi(ClusterLibraryPypiArgs.builder()
                .package_("string")
                .repo("string")
                .build())
            .whl("string")
            .build())
        .nodeTypeId("string")
        .numWorkers(0)
        .policyId("string")
        .singleUserName("string")
        .sparkConf(Map.of("string", "any"))
        .autoscale(ClusterAutoscaleArgs.builder()
            .maxWorkers(0)
            .minWorkers(0)
            .build())
        .autoterminationMinutes(0)
        .sshPublicKeys("string")
        .build());
    
    cluster_resource = databricks.Cluster("clusterResource",
        spark_version="string",
        gcp_attributes={
            "availability": "string",
            "boot_disk_size": 0,
            "google_service_account": "string",
            "use_preemptible_executors": False,
            "zone_id": "string",
        },
        spark_env_vars={
            "string": "any",
        },
        azure_attributes={
            "availability": "string",
            "first_on_demand": 0,
            "spot_bid_max_price": 0,
        },
        cluster_id="string",
        cluster_log_conf={
            "dbfs": {
                "destination": "string",
            },
            "s3": {
                "destination": "string",
                "canned_acl": "string",
                "enable_encryption": False,
                "encryption_type": "string",
                "endpoint": "string",
                "kms_key": "string",
                "region": "string",
            },
        },
        cluster_name="string",
        custom_tags={
            "string": "any",
        },
        data_security_mode="string",
        docker_image={
            "url": "string",
            "basic_auth": {
                "password": "string",
                "username": "string",
            },
        },
        driver_instance_pool_id="string",
        driver_node_type_id="string",
        idempotency_token="string",
        aws_attributes={
            "availability": "string",
            "ebs_volume_count": 0,
            "ebs_volume_size": 0,
            "ebs_volume_type": "string",
            "first_on_demand": 0,
            "instance_profile_arn": "string",
            "spot_bid_price_percent": 0,
            "zone_id": "string",
        },
        enable_local_disk_encryption=False,
        enable_elastic_disk=False,
        init_scripts=[{
            "dbfs": {
                "destination": "string",
            },
            "file": {
                "destination": "string",
            },
            "s3": {
                "destination": "string",
                "canned_acl": "string",
                "enable_encryption": False,
                "encryption_type": "string",
                "endpoint": "string",
                "kms_key": "string",
                "region": "string",
            },
        }],
        instance_pool_id="string",
        is_pinned=False,
        libraries=[{
            "cran": {
                "package": "string",
                "repo": "string",
            },
            "egg": "string",
            "jar": "string",
            "maven": {
                "coordinates": "string",
                "exclusions": ["string"],
                "repo": "string",
            },
            "pypi": {
                "package": "string",
                "repo": "string",
            },
            "whl": "string",
        }],
        node_type_id="string",
        num_workers=0,
        policy_id="string",
        single_user_name="string",
        spark_conf={
            "string": "any",
        },
        autoscale={
            "max_workers": 0,
            "min_workers": 0,
        },
        autotermination_minutes=0,
        ssh_public_keys=["string"])
    
    const clusterResource = new databricks.Cluster("clusterResource", {
        sparkVersion: "string",
        gcpAttributes: {
            availability: "string",
            bootDiskSize: 0,
            googleServiceAccount: "string",
            usePreemptibleExecutors: false,
            zoneId: "string",
        },
        sparkEnvVars: {
            string: "any",
        },
        azureAttributes: {
            availability: "string",
            firstOnDemand: 0,
            spotBidMaxPrice: 0,
        },
        clusterId: "string",
        clusterLogConf: {
            dbfs: {
                destination: "string",
            },
            s3: {
                destination: "string",
                cannedAcl: "string",
                enableEncryption: false,
                encryptionType: "string",
                endpoint: "string",
                kmsKey: "string",
                region: "string",
            },
        },
        clusterName: "string",
        customTags: {
            string: "any",
        },
        dataSecurityMode: "string",
        dockerImage: {
            url: "string",
            basicAuth: {
                password: "string",
                username: "string",
            },
        },
        driverInstancePoolId: "string",
        driverNodeTypeId: "string",
        idempotencyToken: "string",
        awsAttributes: {
            availability: "string",
            ebsVolumeCount: 0,
            ebsVolumeSize: 0,
            ebsVolumeType: "string",
            firstOnDemand: 0,
            instanceProfileArn: "string",
            spotBidPricePercent: 0,
            zoneId: "string",
        },
        enableLocalDiskEncryption: false,
        enableElasticDisk: false,
        initScripts: [{
            dbfs: {
                destination: "string",
            },
            file: {
                destination: "string",
            },
            s3: {
                destination: "string",
                cannedAcl: "string",
                enableEncryption: false,
                encryptionType: "string",
                endpoint: "string",
                kmsKey: "string",
                region: "string",
            },
        }],
        instancePoolId: "string",
        isPinned: false,
        libraries: [{
            cran: {
                "package": "string",
                repo: "string",
            },
            egg: "string",
            jar: "string",
            maven: {
                coordinates: "string",
                exclusions: ["string"],
                repo: "string",
            },
            pypi: {
                "package": "string",
                repo: "string",
            },
            whl: "string",
        }],
        nodeTypeId: "string",
        numWorkers: 0,
        policyId: "string",
        singleUserName: "string",
        sparkConf: {
            string: "any",
        },
        autoscale: {
            maxWorkers: 0,
            minWorkers: 0,
        },
        autoterminationMinutes: 0,
        sshPublicKeys: ["string"],
    });
    
    type: databricks:Cluster
    properties:
        autoscale:
            maxWorkers: 0
            minWorkers: 0
        autoterminationMinutes: 0
        awsAttributes:
            availability: string
            ebsVolumeCount: 0
            ebsVolumeSize: 0
            ebsVolumeType: string
            firstOnDemand: 0
            instanceProfileArn: string
            spotBidPricePercent: 0
            zoneId: string
        azureAttributes:
            availability: string
            firstOnDemand: 0
            spotBidMaxPrice: 0
        clusterId: string
        clusterLogConf:
            dbfs:
                destination: string
            s3:
                cannedAcl: string
                destination: string
                enableEncryption: false
                encryptionType: string
                endpoint: string
                kmsKey: string
                region: string
        clusterName: string
        customTags:
            string: any
        dataSecurityMode: string
        dockerImage:
            basicAuth:
                password: string
                username: string
            url: string
        driverInstancePoolId: string
        driverNodeTypeId: string
        enableElasticDisk: false
        enableLocalDiskEncryption: false
        gcpAttributes:
            availability: string
            bootDiskSize: 0
            googleServiceAccount: string
            usePreemptibleExecutors: false
            zoneId: string
        idempotencyToken: string
        initScripts:
            - dbfs:
                destination: string
              file:
                destination: string
              s3:
                cannedAcl: string
                destination: string
                enableEncryption: false
                encryptionType: string
                endpoint: string
                kmsKey: string
                region: string
        instancePoolId: string
        isPinned: false
        libraries:
            - cran:
                package: string
                repo: string
              egg: string
              jar: string
              maven:
                coordinates: string
                exclusions:
                    - string
                repo: string
              pypi:
                package: string
                repo: string
              whl: string
        nodeTypeId: string
        numWorkers: 0
        policyId: string
        singleUserName: string
        sparkConf:
            string: any
        sparkEnvVars:
            string: any
        sparkVersion: string
        sshPublicKeys:
            - string
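
    The reference example above uses placeholder values throughout. As a more realistic starting point, the following Python sketch uses the provider's databricks.get_node_type and databricks.get_spark_version data sources to pick a node type and runtime, and creates an autoscaling, auto-terminating cluster (the lookup arguments local_disk and long_term_support are illustrative choices, not requirements):

    import pulumi_databricks as databricks

    # Look up the smallest node type with a local disk and the latest LTS runtime.
    smallest = databricks.get_node_type(local_disk=True)
    latest_lts = databricks.get_spark_version(long_term_support=True)

    shared_autoscaling = databricks.Cluster(
        "sharedAutoscaling",
        cluster_name="Shared Autoscaling",
        spark_version=latest_lts.id,
        node_type_id=smallest.id,
        autotermination_minutes=20,  # shut down after 20 idle minutes
        autoscale=databricks.ClusterAutoscaleArgs(
            min_workers=1,
            max_workers=10,
        ),
    )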
    

    Cluster Resource Properties

    To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

    Inputs

    In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
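
    For example, the following two declarations of the autoscale input are equivalent (a brief illustration; the version and node type IDs are placeholders):

    import pulumi_databricks as databricks

    # As a typed argument class:
    cluster_a = databricks.Cluster(
        "clusterA",
        spark_version="<spark-version>",
        node_type_id="<node-type-id>",
        autoscale=databricks.ClusterAutoscaleArgs(min_workers=1, max_workers=4),
    )

    # As a dictionary literal with snake_case keys:
    cluster_b = databricks.Cluster(
        "clusterB",
        spark_version="<spark-version>",
        node_type_id="<node-type-id>",
        autoscale={"min_workers": 1, "max_workers": 4},
    )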

    The Cluster resource accepts the following input properties:

    SparkVersion string
    Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
    Autoscale ClusterAutoscale
    AutoterminationMinutes int
    Automatically terminate the cluster after being inactive for this time in minutes. If not set, Databricks won't automatically terminate an inactive cluster. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. We highly recommend having this setting present for Interactive/BI clusters.
    AwsAttributes ClusterAwsAttributes
    AzureAttributes ClusterAzureAttributes
    ClusterId string
    ClusterLogConf ClusterClusterLogConf
    ClusterName string
    Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
    CustomTags Dictionary<string, object>
    Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS EC2 instances and EBS volumes) with these tags in addition to default_tags.
    DataSecurityMode string
    Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. LEGACY_PASSTHROUGH is for passthrough clusters and LEGACY_TABLE_ACL for Table ACL clusters. Defaults to NONE, i.e., no security features enabled.
    DockerImage ClusterDockerImage
    DriverInstancePoolId string
    Similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, the driver will be allocated from that pool.
    DriverNodeTypeId string
    The node type of the Spark driver. This field is optional; if unset, the API will set the driver node type to the same value as node_type_id defined above.
    EnableElasticDisk bool
    If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
    EnableLocalDiskEncryption bool
    Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally that is unique to each cluster node and encrypts all data stored on local disks. The scope of the key is local to each cluster node, and it is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
    GcpAttributes ClusterGcpAttributes
    IdempotencyToken string
    An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure, you can retry until the request succeeds. The Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
    InitScripts List<ClusterInitScript>
    InstancePoolId string
    To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and can be reused by a different cluster.
    IsPinned bool
    Boolean value specifying if the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The number of pinned clusters is limited to 70, so apply may fail if you have more than that.
    Libraries List<ClusterLibrary>
    NodeTypeId string
    Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
    NumWorkers int
    PolicyId string
    Identifier of a Cluster Policy to validate the cluster against and to preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf.
    SingleUserName string
    The optional user name of the user to assign to an interactive cluster. This field is required when using standard AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
    SparkConf Dictionary<string, object>
    Map with key-value pairs to fine-tune Spark clusters, where you can provide custom Spark configuration properties in a cluster configuration.
    SparkEnvVars Dictionary<string, object>
    Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
    SshPublicKeys List<string>
    SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
    SparkVersion string
    Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
    Autoscale ClusterAutoscaleArgs
    AutoterminationMinutes int
    Automatically terminate the cluster after being inactive for this time in minutes. If not set, Databricks won't automatically terminate an inactive cluster. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. We highly recommend having this setting present for Interactive/BI clusters.
    AwsAttributes ClusterAwsAttributesArgs
    AzureAttributes ClusterAzureAttributesArgs
    ClusterId string
    ClusterLogConf ClusterClusterLogConfArgs
    ClusterName string
    Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
    CustomTags map[string]interface{}
    Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS EC2 instances and EBS volumes) with these tags in addition to default_tags.
    DataSecurityMode string
    Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. LEGACY_PASSTHROUGH is for passthrough clusters and LEGACY_TABLE_ACL for Table ACL clusters. Defaults to NONE, i.e., no security features enabled.
    DockerImage ClusterDockerImageArgs
    DriverInstancePoolId string
    Similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, the driver will be allocated from that pool.
    DriverNodeTypeId string
    The node type of the Spark driver. This field is optional; if unset, the API will set the driver node type to the same value as node_type_id defined above.
    EnableElasticDisk bool
    If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
    EnableLocalDiskEncryption bool
    Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally that is unique to each cluster node and encrypts all data stored on local disks. The scope of the key is local to each cluster node, and it is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
    GcpAttributes ClusterGcpAttributesArgs
    IdempotencyToken string
    An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure, you can retry until the request succeeds. The Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
    InitScripts []ClusterInitScriptArgs
    InstancePoolId string
    To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and can be reused by a different cluster.
    IsPinned bool
    Boolean value specifying if the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The number of pinned clusters is limited to 70, so apply may fail if you have more than that.
    Libraries []ClusterLibraryArgs
    NodeTypeId string
    Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
    NumWorkers int
    PolicyId string
    Identifier of a Cluster Policy to validate the cluster against and to preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf.
    SingleUserName string
    The optional user name of the user to assign to an interactive cluster. This field is required when using standard AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
    SparkConf map[string]interface{}
    Map with key-value pairs to fine-tune Spark clusters, where you can provide custom Spark configuration properties in a cluster configuration.
    SparkEnvVars map[string]interface{}
    Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
    SshPublicKeys []string
    SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
    sparkVersion String
    Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
    autoscale ClusterAutoscale
    autoterminationMinutes Integer
    Automatically terminate the cluster after being inactive for this time in minutes. If not set, Databricks won't automatically terminate an inactive cluster. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. We highly recommend having this setting present for Interactive/BI clusters.
    awsAttributes ClusterAwsAttributes
    azureAttributes ClusterAzureAttributes
    clusterId String
    clusterLogConf ClusterClusterLogConf
    clusterName String
    Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
    customTags Map<String,Object>
    Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS EC2 instances and EBS volumes) with these tags in addition to default_tags.
    dataSecurityMode String
    Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. LEGACY_PASSTHROUGH is for passthrough clusters and LEGACY_TABLE_ACL for Table ACL clusters. Defaults to NONE, i.e., no security features enabled.
    dockerImage ClusterDockerImage
    driverInstancePoolId String
    Similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, the driver will be allocated from that pool.
    driverNodeTypeId String
    The node type of the Spark driver. This field is optional; if unset, the API will set the driver node type to the same value as node_type_id defined above.
    enableElasticDisk Boolean
    If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
    enableLocalDiskEncryption Boolean
    Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally that is unique to each cluster node and encrypts all data stored on local disks. The scope of the key is local to each cluster node, and it is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
    gcpAttributes ClusterGcpAttributes
    idempotencyToken String
    An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure, you can retry until the request succeeds. The Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
    initScripts List<ClusterInitScript>
    instancePoolId String
    To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and can be reused by a different cluster.
    isPinned Boolean
    Boolean value specifying if the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The number of pinned clusters is limited to 70, so apply may fail if you have more than that.
    libraries List<ClusterLibrary>
    nodeTypeId String
    Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
    numWorkers Integer
    policyId String
    Identifier of a Cluster Policy to validate the cluster against and to preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf.
    singleUserName String
    The optional user name of the user to assign to an interactive cluster. This field is required when using standard AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
    sparkConf Map<String,Object>
    Map with key-value pairs to fine-tune Spark clusters, where you can provide custom Spark configuration properties in a cluster configuration.
    sparkEnvVars Map<String,Object>
    Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
    sshPublicKeys List<String>
    SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
    sparkVersion string
    Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
    autoscale ClusterAutoscale
    autoterminationMinutes number
    Automatically terminate the cluster after being inactive for this time in minutes. If not set, Databricks won't automatically terminate an inactive cluster. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. We highly recommend having this setting present for Interactive/BI clusters.
    awsAttributes ClusterAwsAttributes
    azureAttributes ClusterAzureAttributes
    clusterId string
    clusterLogConf ClusterClusterLogConf
    clusterName string
    Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
    customTags {[key: string]: any}
    Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS EC2 instances and EBS volumes) with these tags in addition to default_tags.
    dataSecurityMode string
    Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. LEGACY_PASSTHROUGH is for passthrough clusters and LEGACY_TABLE_ACL for Table ACL clusters. Defaults to NONE, i.e., no security features enabled.
    dockerImage ClusterDockerImage
    driverInstancePoolId string
    Similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, the driver will be allocated from that pool.
    driverNodeTypeId string
    The node type of the Spark driver. This field is optional; if unset, the API will set the driver node type to the same value as node_type_id defined above.
    enableElasticDisk boolean
    If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
    enableLocalDiskEncryption boolean
    Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally that is unique to each cluster node and encrypts all data stored on local disks. The scope of the key is local to each cluster node, and it is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
    gcpAttributes ClusterGcpAttributes
    idempotencyToken string
    An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure, you can retry until the request succeeds. The Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
    initScripts ClusterInitScript[]
    instancePoolId string
    To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and can be reused by a different cluster.
    isPinned boolean
    Boolean value specifying if the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The number of pinned clusters is limited to 70, so apply may fail if you have more than that.
    libraries ClusterLibrary[]
    nodeTypeId string
    Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
    numWorkers number
    policyId string
    Identifier of a Cluster Policy to validate the cluster against and to preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf.
    singleUserName string
    The optional user name of the user to assign to an interactive cluster. This field is required when using standard AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
    sparkConf {[key: string]: any}
    Map with key-value pairs to fine-tune Spark clusters, where you can provide custom Spark configuration properties in a cluster configuration.
    sparkEnvVars {[key: string]: any}
    Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
    sshPublicKeys string[]
    SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
    spark_version str
    Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
    autoscale ClusterAutoscaleArgs
    autotermination_minutes int
    Automatically terminate the cluster after being inactive for this time in minutes. If not set, Databricks won't automatically terminate an inactive cluster. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. We highly recommend having this setting present for Interactive/BI clusters.
    aws_attributes ClusterAwsAttributesArgs
    azure_attributes ClusterAzureAttributesArgs
    cluster_id str
    cluster_log_conf ClusterClusterLogConfArgs
    cluster_name str
    Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
    custom_tags Mapping[str, Any]
    Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS EC2 instances and EBS volumes) with these tags in addition to default_tags.
    data_security_mode str
    Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. LEGACY_PASSTHROUGH is for passthrough clusters and LEGACY_TABLE_ACL for Table ACL clusters. Defaults to NONE, i.e., no security features enabled.
    docker_image ClusterDockerImageArgs
    driver_instance_pool_id str
    Similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, the driver will be allocated from that pool.
    driver_node_type_id str
    The node type of the Spark driver. This field is optional; if unset, the API will set the driver node type to the same value as node_type_id defined above.
    enable_elastic_disk bool
    If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
    enable_local_disk_encryption bool
    Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally that is unique to each cluster node and encrypts all data stored on local disks. The scope of the key is local to each cluster node, and it is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
    gcp_attributes ClusterGcpAttributesArgs
    idempotency_token str
    An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure, you can retry until the request succeeds. The Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
    init_scripts Sequence[ClusterInitScriptArgs]
    instance_pool_id str
    To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and can be reused by a different cluster.
    is_pinned bool
    Boolean value specifying if the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The number of pinned clusters is limited to 70, so apply may fail if you have more than that.
    libraries Sequence[ClusterLibraryArgs]
    node_type_id str
    Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
    num_workers int
    policy_id str
    Identifier of a Cluster Policy used to validate the cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf.
    single_user_name str
    The optional user name of the user to assign to an interactive cluster. This field is required when using standard AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
    spark_conf Mapping[str, Any]
    Map with key-value pairs to fine-tune Spark clusters, where you can provide custom Spark configuration properties in a cluster configuration.
    spark_env_vars Mapping[str, Any]
    Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
    ssh_public_keys Sequence[str]
    SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
    sparkVersion String
    Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
    autoscale Property Map
    autoterminationMinutes Number
    Automatically terminate the cluster after being inactive for this time in minutes. If not set, Databricks won't automatically terminate an inactive cluster. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. We highly recommend having this setting present for Interactive/BI clusters.
    awsAttributes Property Map
    azureAttributes Property Map
    clusterId String
    clusterLogConf Property Map
    clusterName String
    Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
    customTags Map<Any>
    Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS EC2 instances and EBS volumes) with these tags in addition to default_tags.
    dataSecurityMode String
    Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. Use LEGACY_PASSTHROUGH for passthrough clusters and LEGACY_TABLE_ACL for Table ACL clusters. Defaults to NONE, i.e., no security features are enabled.
    dockerImage Property Map
    driverInstancePoolId String
    Similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, the driver is allocated from that pool.
    driverNodeTypeId String
    The node type of the Spark driver. This field is optional; if unset, the API sets the driver node type to the same value as node_type_id defined above.
    enableElasticDisk Boolean
    If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
    enableLocalDiskEncryption Boolean
    Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key that is unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node, and the key is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
    gcpAttributes Property Map
    idempotencyToken String
    An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but will return the existing running cluster's ID instead. If you specify the idempotency token, you can retry upon failure until the request succeeds; the Databricks platform guarantees that exactly one cluster will be launched with that idempotency token. The token may have at most 64 characters.
    initScripts List<Property Map>
    instancePoolId String
    To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and can be reused by a different cluster.
    isPinned Boolean
    Boolean value specifying whether the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. At most 70 clusters can be pinned, so apply may fail if you already have that many.
    libraries List<Property Map>
    nodeTypeId String
    Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
    numWorkers Number
    policyId String
    Identifier of a Cluster Policy used to validate the cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf.
    singleUserName String
    The optional user name of the user to assign to an interactive cluster. This field is required when using standard AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
    sparkConf Map<Any>
    Map with key-value pairs to fine-tune Spark clusters, where you can provide custom Spark configuration properties in a cluster configuration.
    sparkEnvVars Map<Any>
    Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
    sshPublicKeys List<String>
    SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
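
    As a minimal sketch tying several of the arguments above together (TypeScript shown; the other SDKs are analogous), where the runtime version, node type, and tag values are illustrative placeholders rather than recommendations:

    import * as databricks from "@pulumi/databricks";

    // Illustrative values only; resolve real ids with databricks.getNodeType / databricks.getSparkVersion.
    const cluster = new databricks.Cluster("shared-autoscaling", {
        clusterName: "Shared Autoscaling",
        sparkVersion: "10.4.x-scala2.12",   // placeholder runtime id
        nodeTypeId: "i3.xlarge",            // not needed if instancePoolId is set
        autoterminationMinutes: 20,
        autoscale: {
            minWorkers: 1,
            maxWorkers: 4,
        },
        customTags: { Department: "analytics" },  // merged with default_tags by Databricks
    });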

    Outputs

    All input properties are implicitly available as output properties. Additionally, the Cluster resource produces the following output properties:

    DefaultTags Dictionary<string, object>
    (map) Tags that are added by Databricks by default, regardless of any custom_tags that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name:
    Id string
    The provider-assigned unique ID for this managed resource.
    State string
    (string) State of the cluster.
    Url string
    DefaultTags map[string]interface{}
    (map) Tags that are added by Databricks by default, regardless of any custom_tags that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name:
    Id string
    The provider-assigned unique ID for this managed resource.
    State string
    (string) State of the cluster.
    Url string
    defaultTags Map<String,Object>
    (map) Tags that are added by Databricks by default, regardless of any custom_tags that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name:
    id String
    The provider-assigned unique ID for this managed resource.
    state String
    (string) State of the cluster.
    url String
    defaultTags {[key: string]: any}
    (map) Tags that are added by Databricks by default, regardless of any custom_tags that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name:
    id string
    The provider-assigned unique ID for this managed resource.
    state string
    (string) State of the cluster.
    url string
    default_tags Mapping[str, Any]
    (map) Tags that are added by Databricks by default, regardless of any custom_tags that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name:
    id str
    The provider-assigned unique ID for this managed resource.
    state str
    (string) State of the cluster.
    url str
    defaultTags Map<Any>
    (map) Tags that are added by Databricks by default, regardless of any custom_tags that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name:
    id String
    The provider-assigned unique ID for this managed resource.
    state String
    (string) State of the cluster.
    url String
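
    Since all inputs are echoed back as outputs, the computed properties can be exported directly; a brief sketch reusing the cluster declared in the earlier example:

    // `cluster` refers to the resource declared in the sketch above.
    export const clusterState = cluster.state;    // e.g. RUNNING or TERMINATED
    export const clusterUrl = cluster.url;
    export const clusterDefaultTags = cluster.defaultTags;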

    Look up Existing Cluster Resource

    Get an existing Cluster resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

    public static get(name: string, id: Input<ID>, state?: ClusterState, opts?: CustomResourceOptions): Cluster
    @staticmethod
    def get(resource_name: str,
            id: str,
            opts: Optional[ResourceOptions] = None,
            autoscale: Optional[ClusterAutoscaleArgs] = None,
            autotermination_minutes: Optional[int] = None,
            aws_attributes: Optional[ClusterAwsAttributesArgs] = None,
            azure_attributes: Optional[ClusterAzureAttributesArgs] = None,
            cluster_id: Optional[str] = None,
            cluster_log_conf: Optional[ClusterClusterLogConfArgs] = None,
            cluster_name: Optional[str] = None,
            custom_tags: Optional[Mapping[str, Any]] = None,
            data_security_mode: Optional[str] = None,
            default_tags: Optional[Mapping[str, Any]] = None,
            docker_image: Optional[ClusterDockerImageArgs] = None,
            driver_instance_pool_id: Optional[str] = None,
            driver_node_type_id: Optional[str] = None,
            enable_elastic_disk: Optional[bool] = None,
            enable_local_disk_encryption: Optional[bool] = None,
            gcp_attributes: Optional[ClusterGcpAttributesArgs] = None,
            idempotency_token: Optional[str] = None,
            init_scripts: Optional[Sequence[ClusterInitScriptArgs]] = None,
            instance_pool_id: Optional[str] = None,
            is_pinned: Optional[bool] = None,
            libraries: Optional[Sequence[ClusterLibraryArgs]] = None,
            node_type_id: Optional[str] = None,
            num_workers: Optional[int] = None,
            policy_id: Optional[str] = None,
            single_user_name: Optional[str] = None,
            spark_conf: Optional[Mapping[str, Any]] = None,
            spark_env_vars: Optional[Mapping[str, Any]] = None,
            spark_version: Optional[str] = None,
            ssh_public_keys: Optional[Sequence[str]] = None,
            state: Optional[str] = None,
            url: Optional[str] = None) -> Cluster
    func GetCluster(ctx *Context, name string, id IDInput, state *ClusterState, opts ...ResourceOption) (*Cluster, error)
    public static Cluster Get(string name, Input<string> id, ClusterState? state, CustomResourceOptions? opts = null)
    public static Cluster get(String name, Output<String> id, ClusterState state, CustomResourceOptions options)
    resources:
      _:
        type: databricks:Cluster
        get:
          id: ${id}
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    resource_name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
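
    For illustration (TypeScript), a lookup of an existing cluster by its provider ID, where the ID string is a placeholder:

    import * as databricks from "@pulumi/databricks";

    // Adopt an existing cluster's state without managing its creation.
    const existing = databricks.Cluster.get("existing-cluster", "0123-456789-abcde0");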
    The following state arguments are supported:
    Autoscale ClusterAutoscale
    AutoterminationMinutes int
    Automatically terminate the cluster after being inactive for this time in minutes. If not set, Databricks won't automatically terminate an inactive cluster. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. We highly recommend having this setting present for Interactive/BI clusters.
    AwsAttributes ClusterAwsAttributes
    AzureAttributes ClusterAzureAttributes
    ClusterId string
    ClusterLogConf ClusterClusterLogConf
    ClusterName string
    Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
    CustomTags Dictionary<string, object>
    Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS EC2 instances and EBS volumes) with these tags in addition to default_tags.
    DataSecurityMode string
    Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. Use LEGACY_PASSTHROUGH for passthrough clusters and LEGACY_TABLE_ACL for Table ACL clusters. Defaults to NONE, i.e., no security features are enabled.
    DefaultTags Dictionary<string, object>
    (map) Tags that are added by Databricks by default, regardless of any custom_tags that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name:
    DockerImage ClusterDockerImage
    DriverInstancePoolId string
    Similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, the driver is allocated from that pool.
    DriverNodeTypeId string
    The node type of the Spark driver. This field is optional; if unset, the API sets the driver node type to the same value as node_type_id defined above.
    EnableElasticDisk bool
    If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
    EnableLocalDiskEncryption bool
    Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key that is unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node, and the key is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
    GcpAttributes ClusterGcpAttributes
    IdempotencyToken string
    An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but will return the existing running cluster's ID instead. If you specify the idempotency token, you can retry upon failure until the request succeeds; the Databricks platform guarantees that exactly one cluster will be launched with that idempotency token. The token may have at most 64 characters.
    InitScripts List<ClusterInitScript>
    InstancePoolId string
    To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and can be reused by a different cluster.
    IsPinned bool
    Boolean value specifying whether the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. At most 70 clusters can be pinned, so apply may fail if you already have that many.
    Libraries List<ClusterLibrary>
    NodeTypeId string
    Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
    NumWorkers int
    PolicyId string
    Identifier of a Cluster Policy used to validate the cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf.
    SingleUserName string
    The optional user name of the user to assign to an interactive cluster. This field is required when using standard AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
    SparkConf Dictionary<string, object>
    Map with key-value pairs to fine-tune Spark clusters, where you can provide custom Spark configuration properties in a cluster configuration.
    SparkEnvVars Dictionary<string, object>
    Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
    SparkVersion string
    Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
    SshPublicKeys List<string>
    SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
    State string
    (string) State of the cluster.
    Url string
    Autoscale ClusterAutoscaleArgs
    AutoterminationMinutes int
    Automatically terminate the cluster after being inactive for this time in minutes. If not set, Databricks won't automatically terminate an inactive cluster. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. We highly recommend having this setting present for Interactive/BI clusters.
    AwsAttributes ClusterAwsAttributesArgs
    AzureAttributes ClusterAzureAttributesArgs
    ClusterId string
    ClusterLogConf ClusterClusterLogConfArgs
    ClusterName string
    Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
    CustomTags map[string]interface{}
    Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS EC2 instances and EBS volumes) with these tags in addition to default_tags.
    DataSecurityMode string
    Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. Use LEGACY_PASSTHROUGH for passthrough clusters and LEGACY_TABLE_ACL for Table ACL clusters. Defaults to NONE, i.e., no security features are enabled.
    DefaultTags map[string]interface{}
    (map) Tags that are added by Databricks by default, regardless of any custom_tags that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name:
    DockerImage ClusterDockerImageArgs
    DriverInstancePoolId string
    Similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, the driver is allocated from that pool.
    DriverNodeTypeId string
    The node type of the Spark driver. This field is optional; if unset, the API sets the driver node type to the same value as node_type_id defined above.
    EnableElasticDisk bool
    If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
    EnableLocalDiskEncryption bool
    Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key that is unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node, and the key is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
    GcpAttributes ClusterGcpAttributesArgs
    IdempotencyToken string
    An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but will return the existing running cluster's ID instead. If you specify the idempotency token, you can retry upon failure until the request succeeds; the Databricks platform guarantees that exactly one cluster will be launched with that idempotency token. The token may have at most 64 characters.
    InitScripts []ClusterInitScriptArgs
    InstancePoolId string
    To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and can be reused by a different cluster.
    IsPinned bool
    Boolean value specifying whether the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. At most 70 clusters can be pinned, so apply may fail if you already have that many.
    Libraries []ClusterLibraryArgs
    NodeTypeId string
    Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
    NumWorkers int
    PolicyId string
    Identifier of a Cluster Policy used to validate the cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf.
    SingleUserName string
    The optional user name of the user to assign to an interactive cluster. This field is required when using standard AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
    SparkConf map[string]interface{}
    Map with key-value pairs to fine-tune Spark clusters, where you can provide custom Spark configuration properties in a cluster configuration.
    SparkEnvVars map[string]interface{}
    Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
    SparkVersion string
    Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
    SshPublicKeys []string
    SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
    State string
    (string) State of the cluster.
    Url string
    autoscale ClusterAutoscale
    autoterminationMinutes Integer
    Automatically terminate the cluster after being inactive for this time in minutes. If not set, Databricks won't automatically terminate an inactive cluster. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. We highly recommend having this setting present for Interactive/BI clusters.
    awsAttributes ClusterAwsAttributes
    azureAttributes ClusterAzureAttributes
    clusterId String
    clusterLogConf ClusterClusterLogConf
    clusterName String
    Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
    customTags Map<String,Object>
    Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS EC2 instances and EBS volumes) with these tags in addition to default_tags.
    dataSecurityMode String
    Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. Use LEGACY_PASSTHROUGH for passthrough clusters and LEGACY_TABLE_ACL for Table ACL clusters. Defaults to NONE, i.e., no security features are enabled.
    defaultTags Map<String,Object>
    (map) Tags that are added by Databricks by default, regardless of any custom_tags that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name:
    dockerImage ClusterDockerImage
    driverInstancePoolId String
    Similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, the driver is allocated from that pool.
    driverNodeTypeId String
    The node type of the Spark driver. This field is optional; if unset, the API sets the driver node type to the same value as node_type_id defined above.
    enableElasticDisk Boolean
    If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
    enableLocalDiskEncryption Boolean
    Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key that is unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node, and the key is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
    gcpAttributes ClusterGcpAttributes
    idempotencyToken String
    An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but will return the existing running cluster's ID instead. If you specify the idempotency token, you can retry upon failure until the request succeeds; the Databricks platform guarantees that exactly one cluster will be launched with that idempotency token. The token may have at most 64 characters.
    initScripts List<ClusterInitScript>
    instancePoolId String
    To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and can be reused by a different cluster.
    isPinned Boolean
    Boolean value specifying whether the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. At most 70 clusters can be pinned, so apply may fail if you already have that many.
    libraries List<ClusterLibrary>
    nodeTypeId String
    Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
    numWorkers Integer
    policyId String
    Identifier of a Cluster Policy used to validate the cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf.
    singleUserName String
    The optional user name of the user to assign to an interactive cluster. This field is required when using standard AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
    sparkConf Map<String,Object>
    Map with key-value pairs to fine-tune Spark clusters, where you can provide custom Spark configuration properties in a cluster configuration.
    sparkEnvVars Map<String,Object>
    Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
    sparkVersion String
    Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
    sshPublicKeys List<String>
    SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
    state String
    (string) State of the cluster.
    url String
    autoscale ClusterAutoscale
    autoterminationMinutes number
    Automatically terminate the cluster after being inactive for this time in minutes. If not set, Databricks won't automatically terminate an inactive cluster. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. We highly recommend having this setting present for Interactive/BI clusters.
    awsAttributes ClusterAwsAttributes
    azureAttributes ClusterAzureAttributes
    clusterId string
    clusterLogConf ClusterClusterLogConf
    clusterName string
    Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
    customTags {[key: string]: any}
    Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS EC2 instances and EBS volumes) with these tags in addition to default_tags.
    dataSecurityMode string
    Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. Use LEGACY_PASSTHROUGH for passthrough clusters and LEGACY_TABLE_ACL for Table ACL clusters. Defaults to NONE, i.e., no security features are enabled.
    defaultTags {[key: string]: any}
    (map) Tags that are added by Databricks by default, regardless of any custom_tags that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name:
    dockerImage ClusterDockerImage
    driverInstancePoolId string
    Similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, the driver is allocated from that pool.
    driverNodeTypeId string
    The node type of the Spark driver. This field is optional; if unset, the API sets the driver node type to the same value as node_type_id defined above.
    enableElasticDisk boolean
    If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
    enableLocalDiskEncryption boolean
    Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key that is unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node, and the key is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
    gcpAttributes ClusterGcpAttributes
    idempotencyToken string
    An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but will return the existing running cluster's ID instead. If you specify the idempotency token, you can retry upon failure until the request succeeds; the Databricks platform guarantees that exactly one cluster will be launched with that idempotency token. The token may have at most 64 characters.
    initScripts ClusterInitScript[]
    instancePoolId string
    To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and can be reused by a different cluster.
    isPinned boolean
    Boolean value specifying whether the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. At most 70 clusters can be pinned, so apply may fail if you already have that many.
    libraries ClusterLibrary[]
    nodeTypeId string
    Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
    numWorkers number
    policyId string
    Identifier of a Cluster Policy used to validate the cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf.
    singleUserName string
    The optional user name of the user to assign to an interactive cluster. This field is required when using standard AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
    sparkConf {[key: string]: any}
    Map with key-value pairs to fine-tune Spark clusters, where you can provide custom Spark configuration properties in a cluster configuration.
    sparkEnvVars {[key: string]: any}
    Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
    sparkVersion string
    Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
    sshPublicKeys string[]
    SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
    state string
    (string) State of the cluster.
    url string
    autoscale ClusterAutoscaleArgs
    autotermination_minutes int
    Automatically terminate the cluster after being inactive for this time in minutes. If not set, Databricks won't automatically terminate an inactive cluster. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. We highly recommend having this setting present for Interactive/BI clusters.
    aws_attributes ClusterAwsAttributesArgs
    azure_attributes ClusterAzureAttributesArgs
    cluster_id str
    cluster_log_conf ClusterClusterLogConfArgs
    cluster_name str
    Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
    custom_tags Mapping[str, Any]
    Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS EC2 instances and EBS volumes) with these tags in addition to default_tags.
    data_security_mode str
    Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. Use LEGACY_PASSTHROUGH for passthrough clusters and LEGACY_TABLE_ACL for Table ACL clusters. Defaults to NONE, i.e., no security features are enabled.
    default_tags Mapping[str, Any]
    (map) Tags that are added by Databricks by default, regardless of any custom_tags that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name:
    docker_image ClusterDockerImageArgs
    driver_instance_pool_id str
    Similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, the driver is allocated from that pool.
    driver_node_type_id str
    The node type of the Spark driver. This field is optional; if unset, the API sets the driver node type to the same value as node_type_id defined above.
    enable_elastic_disk bool
    If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
    enable_local_disk_encryption bool
    Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key that is unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node, and the key is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
    gcp_attributes ClusterGcpAttributesArgs
    idempotency_token str
    An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but will return the existing running cluster's ID instead. If you specify the idempotency token, you can retry upon failure until the request succeeds; the Databricks platform guarantees that exactly one cluster will be launched with that idempotency token. The token may have at most 64 characters.
    init_scripts Sequence[ClusterInitScriptArgs]
    instance_pool_id str
    To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and can be reused by a different cluster.
    is_pinned bool
    Boolean value specifying whether the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. At most 70 clusters can be pinned, so apply may fail if you already have that many.
    libraries Sequence[ClusterLibraryArgs]
    node_type_id str
    Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
    num_workers int
    policy_id str
    Identifier of a Cluster Policy used to validate the cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf.
    single_user_name str
    The optional user name of the user to assign to an interactive cluster. This field is required when using standard AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
    spark_conf Mapping[str, Any]
    Map with key-value pairs to fine-tune Spark clusters, where you can provide custom Spark configuration properties in a cluster configuration.
    spark_env_vars Mapping[str, Any]
    Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
    spark_version str
    Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
    ssh_public_keys Sequence[str]
    SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
    state str
    (string) State of the cluster.
    url str
    autoscale Property Map
    autoterminationMinutes Number
    Automatically terminate the cluster after being inactive for this time in minutes. If not set, Databricks won't automatically terminate an inactive cluster. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. We highly recommend having this setting present for Interactive/BI clusters.
    awsAttributes Property Map
    azureAttributes Property Map
    clusterId String
    clusterLogConf Property Map
    clusterName String
    Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
    customTags Map<Any>
    Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS EC2 instances and EBS volumes) with these tags in addition to default_tags.
    dataSecurityMode String
    Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. Use LEGACY_PASSTHROUGH for passthrough clusters and LEGACY_TABLE_ACL for Table ACL clusters. Defaults to NONE, i.e., no security features are enabled.
    defaultTags Map<Any>
    (map) Tags that are added by Databricks by default, regardless of any custom_tags that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name:
    dockerImage Property Map
    driverInstancePoolId String
    Similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, the driver is allocated from that pool.
    driverNodeTypeId String
    The node type of the Spark driver. This field is optional; if unset, the API sets the driver node type to the same value as node_type_id defined above.
    enableElasticDisk Boolean
    If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
    enableLocalDiskEncryption Boolean
    Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key that is unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node, and the key is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
    gcpAttributes Property Map
    idempotencyToken String
    An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but will return the existing running cluster's ID instead. If you specify the idempotency token, you can retry upon failure until the request succeeds; the Databricks platform guarantees that exactly one cluster will be launched with that idempotency token. The token may have at most 64 characters.
    initScripts List<Property Map>
    instancePoolId String
    To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and can be reused by a different cluster.
    isPinned Boolean
    Boolean value specifying whether the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. At most 70 clusters can be pinned, so apply may fail if you already have that many.
    libraries List<Property Map>
    nodeTypeId String
    Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
    numWorkers Number
    policyId String
    Identifier of a Cluster Policy used to validate the cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf.
    singleUserName String
    The optional user name of the user to assign to an interactive cluster. This field is required when using standard AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
    sparkConf Map<Any>
    Map with key-value pairs to fine-tune Spark clusters, where you can provide custom Spark configuration properties in a cluster configuration.
    sparkEnvVars Map<Any>
    Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
    sparkVersion String
    Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
    sshPublicKeys List<String>
    SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
    state String
    (string) State of the cluster.
    url String

    Supporting Types

    ClusterAutoscale, ClusterAutoscaleArgs

    maxWorkers Integer
    minWorkers Integer
    maxWorkers number
    minWorkers number
    maxWorkers Number
    minWorkers Number

    ClusterAwsAttributes, ClusterAwsAttributesArgs

    ClusterAzureAttributes, ClusterAzureAttributesArgs

    ClusterClusterLogConf, ClusterClusterLogConfArgs

    ClusterClusterLogConfDbfs, ClusterClusterLogConfDbfsArgs

    ClusterClusterLogConfS3, ClusterClusterLogConfS3Args

    Destination string
    CannedAcl string
    EnableEncryption bool
    EncryptionType string
    Endpoint string
    KmsKey string
    Region string
    Destination string
    CannedAcl string
    EnableEncryption bool
    EncryptionType string
    Endpoint string
    KmsKey string
    Region string
    destination String
    cannedAcl String
    enableEncryption Boolean
    encryptionType String
    endpoint String
    kmsKey String
    region String
    destination string
    cannedAcl string
    enableEncryption boolean
    encryptionType string
    endpoint string
    kmsKey string
    region string
    destination String
    cannedAcl String
    enableEncryption Boolean
    encryptionType String
    endpoint String
    kmsKey String
    region String
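
    A sketch of delivering cluster logs to S3 with these fields (TypeScript); the bucket name and region are hypothetical:

    const logged = new databricks.Cluster("logged", {
        sparkVersion: "10.4.x-scala2.12",   // placeholder runtime id
        nodeTypeId: "i3.xlarge",
        numWorkers: 1,
        clusterLogConf: {
            s3: {
                destination: "s3://acme-cluster-logs/prefix",  // hypothetical bucket
                region: "us-east-1",
                enableEncryption: true,
            },
        },
    });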

    ClusterDockerImage, ClusterDockerImageArgs

    ClusterDockerImageBasicAuth, ClusterDockerImageBasicAuthArgs

    Password string
    Username string
    Password string
    Username string
    password String
    username String
    password string
    username string
    password String
    username String
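
    For example, a cluster built from a custom container image with registry credentials might look like the following sketch; the image URL and config keys are hypothetical, and the password is pulled from Pulumi config rather than hard-coded:

    import * as pulumi from "@pulumi/pulumi";

    const cfg = new pulumi.Config();
    const containerized = new databricks.Cluster("containerized", {
        sparkVersion: "10.4.x-scala2.12",
        nodeTypeId: "i3.xlarge",
        numWorkers: 1,
        dockerImage: {
            url: "registry.example.com/team/runtime:latest",  // hypothetical image
            basicAuth: {
                username: cfg.require("registryUser"),             // hypothetical config key
                password: cfg.requireSecret("registryPassword"),   // kept as a secret
            },
        },
    });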

    ClusterGcpAttributes, ClusterGcpAttributesArgs

    ClusterInitScript, ClusterInitScriptArgs

    ClusterInitScriptDbfs, ClusterInitScriptDbfsArgs

    ClusterInitScriptFile, ClusterInitScriptFileArgs

    ClusterInitScriptS3, ClusterInitScriptS3Args

    Destination string
    CannedAcl string
    EnableEncryption bool
    EncryptionType string
    Endpoint string
    KmsKey string
    Region string
    Destination string
    CannedAcl string
    EnableEncryption bool
    EncryptionType string
    Endpoint string
    KmsKey string
    Region string
    destination String
    cannedAcl String
    enableEncryption Boolean
    encryptionType String
    endpoint String
    kmsKey String
    region String
    destination string
    cannedAcl string
    enableEncryption boolean
    encryptionType string
    endpoint string
    kmsKey string
    region string
    destination String
    cannedAcl String
    enableEncryption Boolean
    encryptionType String
    endpoint String
    kmsKey String
    region String
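
    A sketch combining a DBFS-hosted and an S3-hosted init script; both destinations are placeholder paths:

    const withInit = new databricks.Cluster("with-init", {
        sparkVersion: "10.4.x-scala2.12",
        nodeTypeId: "i3.xlarge",
        numWorkers: 1,
        initScripts: [
            { dbfs: { destination: "dbfs:/init-scripts/install-deps.sh" } },              // placeholder path
            { s3: { destination: "s3://acme-init/bootstrap.sh", region: "us-east-1" } },  // placeholder bucket
        ],
    });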

    ClusterLibrary, ClusterLibraryArgs

    ClusterLibraryCran, ClusterLibraryCranArgs

    Package string
    Repo string
    Package string
    Repo string
    package_ String
    repo String
    package string
    repo string
    package str
    repo str
    package String
    repo String

    ClusterLibraryMaven, ClusterLibraryMavenArgs

    Coordinates string
    Exclusions List<string>
    Repo string
    Coordinates string
    Exclusions []string
    Repo string
    coordinates String
    exclusions List<String>
    repo String
    coordinates string
    exclusions string[]
    repo string
    coordinates str
    exclusions Sequence[str]
    repo str
    coordinates String
    exclusions List<String>
    repo String

    ClusterLibraryPypi, ClusterLibraryPypiArgs

    Package string
    Repo string
    Package string
    Repo string
    package_ String
    repo String
    package string
    repo string
    package str
    repo str
    package String
    repo String
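
    Tying the library blocks together, a cluster can preinstall packages from several ecosystems at once; the package names below are arbitrary examples:

    const withLibs = new databricks.Cluster("with-libs", {
        sparkVersion: "10.4.x-scala2.12",
        nodeTypeId: "i3.xlarge",
        numWorkers: 1,
        libraries: [
            { pypi: { package: "simplejson" } },
            { cran: { package: "data.table" } },
            { maven: { coordinates: "com.amazon.deequ:deequ:1.2.2-spark-3.0" } },
        ],
    });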

    Package Details

    Repository
    databricks pulumi/pulumi-databricks
    License
    Apache-2.0
    Notes
    This Pulumi package is based on the databricks Terraform Provider.