Databricks v1.14.0, May 23, 2023

databricks.getNodeType

Note If you have a fully automated setup with workspaces created by databricks_mws_workspaces, make sure to add the depends_on attribute to prevent default auth: cannot configure default credentials errors.
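
For illustration, here is a minimal TypeScript sketch of one way to express that dependency in Pulumi: an explicit databricks.Provider configured from the workspace's outputs, which makes the lookup wait for the workspace. The workspace variable and the provider name ws are hypothetical.

import * as databricks from "@pulumi/databricks";

// Assumes `workspace` is a databricks.MwsWorkspaces resource defined elsewhere
// in the program (hypothetical; shown only to wire up the dependency).
declare const workspace: databricks.MwsWorkspaces;

// A provider configured from workspace outputs implicitly depends on the
// workspace, so invokes scoped to it won't run against default credentials
// before the workspace exists.
const ws = new databricks.Provider("ws", {
    host: workspace.workspaceUrl,
});

const smallest = databricks.getNodeTypeOutput({ localDisk: true }, { provider: ws });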

Gets the smallest node type for databricks.Cluster that fits search criteria such as the amount of RAM or the number of cores, on AWS or Azure. Internally, the data source fetches the node types available in the cloud, similar to executing databricks clusters list-node-types, and filters them to return the smallest node that matches the criteria.

Note This is experimental functionality, which aims to simplify things. If wrong parameters are given (e.g. min_gpus = 876) or no nodes match, the data source will return the cloud-default node type, even though it doesn't match the search criteria specified by the data source arguments: i3.xlarge for AWS or Standard_D3_v2 for Azure.
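
Because that fallback is silent, it can be worth sanity-checking the resolved ID. A short TypeScript sketch (the i3.xlarge comparison assumes AWS; adjust for Azure):

import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";

const withGpu = databricks.getNodeType({ minGpus: 1, minCores: 16 });

// If the lookup fell back to the AWS default, no node type matched the
// criteria above; surface that during deployment instead of silently
// provisioning the wrong instance type.
withGpu.then(result => {
    if (result.id === "i3.xlarge") {
        pulumi.log.warn("getNodeType fell back to the cloud default node type");
    }
});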

The following resources are used in the same context:

  • End-to-end workspace management guide.
  • databricks.Cluster to create Databricks Clusters.
  • databricks.ClusterPolicy to create a databricks.Cluster policy, which limits the ability to create clusters based on a set of rules.
  • databricks.InstancePool to manage instance pools to reduce cluster start and auto-scaling times by maintaining a set of idle, ready-to-use instances.
  • databricks.Job to manage Databricks Jobs to run non-interactive code in a databricks_cluster.

Example Usage

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Databricks = Pulumi.Databricks;

return await Deployment.RunAsync(() => 
{
    var withGpu = Databricks.GetNodeType.Invoke(new()
    {
        LocalDisk = true,
        MinCores = 16,
        GbPerCore = 1,
        MinGpus = 1,
    });

    var gpuMl = Databricks.GetSparkVersion.Invoke(new()
    {
        Gpu = true,
        Ml = true,
    });

    var research = new Databricks.Cluster("research", new()
    {
        ClusterName = "Research Cluster",
        SparkVersion = gpuMl.Apply(getSparkVersionResult => getSparkVersionResult.Id),
        NodeTypeId = withGpu.Apply(getNodeTypeResult => getNodeTypeResult.Id),
        AutoterminationMinutes = 20,
        Autoscale = new Databricks.Inputs.ClusterAutoscaleArgs
        {
            MinWorkers = 1,
            MaxWorkers = 50,
        },
    });

});
package main

import (
	"github.com/pulumi/pulumi-databricks/sdk/go/databricks"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		withGpu, err := databricks.GetNodeType(ctx, &databricks.GetNodeTypeArgs{
			LocalDisk: pulumi.BoolRef(true),
			MinCores:  pulumi.IntRef(16),
			GbPerCore: pulumi.IntRef(1),
			MinGpus:   pulumi.IntRef(1),
		}, nil)
		if err != nil {
			return err
		}
		gpuMl, err := databricks.GetSparkVersion(ctx, &databricks.GetSparkVersionArgs{
			Gpu: pulumi.BoolRef(true),
			Ml:  pulumi.BoolRef(true),
		}, nil)
		if err != nil {
			return err
		}
		_, err = databricks.NewCluster(ctx, "research", &databricks.ClusterArgs{
			ClusterName:            pulumi.String("Research Cluster"),
			SparkVersion:           pulumi.String(gpuMl.Id),
			NodeTypeId:             pulumi.String(withGpu.Id),
			AutoterminationMinutes: pulumi.Int(20),
			Autoscale: &databricks.ClusterAutoscaleArgs{
				MinWorkers: pulumi.Int(1),
				MaxWorkers: pulumi.Int(50),
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.databricks.DatabricksFunctions;
import com.pulumi.databricks.inputs.GetNodeTypeArgs;
import com.pulumi.databricks.inputs.GetSparkVersionArgs;
import com.pulumi.databricks.Cluster;
import com.pulumi.databricks.ClusterArgs;
import com.pulumi.databricks.inputs.ClusterAutoscaleArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        final var withGpu = DatabricksFunctions.getNodeType(GetNodeTypeArgs.builder()
            .localDisk(true)
            .minCores(16)
            .gbPerCore(1)
            .minGpus(1)
            .build());

        final var gpuMl = DatabricksFunctions.getSparkVersion(GetSparkVersionArgs.builder()
            .gpu(true)
            .ml(true)
            .build());

        var research = new Cluster("research", ClusterArgs.builder()        
            .clusterName("Research Cluster")
            .sparkVersion(gpuMl.applyValue(getSparkVersionResult -> getSparkVersionResult.id()))
            .nodeTypeId(withGpu.applyValue(getNodeTypeResult -> getNodeTypeResult.id()))
            .autoterminationMinutes(20)
            .autoscale(ClusterAutoscaleArgs.builder()
                .minWorkers(1)
                .maxWorkers(50)
                .build())
            .build());

    }
}
import pulumi
import pulumi_databricks as databricks

with_gpu = databricks.get_node_type(local_disk=True,
    min_cores=16,
    gb_per_core=1,
    min_gpus=1)
gpu_ml = databricks.get_spark_version(gpu=True,
    ml=True)
research = databricks.Cluster("research",
    cluster_name="Research Cluster",
    spark_version=gpu_ml.id,
    node_type_id=with_gpu.id,
    autotermination_minutes=20,
    autoscale=databricks.ClusterAutoscaleArgs(
        min_workers=1,
        max_workers=50,
    ))
import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";

const withGpu = databricks.getNodeType({
    localDisk: true,
    minCores: 16,
    gbPerCore: 1,
    minGpus: 1,
});
const gpuMl = databricks.getSparkVersion({
    gpu: true,
    ml: true,
});
const research = new databricks.Cluster("research", {
    clusterName: "Research Cluster",
    sparkVersion: gpuMl.then(gpuMl => gpuMl.id),
    nodeTypeId: withGpu.then(withGpu => withGpu.id),
    autoterminationMinutes: 20,
    autoscale: {
        minWorkers: 1,
        maxWorkers: 50,
    },
});
resources:
  research:
    type: databricks:Cluster
    properties:
      clusterName: Research Cluster
      sparkVersion: ${gpuMl.id}
      nodeTypeId: ${withGpu.id}
      autoterminationMinutes: 20
      autoscale:
        minWorkers: 1
        maxWorkers: 50
variables:
  withGpu:
    fn::invoke:
      Function: databricks:getNodeType
      Arguments:
        localDisk: true
        minCores: 16
        gbPerCore: 1
        minGpus: 1
  gpuMl:
    fn::invoke:
      Function: databricks:getSparkVersion
      Arguments:
        gpu: true
        ml: true

Using getNodeType

Two invocation forms are available. The direct form accepts plain arguments and either blocks until the result value is available, or returns a Promise-wrapped result. The output form accepts Input-wrapped arguments and returns an Output-wrapped result.

function getNodeType(args: GetNodeTypeArgs, opts?: InvokeOptions): Promise<GetNodeTypeResult>
function getNodeTypeOutput(args: GetNodeTypeOutputArgs, opts?: InvokeOptions): Output<GetNodeTypeResult>
def get_node_type(category: Optional[str] = None,
                  fleet: Optional[bool] = None,
                  gb_per_core: Optional[int] = None,
                  graviton: Optional[bool] = None,
                  id: Optional[str] = None,
                  is_io_cache_enabled: Optional[bool] = None,
                  local_disk: Optional[bool] = None,
                  local_disk_min_size: Optional[int] = None,
                  min_cores: Optional[int] = None,
                  min_gpus: Optional[int] = None,
                  min_memory_gb: Optional[int] = None,
                  photon_driver_capable: Optional[bool] = None,
                  photon_worker_capable: Optional[bool] = None,
                  support_port_forwarding: Optional[bool] = None,
                  opts: Optional[InvokeOptions] = None) -> GetNodeTypeResult
def get_node_type_output(category: Optional[pulumi.Input[str]] = None,
                  fleet: Optional[pulumi.Input[bool]] = None,
                  gb_per_core: Optional[pulumi.Input[int]] = None,
                  graviton: Optional[pulumi.Input[bool]] = None,
                  id: Optional[pulumi.Input[str]] = None,
                  is_io_cache_enabled: Optional[pulumi.Input[bool]] = None,
                  local_disk: Optional[pulumi.Input[bool]] = None,
                  local_disk_min_size: Optional[pulumi.Input[int]] = None,
                  min_cores: Optional[pulumi.Input[int]] = None,
                  min_gpus: Optional[pulumi.Input[int]] = None,
                  min_memory_gb: Optional[pulumi.Input[int]] = None,
                  photon_driver_capable: Optional[pulumi.Input[bool]] = None,
                  photon_worker_capable: Optional[pulumi.Input[bool]] = None,
                  support_port_forwarding: Optional[pulumi.Input[bool]] = None,
                  opts: Optional[InvokeOptions] = None) -> Output[GetNodeTypeResult]
func GetNodeType(ctx *Context, args *GetNodeTypeArgs, opts ...InvokeOption) (*GetNodeTypeResult, error)
func GetNodeTypeOutput(ctx *Context, args *GetNodeTypeOutputArgs, opts ...InvokeOption) GetNodeTypeResultOutput

> Note: This function is named GetNodeType in the Go SDK.

public static class GetNodeType 
{
    public static Task<GetNodeTypeResult> InvokeAsync(GetNodeTypeArgs args, InvokeOptions? opts = null)
    public static Output<GetNodeTypeResult> Invoke(GetNodeTypeInvokeArgs args, InvokeOptions? opts = null)
}
public static CompletableFuture<GetNodeTypeResult> getNodeType(GetNodeTypeArgs args, InvokeOptions options)
// Output-based functions aren't available in Java yet
fn::invoke:
  function: databricks:index/getNodeType:getNodeType
  arguments:
    # arguments dictionary
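
As a concrete illustration, a TypeScript sketch of both forms side by side (the criteria are arbitrary):

import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";

// Direct form: plain arguments, Promise-wrapped result.
const direct: Promise<databricks.GetNodeTypeResult> =
    databricks.getNodeType({ minCores: 8 });

// Output form: accepts Input-wrapped arguments (e.g. outputs of other
// resources) and returns an Output-wrapped result.
const cores: pulumi.Input<number> = 8;
const viaOutput: pulumi.Output<databricks.GetNodeTypeResult> =
    databricks.getNodeTypeOutput({ minCores: cores });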

The following arguments are supported:

Category string

Node category, which can be one of the following (the set depends on the cloud environment and can be checked with databricks clusters list-node-types | jq '.node_types[]|.category' | sort | uniq):

  • General Purpose (all clouds)
  • General Purpose (HDD) (Azure)
  • Compute Optimized (all clouds)
  • Memory Optimized (all clouds)
  • Memory Optimized (Remote HDD) (Azure)
  • Storage Optimized (AWS, Azure)
  • GPU Accelerated (AWS, Azure)
Fleet bool

Whether to limit the search to AWS fleet instance types. Defaults to false.

GbPerCore int

Number of gigabytes per core available on the instance. Conflicts with min_memory_gb. Defaults to 0.

Graviton bool

Whether to limit the search to nodes with AWS Graviton CPUs. Defaults to false.

Id string

Node type ID, which can be used for databricks_job, databricks_cluster, or databricks_instance_pool.

IsIoCacheEnabled bool

Pick only nodes that have IO Cache. Defaults to false.

LocalDisk bool

Pick only nodes with local storage. Defaults to false.

LocalDiskMinSize int

Pick only nodes whose local storage size is greater than or equal to the given value. Defaults to 0.

MinCores int

Minimum number of CPU cores available on instance. Defaults to 0.

MinGpus int

Minimum number of GPUs attached to the instance. Defaults to 0.

MinMemoryGb int

Minimum amount of memory per node in gigabytes. Defaults to 0.

PhotonDriverCapable bool

Pick only nodes that can run Photon driver. Defaults to false.

PhotonWorkerCapable bool

Pick only nodes that can run Photon workers. Defaults to false.

SupportPortForwarding bool

Pick only nodes that support port forwarding. Defaults to false.

category String

Node category, which can be one of the following (the set depends on the cloud environment and can be checked with databricks clusters list-node-types | jq '.node_types[]|.category' | sort | uniq):

  • General Purpose (all clouds)
  • General Purpose (HDD) (Azure)
  • Compute Optimized (all clouds)
  • Memory Optimized (all clouds)
  • Memory Optimized (Remote HDD) (Azure)
  • Storage Optimized (AWS, Azure)
  • GPU Accelerated (AWS, Azure)
fleet Boolean

Whether to limit the search to AWS fleet instance types. Defaults to false.

gbPerCore Integer

Number of gigabytes per core available on the instance. Conflicts with min_memory_gb. Defaults to 0.

graviton Boolean

Whether to limit the search to nodes with AWS Graviton CPUs. Defaults to false.

id String

Node type ID, which can be used for databricks_job, databricks_cluster, or databricks_instance_pool.

isIoCacheEnabled Boolean

Pick only nodes that have IO Cache. Defaults to false.

localDisk Boolean

Pick only nodes with local storage. Defaults to false.

localDiskMinSize Integer

Pick only nodes whose local storage size is greater than or equal to the given value. Defaults to 0.

minCores Integer

Minimum number of CPU cores available on instance. Defaults to 0.

minGpus Integer

Minimum number of GPUs attached to the instance. Defaults to 0.

minMemoryGb Integer

Minimum amount of memory per node in gigabytes. Defaults to 0.

photonDriverCapable Boolean

Pick only nodes that can run Photon driver. Defaults to false.

photonWorkerCapable Boolean

Pick only nodes that can run Photon workers. Defaults to false.

supportPortForwarding Boolean

Pick only nodes that support port forwarding. Defaults to false.

category string

Node category, which can be one of the following (the set depends on the cloud environment and can be checked with databricks clusters list-node-types | jq '.node_types[]|.category' | sort | uniq):

  • General Purpose (all clouds)
  • General Purpose (HDD) (Azure)
  • Compute Optimized (all clouds)
  • Memory Optimized (all clouds)
  • Memory Optimized (Remote HDD) (Azure)
  • Storage Optimized (AWS, Azure)
  • GPU Accelerated (AWS, Azure)
fleet boolean

Whether to limit the search to AWS fleet instance types. Defaults to false.

gbPerCore number

Number of gigabytes per core available on the instance. Conflicts with min_memory_gb. Defaults to 0.

graviton boolean

Whether to limit the search to nodes with AWS Graviton CPUs. Defaults to false.

id string

Node type ID, which can be used for databricks_job, databricks_cluster, or databricks_instance_pool.

isIoCacheEnabled boolean

Pick only nodes that have IO Cache. Defaults to false.

localDisk boolean

Pick only nodes with local storage. Defaults to false.

localDiskMinSize number

Pick only nodes whose local storage size is greater than or equal to the given value. Defaults to 0.

minCores number

Minimum number of CPU cores available on instance. Defaults to 0.

minGpus number

Minimum number of GPUs attached to the instance. Defaults to 0.

minMemoryGb number

Minimum amount of memory per node in gigabytes. Defaults to 0.

photonDriverCapable boolean

Pick only nodes that can run Photon driver. Defaults to false.

photonWorkerCapable boolean

Pick only nodes that can run Photon workers. Defaults to false.

supportPortForwarding boolean

Pick only nodes that support port forwarding. Defaults to false.

category str

Node category, which can be one of the following (the set depends on the cloud environment and can be checked with databricks clusters list-node-types | jq '.node_types[]|.category' | sort | uniq):

  • General Purpose (all clouds)
  • General Purpose (HDD) (Azure)
  • Compute Optimized (all clouds)
  • Memory Optimized (all clouds)
  • Memory Optimized (Remote HDD) (Azure)
  • Storage Optimized (AWS, Azure)
  • GPU Accelerated (AWS, Azure)
fleet bool

Whether to limit the search to AWS fleet instance types. Defaults to false.

gb_per_core int

Number of gigabytes per core available on the instance. Conflicts with min_memory_gb. Defaults to 0.

graviton bool

Whether to limit the search to nodes with AWS Graviton CPUs. Defaults to false.

id str

Node type ID, which can be used for databricks_job, databricks_cluster, or databricks_instance_pool.

is_io_cache_enabled bool

Pick only nodes that have IO Cache. Defaults to false.

local_disk bool

Pick only nodes with local storage. Defaults to false.

local_disk_min_size int

Pick only nodes whose local storage size is greater than or equal to the given value. Defaults to 0.

min_cores int

Minimum number of CPU cores available on instance. Defaults to 0.

min_gpus int

Minimum number of GPUs attached to the instance. Defaults to 0.

min_memory_gb int

Minimum amount of memory per node in gigabytes. Defaults to 0.

photon_driver_capable bool

Pick only nodes that can run Photon driver. Defaults to false.

photon_worker_capable bool

Pick only nodes that can run Photon workers. Defaults to false.

support_port_forwarding bool

Pick only nodes that support port forwarding. Defaults to false.

category String

Node category, which can be one of the following (the set depends on the cloud environment and can be checked with databricks clusters list-node-types | jq '.node_types[]|.category' | sort | uniq):

  • General Purpose (all clouds)
  • General Purpose (HDD) (Azure)
  • Compute Optimized (all clouds)
  • Memory Optimized (all clouds)
  • Memory Optimized (Remote HDD) (Azure)
  • Storage Optimized (AWS, Azure)
  • GPU Accelerated (AWS, Azure)
fleet Boolean

Whether to limit the search to AWS fleet instance types. Defaults to false.

gbPerCore Number

Number of gigabytes per core available on the instance. Conflicts with min_memory_gb. Defaults to 0.

graviton Boolean

Whether to limit the search to nodes with AWS Graviton CPUs. Defaults to false.

id String

Node type ID, which can be used for databricks_job, databricks_cluster, or databricks_instance_pool.

isIoCacheEnabled Boolean

Pick only nodes that have IO Cache. Defaults to false.

localDisk Boolean

Pick only nodes with local storage. Defaults to false.

localDiskMinSize Number

Pick only nodes whose local storage size is greater than or equal to the given value. Defaults to 0.

minCores Number

Minimum number of CPU cores available on instance. Defaults to 0.

minGpus Number

Minimum number of GPUs attached to the instance. Defaults to 0.

minMemoryGb Number

Minimum amount of memory per node in gigabytes. Defaults to 0.

photonDriverCapable Boolean

Pick only nodes that can run Photon driver. Defaults to false.

photonWorkerCapable Boolean

Pick only nodes that can run Photon workers. Defaults to false.

supportPortForwarding Boolean

Pick only nodes that support port forwarding. Defaults to false.

getNodeType Result

The following output properties are available:

Id string

Node type ID, which can be used for databricks_job, databricks_cluster, or databricks_instance_pool.

Category string
Fleet bool
GbPerCore int
Graviton bool
IsIoCacheEnabled bool
LocalDisk bool
LocalDiskMinSize int
MinCores int
MinGpus int
MinMemoryGb int
PhotonDriverCapable bool
PhotonWorkerCapable bool
SupportPortForwarding bool
id String

Node type ID, which can be used for databricks_job, databricks_cluster, or databricks_instance_pool.

category String
fleet Boolean
gbPerCore Integer
graviton Boolean
isIoCacheEnabled Boolean
localDisk Boolean
localDiskMinSize Integer
minCores Integer
minGpus Integer
minMemoryGb Integer
photonDriverCapable Boolean
photonWorkerCapable Boolean
supportPortForwarding Boolean
id string

Node type ID, which can be used for databricks_job, databricks_cluster, or databricks_instance_pool.

category string
fleet boolean
gbPerCore number
graviton boolean
isIoCacheEnabled boolean
localDisk boolean
localDiskMinSize number
minCores number
minGpus number
minMemoryGb number
photonDriverCapable boolean
photonWorkerCapable boolean
supportPortForwarding boolean
id str

Node type ID, which can be used for databricks_job, databricks_cluster, or databricks_instance_pool.

category str
fleet bool
gb_per_core int
graviton bool
is_io_cache_enabled bool
local_disk bool
local_disk_min_size int
min_cores int
min_gpus int
min_memory_gb int
photon_driver_capable bool
photon_worker_capable bool
support_port_forwarding bool
id String

Node type ID, which can be used for databricks_job, databricks_cluster, or databricks_instance_pool.

category String
fleet Boolean
gbPerCore Number
graviton Boolean
isIoCacheEnabled Boolean
localDisk Boolean
localDiskMinSize Number
minCores Number
minGpus Number
minMemoryGb Number
photonDriverCapable Boolean
photonWorkerCapable Boolean
supportPortForwarding Boolean

Package Details

Repository
pulumi/pulumi-databricks
License
Apache-2.0
Notes

This Pulumi package is based on the databricks Terraform Provider.