published on Wednesday, Apr 29, 2026 by Pulumi
Provides a DigitalOcean Dedicated Inference resource. This can be used to create, modify, and delete dedicated inference endpoints for running GPU-accelerated model inference.
Example Usage
import * as pulumi from "@pulumi/pulumi";
import * as digitalocean from "@pulumi/digitalocean";
const example = new digitalocean.DedicatedInference("example", {
name: "my-inference-endpoint",
region: "tor1",
modelDeployments: [{
modelSlug: "deepseek-r1-distill-qwen-14b",
modelProvider: "digitalocean",
accelerators: [{
acceleratorSlug: "gpu-h100x1-80gb",
scale: 1,
type: "nvidia_h100",
}],
}],
});
import pulumi
import pulumi_digitalocean as digitalocean
example = digitalocean.DedicatedInference("example",
name="my-inference-endpoint",
region="tor1",
model_deployments=[{
"model_slug": "deepseek-r1-distill-qwen-14b",
"model_provider": "digitalocean",
"accelerators": [{
"accelerator_slug": "gpu-h100x1-80gb",
"scale": 1,
"type": "nvidia_h100",
}],
}])
package main
import (
"github.com/pulumi/pulumi-digitalocean/sdk/v4/go/digitalocean"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := digitalocean.NewDedicatedInference(ctx, "example", &digitalocean.DedicatedInferenceArgs{
Name: pulumi.String("my-inference-endpoint"),
Region: pulumi.String("tor1"),
ModelDeployments: digitalocean.DedicatedInferenceModelDeploymentArray{
&digitalocean.DedicatedInferenceModelDeploymentArgs{
ModelSlug: pulumi.String("deepseek-r1-distill-qwen-14b"),
ModelProvider: pulumi.String("digitalocean"),
Accelerators: digitalocean.DedicatedInferenceModelDeploymentAcceleratorArray{
&digitalocean.DedicatedInferenceModelDeploymentAcceleratorArgs{
AcceleratorSlug: pulumi.String("gpu-h100x1-80gb"),
Scale: pulumi.Int(1),
Type: pulumi.String("nvidia_h100"),
},
},
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using DigitalOcean = Pulumi.DigitalOcean;
return await Deployment.RunAsync(() =>
{
var example = new DigitalOcean.DedicatedInference("example", new()
{
Name = "my-inference-endpoint",
Region = "tor1",
ModelDeployments = new[]
{
new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentArgs
{
ModelSlug = "deepseek-r1-distill-qwen-14b",
ModelProvider = "digitalocean",
Accelerators = new[]
{
new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentAcceleratorArgs
{
AcceleratorSlug = "gpu-h100x1-80gb",
Scale = 1,
Type = "nvidia_h100",
},
},
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.digitalocean.DedicatedInference;
import com.pulumi.digitalocean.DedicatedInferenceArgs;
import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentArgs;
import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentAcceleratorArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new DedicatedInference("example", DedicatedInferenceArgs.builder()
.name("my-inference-endpoint")
.region("tor1")
.modelDeployments(DedicatedInferenceModelDeploymentArgs.builder()
.modelSlug("deepseek-r1-distill-qwen-14b")
.modelProvider("digitalocean")
.accelerators(DedicatedInferenceModelDeploymentAcceleratorArgs.builder()
.acceleratorSlug("gpu-h100x1-80gb")
.scale(1)
.type("nvidia_h100")
.build())
.build())
.build());
}
}
resources:
example:
type: digitalocean:DedicatedInference
properties:
name: my-inference-endpoint
region: tor1
modelDeployments:
- modelSlug: deepseek-r1-distill-qwen-14b
modelProvider: digitalocean
accelerators:
- acceleratorSlug: gpu-h100x1-80gb
scale: 1
type: nvidia_h100
With Public Endpoint
import * as pulumi from "@pulumi/pulumi";
import * as digitalocean from "@pulumi/digitalocean";
const _public = new digitalocean.DedicatedInference("public", {
name: "my-public-inference",
region: "tor1",
enablePublicEndpoint: true,
modelDeployments: [{
modelSlug: "deepseek-r1-distill-qwen-14b",
modelProvider: "digitalocean",
accelerators: [{
acceleratorSlug: "gpu-h100x1-80gb",
scale: 1,
type: "nvidia_h100",
}],
}],
});
import pulumi
import pulumi_digitalocean as digitalocean
public = digitalocean.DedicatedInference("public",
name="my-public-inference",
region="tor1",
enable_public_endpoint=True,
model_deployments=[{
"model_slug": "deepseek-r1-distill-qwen-14b",
"model_provider": "digitalocean",
"accelerators": [{
"accelerator_slug": "gpu-h100x1-80gb",
"scale": 1,
"type": "nvidia_h100",
}],
}])
package main
import (
"github.com/pulumi/pulumi-digitalocean/sdk/v4/go/digitalocean"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := digitalocean.NewDedicatedInference(ctx, "public", &digitalocean.DedicatedInferenceArgs{
Name: pulumi.String("my-public-inference"),
Region: pulumi.String("tor1"),
EnablePublicEndpoint: pulumi.Bool(true),
ModelDeployments: digitalocean.DedicatedInferenceModelDeploymentArray{
&digitalocean.DedicatedInferenceModelDeploymentArgs{
ModelSlug: pulumi.String("deepseek-r1-distill-qwen-14b"),
ModelProvider: pulumi.String("digitalocean"),
Accelerators: digitalocean.DedicatedInferenceModelDeploymentAcceleratorArray{
&digitalocean.DedicatedInferenceModelDeploymentAcceleratorArgs{
AcceleratorSlug: pulumi.String("gpu-h100x1-80gb"),
Scale: pulumi.Int(1),
Type: pulumi.String("nvidia_h100"),
},
},
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using DigitalOcean = Pulumi.DigitalOcean;
return await Deployment.RunAsync(() =>
{
var @public = new DigitalOcean.DedicatedInference("public", new()
{
Name = "my-public-inference",
Region = "tor1",
EnablePublicEndpoint = true,
ModelDeployments = new[]
{
new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentArgs
{
ModelSlug = "deepseek-r1-distill-qwen-14b",
ModelProvider = "digitalocean",
Accelerators = new[]
{
new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentAcceleratorArgs
{
AcceleratorSlug = "gpu-h100x1-80gb",
Scale = 1,
Type = "nvidia_h100",
},
},
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.digitalocean.DedicatedInference;
import com.pulumi.digitalocean.DedicatedInferenceArgs;
import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentArgs;
import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentAcceleratorArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var public_ = new DedicatedInference("public", DedicatedInferenceArgs.builder()
.name("my-public-inference")
.region("tor1")
.enablePublicEndpoint(true)
.modelDeployments(DedicatedInferenceModelDeploymentArgs.builder()
.modelSlug("deepseek-r1-distill-qwen-14b")
.modelProvider("digitalocean")
.accelerators(DedicatedInferenceModelDeploymentAcceleratorArgs.builder()
.acceleratorSlug("gpu-h100x1-80gb")
.scale(1)
.type("nvidia_h100")
.build())
.build())
.build());
}
}
resources:
public:
type: digitalocean:DedicatedInference
properties:
name: my-public-inference
region: tor1
enablePublicEndpoint: true
modelDeployments:
- modelSlug: deepseek-r1-distill-qwen-14b
modelProvider: digitalocean
accelerators:
- acceleratorSlug: gpu-h100x1-80gb
scale: 1
type: nvidia_h100
With VPC
The following examples assume an existing digitalocean.Vpc resource named example; its id is passed as vpcUuid so the endpoint is deployed into that VPC.
import * as pulumi from "@pulumi/pulumi";
import * as digitalocean from "@pulumi/digitalocean";
const _private = new digitalocean.DedicatedInference("private", {
name: "my-private-inference",
region: "tor1",
vpcUuid: example.id,
modelDeployments: [{
modelSlug: "deepseek-r1-distill-qwen-14b",
modelProvider: "digitalocean",
accelerators: [{
acceleratorSlug: "gpu-h100x1-80gb",
scale: 1,
type: "nvidia_h100",
}],
}],
});
import pulumi
import pulumi_digitalocean as digitalocean
private = digitalocean.DedicatedInference("private",
name="my-private-inference",
region="tor1",
vpc_uuid=example["id"],
model_deployments=[{
"model_slug": "deepseek-r1-distill-qwen-14b",
"model_provider": "digitalocean",
"accelerators": [{
"accelerator_slug": "gpu-h100x1-80gb",
"scale": 1,
"type": "nvidia_h100",
}],
}])
package main
import (
"github.com/pulumi/pulumi-digitalocean/sdk/v4/go/digitalocean"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := digitalocean.NewDedicatedInference(ctx, "private", &digitalocean.DedicatedInferenceArgs{
Name: pulumi.String("my-private-inference"),
Region: pulumi.String("tor1"),
VpcUuid: pulumi.Any(example.Id),
ModelDeployments: digitalocean.DedicatedInferenceModelDeploymentArray{
&digitalocean.DedicatedInferenceModelDeploymentArgs{
ModelSlug: pulumi.String("deepseek-r1-distill-qwen-14b"),
ModelProvider: pulumi.String("digitalocean"),
Accelerators: digitalocean.DedicatedInferenceModelDeploymentAcceleratorArray{
&digitalocean.DedicatedInferenceModelDeploymentAcceleratorArgs{
AcceleratorSlug: pulumi.String("gpu-h100x1-80gb"),
Scale: pulumi.Int(1),
Type: pulumi.String("nvidia_h100"),
},
},
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using DigitalOcean = Pulumi.DigitalOcean;
return await Deployment.RunAsync(() =>
{
var @private = new DigitalOcean.DedicatedInference("private", new()
{
Name = "my-private-inference",
Region = "tor1",
VpcUuid = example.Id,
ModelDeployments = new[]
{
new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentArgs
{
ModelSlug = "deepseek-r1-distill-qwen-14b",
ModelProvider = "digitalocean",
Accelerators = new[]
{
new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentAcceleratorArgs
{
AcceleratorSlug = "gpu-h100x1-80gb",
Scale = 1,
Type = "nvidia_h100",
},
},
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.digitalocean.DedicatedInference;
import com.pulumi.digitalocean.DedicatedInferenceArgs;
import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentArgs;
import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentAcceleratorArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var private_ = new DedicatedInference("private", DedicatedInferenceArgs.builder()
.name("my-private-inference")
.region("tor1")
.vpcUuid(example.id())
.modelDeployments(DedicatedInferenceModelDeploymentArgs.builder()
.modelSlug("deepseek-r1-distill-qwen-14b")
.modelProvider("digitalocean")
.accelerators(DedicatedInferenceModelDeploymentAcceleratorArgs.builder()
.acceleratorSlug("gpu-h100x1-80gb")
.scale(1)
.type("nvidia_h100")
.build())
.build())
.build());
}
}
resources:
private:
type: digitalocean:DedicatedInference
properties:
name: my-private-inference
region: tor1
vpcUuid: ${example.id}
modelDeployments:
- modelSlug: deepseek-r1-distill-qwen-14b
modelProvider: digitalocean
accelerators:
- acceleratorSlug: gpu-h100x1-80gb
scale: 1
type: nvidia_h100
Create DedicatedInference Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new DedicatedInference(name: string, args: DedicatedInferenceArgs, opts?: CustomResourceOptions);
@overload
def DedicatedInference(resource_name: str,
args: DedicatedInferenceArgs,
opts: Optional[ResourceOptions] = None)
@overload
def DedicatedInference(resource_name: str,
opts: Optional[ResourceOptions] = None,
model_deployments: Optional[Sequence[DedicatedInferenceModelDeploymentArgs]] = None,
region: Optional[str] = None,
enable_public_endpoint: Optional[bool] = None,
hugging_face_token: Optional[str] = None,
name: Optional[str] = None,
vpc_uuid: Optional[str] = None)
func NewDedicatedInference(ctx *Context, name string, args DedicatedInferenceArgs, opts ...ResourceOption) (*DedicatedInference, error)
public DedicatedInference(string name, DedicatedInferenceArgs args, CustomResourceOptions? opts = null)
public DedicatedInference(String name, DedicatedInferenceArgs args)
public DedicatedInference(String name, DedicatedInferenceArgs args, CustomResourceOptions options)
type: digitalocean:DedicatedInference
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource (resource_name in Python).
- args DedicatedInferenceArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior (ResourceOptions in Python, ResourceOption in Go).
- ctx Context
- (Go only) Context object for the current deployment.
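For illustration, a minimal TypeScript sketch of the opts parameter; protect is one of the standard CustomResourceOptions and guards the endpoint against accidental deletion:
import * as digitalocean from "@pulumi/digitalocean";

// A minimal sketch of passing resource options: `protect: true` makes
// `pulumi destroy` fail for this resource until protection is removed.
const guarded = new digitalocean.DedicatedInference("guarded", {
    name: "my-guarded-inference",
    region: "tor1",
    modelDeployments: [{
        modelSlug: "deepseek-r1-distill-qwen-14b",
        modelProvider: "digitalocean",
        accelerators: [{
            acceleratorSlug: "gpu-h100x1-80gb",
            scale: 1,
            type: "nvidia_h100",
        }],
    }],
}, { protect: true });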
DedicatedInference Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
The DedicatedInference resource accepts the following input properties:
Property names below are shown in camelCase; Python uses snake_case (for example model_deployments) and Go and .NET use PascalCase (for example ModelDeployments).
- modelDeployments List<DedicatedInferenceModelDeployment>
- The list of model deployments to run on the dedicated inference endpoint. Each modelDeployments block supports the fields described under Supporting Types below.
- region string
- The region slug where the dedicated inference endpoint will be deployed. Changing this forces a new resource.
- enablePublicEndpoint bool
- Whether to enable a public HTTPS endpoint for the dedicated inference endpoint. Defaults to false. This field is immutable after creation and changing it forces a new resource.
- huggingFaceToken string
- A HuggingFace token for accessing gated models.
- name string
- A human-readable name for the dedicated inference endpoint.
- vpcUuid string
- The UUID of the VPC to deploy the dedicated inference endpoint into. Changing this forces a new resource.
Outputs
All input properties are implicitly available as output properties. Additionally, the DedicatedInference resource produces the following output properties:
- createdAt string
- The date and time when the dedicated inference endpoint was created.
- id string
- The provider-assigned unique ID for this managed resource.
- privateEndpointFqdn string
- The fully-qualified domain name of the private endpoint.
- publicEndpointFqdn string
- The fully-qualified domain name of the public endpoint, if enabled.
- status string
- The current status of the dedicated inference endpoint.
- updatedAt string
- The date and time when the dedicated inference endpoint was last updated.
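For example, a minimal TypeScript sketch that exports a few of these outputs from the public-endpoint example above; the stack output names are arbitrary:
import * as pulumi from "@pulumi/pulumi";
import * as digitalocean from "@pulumi/digitalocean";

const example = new digitalocean.DedicatedInference("example", {
    name: "my-inference-endpoint",
    region: "tor1",
    enablePublicEndpoint: true,
    modelDeployments: [{
        modelSlug: "deepseek-r1-distill-qwen-14b",
        modelProvider: "digitalocean",
        accelerators: [{
            acceleratorSlug: "gpu-h100x1-80gb",
            scale: 1,
            type: "nvidia_h100",
        }],
    }],
});

// Each output is a pulumi.Output<string>; export them as stack outputs.
export const endpointStatus = example.status;
export const publicFqdn = example.publicEndpointFqdn;
// Compose a URL from the FQDN once the endpoint is provisioned.
export const inferenceUrl = pulumi.interpolate`https://${example.publicEndpointFqdn}`;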
Look up Existing DedicatedInference Resource
Get an existing DedicatedInference resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: DedicatedInferenceState, opts?: CustomResourceOptions): DedicatedInference
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
created_at: Optional[str] = None,
enable_public_endpoint: Optional[bool] = None,
hugging_face_token: Optional[str] = None,
model_deployments: Optional[Sequence[DedicatedInferenceModelDeploymentArgs]] = None,
name: Optional[str] = None,
private_endpoint_fqdn: Optional[str] = None,
public_endpoint_fqdn: Optional[str] = None,
region: Optional[str] = None,
status: Optional[str] = None,
updated_at: Optional[str] = None,
vpc_uuid: Optional[str] = None) -> DedicatedInference
func GetDedicatedInference(ctx *Context, name string, id IDInput, state *DedicatedInferenceState, opts ...ResourceOption) (*DedicatedInference, error)
public static DedicatedInference Get(string name, Input<string> id, DedicatedInferenceState? state, CustomResourceOptions? opts = null)
public static DedicatedInference get(String name, Output<String> id, DedicatedInferenceState state, CustomResourceOptions options)
resources:
  _:
    type: digitalocean:DedicatedInference
    get:
      id: ${id}
- name
- The unique name of the resulting resource (resource_name in Python).
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- createdAt string
- The date and time when the dedicated inference endpoint was created.
- enablePublicEndpoint bool
- Whether to enable a public HTTPS endpoint for the dedicated inference endpoint. Defaults to false. This field is immutable after creation and changing it forces a new resource.
- huggingFaceToken string
- A HuggingFace token for accessing gated models.
- modelDeployments List<DedicatedInferenceModelDeployment>
- The list of model deployments to run on the dedicated inference endpoint. Each modelDeployments block supports the fields described under Supporting Types below.
- name string
- A human-readable name for the dedicated inference endpoint.
- privateEndpointFqdn string
- The fully-qualified domain name of the private endpoint.
- publicEndpointFqdn string
- The fully-qualified domain name of the public endpoint, if enabled.
- region string
- The region slug where the dedicated inference endpoint will be deployed. Changing this forces a new resource.
- status string
- The current status of the dedicated inference endpoint.
- updatedAt string
- The date and time when the dedicated inference endpoint was last updated.
- vpcUuid string
- The UUID of the VPC to deploy the dedicated inference endpoint into. Changing this forces a new resource.
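A minimal TypeScript sketch of the lookup, assuming a placeholder endpoint ID:
import * as digitalocean from "@pulumi/digitalocean";

// Look up an existing endpoint by its provider-assigned ID.
// "endpoint-id" is a placeholder for a real ID from your account.
const existing = digitalocean.DedicatedInference.get("existing", "endpoint-id");

// State properties are available as outputs on the looked-up resource.
export const existingStatus = existing.status;
export const existingRegion = existing.region;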
Supporting Types
DedicatedInferenceModelDeployment, DedicatedInferenceModelDeploymentArgs
- accelerators List<DedicatedInferenceModelDeploymentAccelerator>
- The GPU accelerators to allocate for this model deployment. Each accelerators block supports the fields described below.
- modelProvider string
- The provider of the model (e.g. digitalocean, huggingface).
- modelSlug string
- The slug identifier for the model to deploy.
- modelId string
- The unique ID of the model.
- providerModelId string
- The provider-specific model ID. Required when modelProvider is 'hugging_face', optional for 'modelcatalog'.
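For instance, a hedged TypeScript sketch of deploying a gated HuggingFace model; the modelSlug and providerModelId values are illustrative placeholders, and the token should come from Pulumi config or another secret store:
import * as pulumi from "@pulumi/pulumi";
import * as digitalocean from "@pulumi/digitalocean";

const config = new pulumi.Config();

const hfExample = new digitalocean.DedicatedInference("hf-example", {
    name: "my-hf-inference",
    region: "tor1",
    // Token for gated models, stored as a secret with
    // `pulumi config set --secret hfToken ...`.
    huggingFaceToken: config.requireSecret("hfToken"),
    modelDeployments: [{
        modelProvider: "hugging_face",        // per the providerModelId note above
        modelSlug: "example-model-slug",      // illustrative slug
        providerModelId: "org/example-model", // illustrative HuggingFace repo ID
        accelerators: [{
            acceleratorSlug: "gpu-h100x1-80gb",
            scale: 1,
            type: "nvidia_h100",
        }],
    }],
});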
DedicatedInferenceModelDeploymentAccelerator, DedicatedInferenceModelDeploymentAcceleratorArgs
- acceleratorSlug string
- The slug identifier for the GPU accelerator type.
- scale int
- The number of accelerator units to allocate. Must be at least 1.
- type string
- The accelerator type.
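As a brief illustration, a TypeScript sketch allocating two accelerator units for higher throughput; the slug and type values simply mirror the examples above:
import * as digitalocean from "@pulumi/digitalocean";

const scaled = new digitalocean.DedicatedInference("scaled", {
    name: "my-scaled-inference",
    region: "tor1",
    modelDeployments: [{
        modelSlug: "deepseek-r1-distill-qwen-14b",
        modelProvider: "digitalocean",
        accelerators: [{
            acceleratorSlug: "gpu-h100x1-80gb",
            scale: 2, // allocate two accelerator units (must be at least 1)
            type: "nvidia_h100",
        }],
    }],
});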
Import
Dedicated inference endpoints can be imported using their id, e.g.
$ pulumi import digitalocean:index/dedicatedInference:DedicatedInference example endpoint-id
To learn more about importing existing cloud resources, see Importing resources.
Package Details
- Repository
- DigitalOcean pulumi/pulumi-digitalocean
- License
- Apache-2.0
- Notes
- This Pulumi package is based on the digitalocean Terraform Provider.
