published on Wednesday, Apr 29, 2026 by Pulumi
Provides a DigitalOcean Dedicated Inference resource. This can be used to create, modify, and delete dedicated inference endpoints for running GPU-accelerated model inference.
Example Usage
import * as pulumi from "@pulumi/pulumi";
import * as digitalocean from "@pulumi/digitalocean";
const example = new digitalocean.DedicatedInference("example", {
name: "my-inference-endpoint",
region: "tor1",
modelDeployments: [{
modelSlug: "deepseek-r1-distill-qwen-14b",
modelProvider: "digitalocean",
accelerators: [{
acceleratorSlug: "gpu-h100x1-80gb",
scale: 1,
type: "nvidia_h100",
}],
}],
});
import pulumi
import pulumi_digitalocean as digitalocean
example = digitalocean.DedicatedInference("example",
name="my-inference-endpoint",
region="tor1",
model_deployments=[{
"model_slug": "deepseek-r1-distill-qwen-14b",
"model_provider": "digitalocean",
"accelerators": [{
"accelerator_slug": "gpu-h100x1-80gb",
"scale": 1,
"type": "nvidia_h100",
}],
}])
package main
import (
"github.com/pulumi/pulumi-digitalocean/sdk/v4/go/digitalocean"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := digitalocean.NewDedicatedInference(ctx, "example", &digitalocean.DedicatedInferenceArgs{
Name: pulumi.String("my-inference-endpoint"),
Region: pulumi.String("tor1"),
ModelDeployments: digitalocean.DedicatedInferenceModelDeploymentArray{
&digitalocean.DedicatedInferenceModelDeploymentArgs{
ModelSlug: pulumi.String("deepseek-r1-distill-qwen-14b"),
ModelProvider: pulumi.String("digitalocean"),
Accelerators: digitalocean.DedicatedInferenceModelDeploymentAcceleratorArray{
&digitalocean.DedicatedInferenceModelDeploymentAcceleratorArgs{
AcceleratorSlug: pulumi.String("gpu-h100x1-80gb"),
Scale: pulumi.Int(1),
Type: pulumi.String("nvidia_h100"),
},
},
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using DigitalOcean = Pulumi.DigitalOcean;
return await Deployment.RunAsync(() =>
{
var example = new DigitalOcean.DedicatedInference("example", new()
{
Name = "my-inference-endpoint",
Region = "tor1",
ModelDeployments = new[]
{
new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentArgs
{
ModelSlug = "deepseek-r1-distill-qwen-14b",
ModelProvider = "digitalocean",
Accelerators = new[]
{
new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentAcceleratorArgs
{
AcceleratorSlug = "gpu-h100x1-80gb",
Scale = 1,
Type = "nvidia_h100",
},
},
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.digitalocean.DedicatedInference;
import com.pulumi.digitalocean.DedicatedInferenceArgs;
import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentArgs;
import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentAcceleratorArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new DedicatedInference("example", DedicatedInferenceArgs.builder()
.name("my-inference-endpoint")
.region("tor1")
.modelDeployments(DedicatedInferenceModelDeploymentArgs.builder()
.modelSlug("deepseek-r1-distill-qwen-14b")
.modelProvider("digitalocean")
.accelerators(DedicatedInferenceModelDeploymentAcceleratorArgs.builder()
.acceleratorSlug("gpu-h100x1-80gb")
.scale(1)
.type("nvidia_h100")
.build())
.build())
.build());
}
}
resources:
example:
type: digitalocean:DedicatedInference
properties:
name: my-inference-endpoint
region: tor1
modelDeployments:
- modelSlug: deepseek-r1-distill-qwen-14b
modelProvider: digitalocean
accelerators:
- acceleratorSlug: gpu-h100x1-80gb
scale: 1
type: nvidia_h100
With Public Endpoint
import * as pulumi from "@pulumi/pulumi";
import * as digitalocean from "@pulumi/digitalocean";
const _public = new digitalocean.DedicatedInference("public", {
name: "my-public-inference",
region: "tor1",
enablePublicEndpoint: true,
modelDeployments: [{
modelSlug: "deepseek-r1-distill-qwen-14b",
modelProvider: "digitalocean",
accelerators: [{
acceleratorSlug: "gpu-h100x1-80gb",
scale: 1,
type: "nvidia_h100",
}],
}],
});
import pulumi
import pulumi_digitalocean as digitalocean
public = digitalocean.DedicatedInference("public",
name="my-public-inference",
region="tor1",
enable_public_endpoint=True,
model_deployments=[{
"model_slug": "deepseek-r1-distill-qwen-14b",
"model_provider": "digitalocean",
"accelerators": [{
"accelerator_slug": "gpu-h100x1-80gb",
"scale": 1,
"type": "nvidia_h100",
}],
}])
package main
import (
"github.com/pulumi/pulumi-digitalocean/sdk/v4/go/digitalocean"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := digitalocean.NewDedicatedInference(ctx, "public", &digitalocean.DedicatedInferenceArgs{
Name: pulumi.String("my-public-inference"),
Region: pulumi.String("tor1"),
EnablePublicEndpoint: pulumi.Bool(true),
ModelDeployments: digitalocean.DedicatedInferenceModelDeploymentArray{
&digitalocean.DedicatedInferenceModelDeploymentArgs{
ModelSlug: pulumi.String("deepseek-r1-distill-qwen-14b"),
ModelProvider: pulumi.String("digitalocean"),
Accelerators: digitalocean.DedicatedInferenceModelDeploymentAcceleratorArray{
&digitalocean.DedicatedInferenceModelDeploymentAcceleratorArgs{
AcceleratorSlug: pulumi.String("gpu-h100x1-80gb"),
Scale: pulumi.Int(1),
Type: pulumi.String("nvidia_h100"),
},
},
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using DigitalOcean = Pulumi.DigitalOcean;
return await Deployment.RunAsync(() =>
{
var @public = new DigitalOcean.DedicatedInference("public", new()
{
Name = "my-public-inference",
Region = "tor1",
EnablePublicEndpoint = true,
ModelDeployments = new[]
{
new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentArgs
{
ModelSlug = "deepseek-r1-distill-qwen-14b",
ModelProvider = "digitalocean",
Accelerators = new[]
{
new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentAcceleratorArgs
{
AcceleratorSlug = "gpu-h100x1-80gb",
Scale = 1,
Type = "nvidia_h100",
},
},
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.digitalocean.DedicatedInference;
import com.pulumi.digitalocean.DedicatedInferenceArgs;
import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentArgs;
import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentAcceleratorArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var public_ = new DedicatedInference("public", DedicatedInferenceArgs.builder()
.name("my-public-inference")
.region("tor1")
.enablePublicEndpoint(true)
.modelDeployments(DedicatedInferenceModelDeploymentArgs.builder()
.modelSlug("deepseek-r1-distill-qwen-14b")
.modelProvider("digitalocean")
.accelerators(DedicatedInferenceModelDeploymentAcceleratorArgs.builder()
.acceleratorSlug("gpu-h100x1-80gb")
.scale(1)
.type("nvidia_h100")
.build())
.build())
.build());
}
}
resources:
public:
type: digitalocean:DedicatedInference
properties:
name: my-public-inference
region: tor1
enablePublicEndpoint: true
modelDeployments:
- modelSlug: deepseek-r1-distill-qwen-14b
modelProvider: digitalocean
accelerators:
- acceleratorSlug: gpu-h100x1-80gb
scale: 1
type: nvidia_h100
With VPC
The following examples assume an existing digitalocean.Vpc resource named example; its id is passed as vpcUuid so the endpoint is deployed into that VPC.
import * as pulumi from "@pulumi/pulumi";
import * as digitalocean from "@pulumi/digitalocean";
const _private = new digitalocean.DedicatedInference("private", {
name: "my-private-inference",
region: "tor1",
vpcUuid: example.id,
modelDeployments: [{
modelSlug: "deepseek-r1-distill-qwen-14b",
modelProvider: "digitalocean",
accelerators: [{
acceleratorSlug: "gpu-h100x1-80gb",
scale: 1,
type: "nvidia_h100",
}],
}],
});
import pulumi
import pulumi_digitalocean as digitalocean
private = digitalocean.DedicatedInference("private",
name="my-private-inference",
region="tor1",
vpc_uuid=example["id"],
model_deployments=[{
"model_slug": "deepseek-r1-distill-qwen-14b",
"model_provider": "digitalocean",
"accelerators": [{
"accelerator_slug": "gpu-h100x1-80gb",
"scale": 1,
"type": "nvidia_h100",
}],
}])
package main
import (
"github.com/pulumi/pulumi-digitalocean/sdk/v4/go/digitalocean"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := digitalocean.NewDedicatedInference(ctx, "private", &digitalocean.DedicatedInferenceArgs{
Name: pulumi.String("my-private-inference"),
Region: pulumi.String("tor1"),
VpcUuid: pulumi.Any(example.Id),
ModelDeployments: digitalocean.DedicatedInferenceModelDeploymentArray{
&digitalocean.DedicatedInferenceModelDeploymentArgs{
ModelSlug: pulumi.String("deepseek-r1-distill-qwen-14b"),
ModelProvider: pulumi.String("digitalocean"),
Accelerators: digitalocean.DedicatedInferenceModelDeploymentAcceleratorArray{
&digitalocean.DedicatedInferenceModelDeploymentAcceleratorArgs{
AcceleratorSlug: pulumi.String("gpu-h100x1-80gb"),
Scale: pulumi.Int(1),
Type: pulumi.String("nvidia_h100"),
},
},
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using DigitalOcean = Pulumi.DigitalOcean;
return await Deployment.RunAsync(() =>
{
var @private = new DigitalOcean.DedicatedInference("private", new()
{
Name = "my-private-inference",
Region = "tor1",
VpcUuid = example.Id,
ModelDeployments = new[]
{
new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentArgs
{
ModelSlug = "deepseek-r1-distill-qwen-14b",
ModelProvider = "digitalocean",
Accelerators = new[]
{
new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentAcceleratorArgs
{
AcceleratorSlug = "gpu-h100x1-80gb",
Scale = 1,
Type = "nvidia_h100",
},
},
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.digitalocean.DedicatedInference;
import com.pulumi.digitalocean.DedicatedInferenceArgs;
import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentArgs;
import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentAcceleratorArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var private_ = new DedicatedInference("private", DedicatedInferenceArgs.builder()
.name("my-private-inference")
.region("tor1")
.vpcUuid(example.id())
.modelDeployments(DedicatedInferenceModelDeploymentArgs.builder()
.modelSlug("deepseek-r1-distill-qwen-14b")
.modelProvider("digitalocean")
.accelerators(DedicatedInferenceModelDeploymentAcceleratorArgs.builder()
.acceleratorSlug("gpu-h100x1-80gb")
.scale(1)
.type("nvidia_h100")
.build())
.build())
.build());
}
}
resources:
private:
type: digitalocean:DedicatedInference
properties:
name: my-private-inference
region: tor1
vpcUuid: ${example.id}
modelDeployments:
- modelSlug: deepseek-r1-distill-qwen-14b
modelProvider: digitalocean
accelerators:
- acceleratorSlug: gpu-h100x1-80gb
scale: 1
type: nvidia_h100
Create DedicatedInference Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new DedicatedInference(name: string, args: DedicatedInferenceArgs, opts?: CustomResourceOptions);
@overload
def DedicatedInference(resource_name: str,
args: DedicatedInferenceArgs,
opts: Optional[ResourceOptions] = None)
@overload
def DedicatedInference(resource_name: str,
opts: Optional[ResourceOptions] = None,
model_deployments: Optional[Sequence[DedicatedInferenceModelDeploymentArgs]] = None,
region: Optional[str] = None,
enable_public_endpoint: Optional[bool] = None,
hugging_face_token: Optional[str] = None,
name: Optional[str] = None,
vpc_uuid: Optional[str] = None)
func NewDedicatedInference(ctx *Context, name string, args DedicatedInferenceArgs, opts ...ResourceOption) (*DedicatedInference, error)
public DedicatedInference(string name, DedicatedInferenceArgs args, CustomResourceOptions? opts = null)
public DedicatedInference(String name, DedicatedInferenceArgs args)
public DedicatedInference(String name, DedicatedInferenceArgs args, CustomResourceOptions options)
type: digitalocean:DedicatedInference
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource (resource_name in Python).
- args DedicatedInferenceArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior (ResourceOptions in Python, ResourceOption in Go).
- ctx Context
- (Go only) Context object for the current deployment.
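For illustration, a minimal TypeScript sketch of the opts parameter; protect is one of the standard CustomResourceOptions and guards the endpoint against accidental deletion:
import * as digitalocean from "@pulumi/digitalocean";

// A minimal sketch of passing resource options: `protect: true` makes
// `pulumi destroy` fail for this resource until protection is removed.
const guarded = new digitalocean.DedicatedInference("guarded", {
    name: "my-guarded-inference",
    region: "tor1",
    modelDeployments: [{
        modelSlug: "deepseek-r1-distill-qwen-14b",
        modelProvider: "digitalocean",
        accelerators: [{
            acceleratorSlug: "gpu-h100x1-80gb",
            scale: 1,
            type: "nvidia_h100",
        }],
    }],
}, { protect: true });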
DedicatedInference Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
The DedicatedInference resource accepts the following input properties:
Property names below are shown in camelCase; Python uses snake_case (for example model_deployments) and Go and .NET use PascalCase (for example ModelDeployments).
- modelDeployments List<DedicatedInferenceModelDeployment>
- The list of model deployments to run on the dedicated inference endpoint. Each modelDeployments block supports the fields described under Supporting Types below.
- region string
- The region slug where the dedicated inference endpoint will be deployed. Changing this forces a new resource.
- enablePublicEndpoint bool
- Whether to enable a public HTTPS endpoint for the dedicated inference endpoint. Defaults to false. This field is immutable after creation and changing it forces a new resource.
- huggingFaceToken string
- A HuggingFace token for accessing gated models.
- name string
- A human-readable name for the dedicated inference endpoint.
- vpcUuid string
- The UUID of the VPC to deploy the dedicated inference endpoint into. Changing this forces a new resource.
Outputs
All input properties are implicitly available as output properties. Additionally, the DedicatedInference resource produces the following output properties:
- createdAt string
- The date and time when the dedicated inference endpoint was created.
- id string
- The provider-assigned unique ID for this managed resource.
- privateEndpointFqdn string
- The fully-qualified domain name of the private endpoint.
- publicEndpointFqdn string
- The fully-qualified domain name of the public endpoint, if enabled.
- status string
- The current status of the dedicated inference endpoint.
- updatedAt string
- The date and time when the dedicated inference endpoint was last updated.
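For example, a minimal TypeScript sketch that exports a few of these outputs from the public-endpoint example above; the stack output names are arbitrary:
import * as pulumi from "@pulumi/pulumi";
import * as digitalocean from "@pulumi/digitalocean";

const example = new digitalocean.DedicatedInference("example", {
    name: "my-inference-endpoint",
    region: "tor1",
    enablePublicEndpoint: true,
    modelDeployments: [{
        modelSlug: "deepseek-r1-distill-qwen-14b",
        modelProvider: "digitalocean",
        accelerators: [{
            acceleratorSlug: "gpu-h100x1-80gb",
            scale: 1,
            type: "nvidia_h100",
        }],
    }],
});

// Each output is a pulumi.Output<string>; export them as stack outputs.
export const endpointStatus = example.status;
export const publicFqdn = example.publicEndpointFqdn;
// Compose a URL from the FQDN once the endpoint is provisioned.
export const inferenceUrl = pulumi.interpolate`https://${example.publicEndpointFqdn}`;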
Look up Existing DedicatedInference Resource
Get an existing DedicatedInference resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: DedicatedInferenceState, opts?: CustomResourceOptions): DedicatedInference
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
created_at: Optional[str] = None,
enable_public_endpoint: Optional[bool] = None,
hugging_face_token: Optional[str] = None,
model_deployments: Optional[Sequence[DedicatedInferenceModelDeploymentArgs]] = None,
name: Optional[str] = None,
private_endpoint_fqdn: Optional[str] = None,
public_endpoint_fqdn: Optional[str] = None,
region: Optional[str] = None,
status: Optional[str] = None,
updated_at: Optional[str] = None,
vpc_uuid: Optional[str] = None) -> DedicatedInference
func GetDedicatedInference(ctx *Context, name string, id IDInput, state *DedicatedInferenceState, opts ...ResourceOption) (*DedicatedInference, error)
public static DedicatedInference Get(string name, Input<string> id, DedicatedInferenceState? state, CustomResourceOptions? opts = null)
public static DedicatedInference get(String name, Output<String> id, DedicatedInferenceState state, CustomResourceOptions options)
resources:
  _:
    type: digitalocean:DedicatedInference
    get:
      id: ${id}
- name
- The unique name of the resulting resource (resource_name in Python).
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- createdAt string
- The date and time when the dedicated inference endpoint was created.
- enablePublicEndpoint bool
- Whether to enable a public HTTPS endpoint for the dedicated inference endpoint. Defaults to false. This field is immutable after creation and changing it forces a new resource.
- huggingFaceToken string
- A HuggingFace token for accessing gated models.
- modelDeployments List<DedicatedInferenceModelDeployment>
- The list of model deployments to run on the dedicated inference endpoint. Each modelDeployments block supports the fields described under Supporting Types below.
- name string
- A human-readable name for the dedicated inference endpoint.
- privateEndpointFqdn string
- The fully-qualified domain name of the private endpoint.
- publicEndpointFqdn string
- The fully-qualified domain name of the public endpoint, if enabled.
- region string
- The region slug where the dedicated inference endpoint will be deployed. Changing this forces a new resource.
- status string
- The current status of the dedicated inference endpoint.
- updatedAt string
- The date and time when the dedicated inference endpoint was last updated.
- vpcUuid string
- The UUID of the VPC to deploy the dedicated inference endpoint into. Changing this forces a new resource.
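A minimal TypeScript sketch of the lookup, assuming a placeholder endpoint ID:
import * as digitalocean from "@pulumi/digitalocean";

// Look up an existing endpoint by its provider-assigned ID.
// "endpoint-id" is a placeholder for a real ID from your account.
const existing = digitalocean.DedicatedInference.get("existing", "endpoint-id");

// State properties are available as outputs on the looked-up resource.
export const existingStatus = existing.status;
export const existingRegion = existing.region;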
Supporting Types
DedicatedInferenceModelDeployment, DedicatedInferenceModelDeploymentArgs
- accelerators List<DedicatedInferenceModelDeploymentAccelerator>
- The GPU accelerators to allocate for this model deployment. Each accelerators block supports the fields described below.
- modelProvider string
- The provider of the model (e.g. digitalocean, huggingface).
- modelSlug string
- The slug identifier for the model to deploy.
- modelId string
- The unique ID of the model.
- providerModelId string
- The provider-specific model ID. Required when modelProvider is 'hugging_face', optional for 'modelcatalog'.
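For instance, a hedged TypeScript sketch of deploying a gated HuggingFace model; the modelSlug and providerModelId values are illustrative placeholders, and the token should come from Pulumi config or another secret store:
import * as pulumi from "@pulumi/pulumi";
import * as digitalocean from "@pulumi/digitalocean";

const config = new pulumi.Config();

const hfExample = new digitalocean.DedicatedInference("hf-example", {
    name: "my-hf-inference",
    region: "tor1",
    // Token for gated models, stored as a secret with
    // `pulumi config set --secret hfToken ...`.
    huggingFaceToken: config.requireSecret("hfToken"),
    modelDeployments: [{
        modelProvider: "hugging_face",        // per the providerModelId note above
        modelSlug: "example-model-slug",      // illustrative slug
        providerModelId: "org/example-model", // illustrative HuggingFace repo ID
        accelerators: [{
            acceleratorSlug: "gpu-h100x1-80gb",
            scale: 1,
            type: "nvidia_h100",
        }],
    }],
});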
DedicatedInferenceModelDeploymentAccelerator, DedicatedInferenceModelDeploymentAcceleratorArgs
- acceleratorSlug string
- The slug identifier for the GPU accelerator type.
- scale int
- The number of accelerator units to allocate. Must be at least 1.
- type string
- The accelerator type.
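As a brief illustration, a TypeScript sketch allocating two accelerator units for higher throughput; the slug and type values simply mirror the examples above:
import * as digitalocean from "@pulumi/digitalocean";

const scaled = new digitalocean.DedicatedInference("scaled", {
    name: "my-scaled-inference",
    region: "tor1",
    modelDeployments: [{
        modelSlug: "deepseek-r1-distill-qwen-14b",
        modelProvider: "digitalocean",
        accelerators: [{
            acceleratorSlug: "gpu-h100x1-80gb",
            scale: 2, // allocate two accelerator units (must be at least 1)
            type: "nvidia_h100",
        }],
    }],
});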
Import
Dedicated inference endpoints can be imported using their id, e.g.
$ pulumi import digitalocean:index/dedicatedInference:DedicatedInference example endpoint-id
To learn more about importing existing cloud resources, see Importing resources.
Package Details
- Repository
- DigitalOcean pulumi/pulumi-digitalocean
- License
- Apache-2.0
- Notes
- This Pulumi package is based on the digitalocean Terraform Provider.
