DedicatedInference

Viewing docs for DigitalOcean v4.65.0, published on Wednesday, Apr 29, 2026 by Pulumi

    Provides a DigitalOcean Dedicated Inference resource. This can be used to create, modify, and delete dedicated inference endpoints for running GPU-accelerated model inference.

    Example Usage

    TypeScript

    import * as pulumi from "@pulumi/pulumi";
    import * as digitalocean from "@pulumi/digitalocean";
    
    const example = new digitalocean.DedicatedInference("example", {
        name: "my-inference-endpoint",
        region: "tor1",
        modelDeployments: [{
            modelSlug: "deepseek-r1-distill-qwen-14b",
            modelProvider: "digitalocean",
            accelerators: [{
                acceleratorSlug: "gpu-h100x1-80gb",
                scale: 1,
                type: "nvidia_h100",
            }],
        }],
    });
    
    Python

    import pulumi
    import pulumi_digitalocean as digitalocean
    
    example = digitalocean.DedicatedInference("example",
        name="my-inference-endpoint",
        region="tor1",
        model_deployments=[{
            "model_slug": "deepseek-r1-distill-qwen-14b",
            "model_provider": "digitalocean",
            "accelerators": [{
                "accelerator_slug": "gpu-h100x1-80gb",
                "scale": 1,
                "type": "nvidia_h100",
            }],
        }])
    
    Go

    package main
    
    import (
    	"github.com/pulumi/pulumi-digitalocean/sdk/v4/go/digitalocean"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := digitalocean.NewDedicatedInference(ctx, "example", &digitalocean.DedicatedInferenceArgs{
    			Name:   pulumi.String("my-inference-endpoint"),
    			Region: pulumi.String("tor1"),
    			ModelDeployments: digitalocean.DedicatedInferenceModelDeploymentArray{
    				&digitalocean.DedicatedInferenceModelDeploymentArgs{
    					ModelSlug:     pulumi.String("deepseek-r1-distill-qwen-14b"),
    					ModelProvider: pulumi.String("digitalocean"),
    					Accelerators: digitalocean.DedicatedInferenceModelDeploymentAcceleratorArray{
    						&digitalocean.DedicatedInferenceModelDeploymentAcceleratorArgs{
    							AcceleratorSlug: pulumi.String("gpu-h100x1-80gb"),
    							Scale:           pulumi.Int(1),
    							Type:            pulumi.String("nvidia_h100"),
    						},
    					},
    				},
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    C#

    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using DigitalOcean = Pulumi.DigitalOcean;
    
    return await Deployment.RunAsync(() => 
    {
        var example = new DigitalOcean.DedicatedInference("example", new()
        {
            Name = "my-inference-endpoint",
            Region = "tor1",
            ModelDeployments = new[]
            {
                new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentArgs
                {
                    ModelSlug = "deepseek-r1-distill-qwen-14b",
                    ModelProvider = "digitalocean",
                    Accelerators = new[]
                    {
                        new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentAcceleratorArgs
                        {
                            AcceleratorSlug = "gpu-h100x1-80gb",
                            Scale = 1,
                            Type = "nvidia_h100",
                        },
                    },
                },
            },
        });
    
    });
    
    Java

    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.digitalocean.DedicatedInference;
    import com.pulumi.digitalocean.DedicatedInferenceArgs;
    import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentArgs;
    import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentAcceleratorArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var example = new DedicatedInference("example", DedicatedInferenceArgs.builder()
                .name("my-inference-endpoint")
                .region("tor1")
                .modelDeployments(DedicatedInferenceModelDeploymentArgs.builder()
                    .modelSlug("deepseek-r1-distill-qwen-14b")
                    .modelProvider("digitalocean")
                    .accelerators(DedicatedInferenceModelDeploymentAcceleratorArgs.builder()
                        .acceleratorSlug("gpu-h100x1-80gb")
                        .scale(1)
                        .type("nvidia_h100")
                        .build())
                    .build())
                .build());
    
        }
    }
    
    YAML

    resources:
      example:
        type: digitalocean:DedicatedInference
        properties:
          name: my-inference-endpoint
          region: tor1
          modelDeployments:
            - modelSlug: deepseek-r1-distill-qwen-14b
              modelProvider: digitalocean
              accelerators:
                - acceleratorSlug: gpu-h100x1-80gb
                  scale: 1
                  type: nvidia_h100
    

    With Public Endpoint

    TypeScript

    import * as pulumi from "@pulumi/pulumi";
    import * as digitalocean from "@pulumi/digitalocean";
    
    const _public = new digitalocean.DedicatedInference("public", {
        name: "my-public-inference",
        region: "tor1",
        enablePublicEndpoint: true,
        modelDeployments: [{
            modelSlug: "deepseek-r1-distill-qwen-14b",
            modelProvider: "digitalocean",
            accelerators: [{
                acceleratorSlug: "gpu-h100x1-80gb",
                scale: 1,
                type: "nvidia_h100",
            }],
        }],
    });
    
    Python

    import pulumi
    import pulumi_digitalocean as digitalocean
    
    public = digitalocean.DedicatedInference("public",
        name="my-public-inference",
        region="tor1",
        enable_public_endpoint=True,
        model_deployments=[{
            "model_slug": "deepseek-r1-distill-qwen-14b",
            "model_provider": "digitalocean",
            "accelerators": [{
                "accelerator_slug": "gpu-h100x1-80gb",
                "scale": 1,
                "type": "nvidia_h100",
            }],
        }])
    
    Go

    package main
    
    import (
    	"github.com/pulumi/pulumi-digitalocean/sdk/v4/go/digitalocean"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := digitalocean.NewDedicatedInference(ctx, "public", &digitalocean.DedicatedInferenceArgs{
    			Name:                 pulumi.String("my-public-inference"),
    			Region:               pulumi.String("tor1"),
    			EnablePublicEndpoint: pulumi.Bool(true),
    			ModelDeployments: digitalocean.DedicatedInferenceModelDeploymentArray{
    				&digitalocean.DedicatedInferenceModelDeploymentArgs{
    					ModelSlug:     pulumi.String("deepseek-r1-distill-qwen-14b"),
    					ModelProvider: pulumi.String("digitalocean"),
    					Accelerators: digitalocean.DedicatedInferenceModelDeploymentAcceleratorArray{
    						&digitalocean.DedicatedInferenceModelDeploymentAcceleratorArgs{
    							AcceleratorSlug: pulumi.String("gpu-h100x1-80gb"),
    							Scale:           pulumi.Int(1),
    							Type:            pulumi.String("nvidia_h100"),
    						},
    					},
    				},
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    C#

    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using DigitalOcean = Pulumi.DigitalOcean;
    
    return await Deployment.RunAsync(() => 
    {
        var @public = new DigitalOcean.DedicatedInference("public", new()
        {
            Name = "my-public-inference",
            Region = "tor1",
            EnablePublicEndpoint = true,
            ModelDeployments = new[]
            {
                new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentArgs
                {
                    ModelSlug = "deepseek-r1-distill-qwen-14b",
                    ModelProvider = "digitalocean",
                    Accelerators = new[]
                    {
                        new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentAcceleratorArgs
                        {
                            AcceleratorSlug = "gpu-h100x1-80gb",
                            Scale = 1,
                            Type = "nvidia_h100",
                        },
                    },
                },
            },
        });
    
    });
    
    Java

    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.digitalocean.DedicatedInference;
    import com.pulumi.digitalocean.DedicatedInferenceArgs;
    import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentArgs;
    import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentAcceleratorArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var public_ = new DedicatedInference("public", DedicatedInferenceArgs.builder()
                .name("my-public-inference")
                .region("tor1")
                .enablePublicEndpoint(true)
                .modelDeployments(DedicatedInferenceModelDeploymentArgs.builder()
                    .modelSlug("deepseek-r1-distill-qwen-14b")
                    .modelProvider("digitalocean")
                    .accelerators(DedicatedInferenceModelDeploymentAcceleratorArgs.builder()
                        .acceleratorSlug("gpu-h100x1-80gb")
                        .scale(1)
                        .type("nvidia_h100")
                        .build())
                    .build())
                .build());
    
        }
    }
    
    YAML

    resources:
      public:
        type: digitalocean:DedicatedInference
        properties:
          name: my-public-inference
          region: tor1
          enablePublicEndpoint: true
          modelDeployments:
            - modelSlug: deepseek-r1-distill-qwen-14b
              modelProvider: digitalocean
              accelerators:
                - acceleratorSlug: gpu-h100x1-80gb
                  scale: 1
                  type: nvidia_h100
    

    With VPC

    The examples below reference an existing VPC resource (for example, a digitalocean.Vpc named example) defined elsewhere in the program; only its id is used to set vpcUuid.

    TypeScript

    import * as pulumi from "@pulumi/pulumi";
    import * as digitalocean from "@pulumi/digitalocean";
    
    const _private = new digitalocean.DedicatedInference("private", {
        name: "my-private-inference",
        region: "tor1",
        vpcUuid: example.id,
        modelDeployments: [{
            modelSlug: "deepseek-r1-distill-qwen-14b",
            modelProvider: "digitalocean",
            accelerators: [{
                acceleratorSlug: "gpu-h100x1-80gb",
                scale: 1,
                type: "nvidia_h100",
            }],
        }],
    });
    
    Python

    import pulumi
    import pulumi_digitalocean as digitalocean
    
    private = digitalocean.DedicatedInference("private",
        name="my-private-inference",
        region="tor1",
        vpc_uuid=example["id"],
        model_deployments=[{
            "model_slug": "deepseek-r1-distill-qwen-14b",
            "model_provider": "digitalocean",
            "accelerators": [{
                "accelerator_slug": "gpu-h100x1-80gb",
                "scale": 1,
                "type": "nvidia_h100",
            }],
        }])
    
    Go

    package main
    
    import (
    	"github.com/pulumi/pulumi-digitalocean/sdk/v4/go/digitalocean"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := digitalocean.NewDedicatedInference(ctx, "private", &digitalocean.DedicatedInferenceArgs{
    			Name:    pulumi.String("my-private-inference"),
    			Region:  pulumi.String("tor1"),
    			VpcUuid: pulumi.Any(example.Id),
    			ModelDeployments: digitalocean.DedicatedInferenceModelDeploymentArray{
    				&digitalocean.DedicatedInferenceModelDeploymentArgs{
    					ModelSlug:     pulumi.String("deepseek-r1-distill-qwen-14b"),
    					ModelProvider: pulumi.String("digitalocean"),
    					Accelerators: digitalocean.DedicatedInferenceModelDeploymentAcceleratorArray{
    						&digitalocean.DedicatedInferenceModelDeploymentAcceleratorArgs{
    							AcceleratorSlug: pulumi.String("gpu-h100x1-80gb"),
    							Scale:           pulumi.Int(1),
    							Type:            pulumi.String("nvidia_h100"),
    						},
    					},
    				},
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    C#

    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using DigitalOcean = Pulumi.DigitalOcean;
    
    return await Deployment.RunAsync(() => 
    {
        var @private = new DigitalOcean.DedicatedInference("private", new()
        {
            Name = "my-private-inference",
            Region = "tor1",
            VpcUuid = example.Id,
            ModelDeployments = new[]
            {
                new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentArgs
                {
                    ModelSlug = "deepseek-r1-distill-qwen-14b",
                    ModelProvider = "digitalocean",
                    Accelerators = new[]
                    {
                        new DigitalOcean.Inputs.DedicatedInferenceModelDeploymentAcceleratorArgs
                        {
                            AcceleratorSlug = "gpu-h100x1-80gb",
                            Scale = 1,
                            Type = "nvidia_h100",
                        },
                    },
                },
            },
        });
    
    });
    
    Java

    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.digitalocean.DedicatedInference;
    import com.pulumi.digitalocean.DedicatedInferenceArgs;
    import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentArgs;
    import com.pulumi.digitalocean.inputs.DedicatedInferenceModelDeploymentAcceleratorArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var private_ = new DedicatedInference("private", DedicatedInferenceArgs.builder()
                .name("my-private-inference")
                .region("tor1")
                .vpcUuid(example.id())
                .modelDeployments(DedicatedInferenceModelDeploymentArgs.builder()
                    .modelSlug("deepseek-r1-distill-qwen-14b")
                    .modelProvider("digitalocean")
                    .accelerators(DedicatedInferenceModelDeploymentAcceleratorArgs.builder()
                        .acceleratorSlug("gpu-h100x1-80gb")
                        .scale(1)
                        .type("nvidia_h100")
                        .build())
                    .build())
                .build());
    
        }
    }
    
    YAML

    resources:
      private:
        type: digitalocean:DedicatedInference
        properties:
          name: my-private-inference
          region: tor1
          vpcUuid: ${example.id}
          modelDeployments:
            - modelSlug: deepseek-r1-distill-qwen-14b
              modelProvider: digitalocean
              accelerators:
                - acceleratorSlug: gpu-h100x1-80gb
                  scale: 1
                  type: nvidia_h100
    

    Create DedicatedInference Resource

    Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.

    Constructor syntax

    TypeScript

    new DedicatedInference(name: string, args: DedicatedInferenceArgs, opts?: CustomResourceOptions);

    Python

    @overload
    def DedicatedInference(resource_name: str,
                           args: DedicatedInferenceArgs,
                           opts: Optional[ResourceOptions] = None)
    
    @overload
    def DedicatedInference(resource_name: str,
                           opts: Optional[ResourceOptions] = None,
                           model_deployments: Optional[Sequence[DedicatedInferenceModelDeploymentArgs]] = None,
                           region: Optional[str] = None,
                           enable_public_endpoint: Optional[bool] = None,
                           hugging_face_token: Optional[str] = None,
                           name: Optional[str] = None,
                           vpc_uuid: Optional[str] = None)

    Go

    func NewDedicatedInference(ctx *Context, name string, args DedicatedInferenceArgs, opts ...ResourceOption) (*DedicatedInference, error)

    C#

    public DedicatedInference(string name, DedicatedInferenceArgs args, CustomResourceOptions? opts = null)

    Java

    public DedicatedInference(String name, DedicatedInferenceArgs args)
    public DedicatedInference(String name, DedicatedInferenceArgs args, CustomResourceOptions options)
    
    YAML

    type: digitalocean:DedicatedInference
    properties: # The arguments to resource properties.
    options: # Bag of options to control resource's behavior.
    
    

    Parameters

    name string
    The unique name of the resource.
    args DedicatedInferenceArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    resource_name str
    The unique name of the resource.
    args DedicatedInferenceArgs
    The arguments to resource properties.
    opts ResourceOptions
    Bag of options to control resource's behavior.
    ctx Context
    Context object for the current deployment.
    name string
    The unique name of the resource.
    args DedicatedInferenceArgs
    The arguments to resource properties.
    opts ResourceOption
    Bag of options to control resource's behavior.
    name string
    The unique name of the resource.
    args DedicatedInferenceArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    name String
    The unique name of the resource.
    args DedicatedInferenceArgs
    The arguments to resource properties.
    options CustomResourceOptions
    Bag of options to control resource's behavior.
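
    The opts/options argument is the standard Pulumi resource-options bag (CustomResourceOptions). As a minimal TypeScript sketch of how it is passed, the protect and dependsOn settings below are generic Pulumi options rather than anything specific to this resource:

    import * as digitalocean from "@pulumi/digitalocean";

    // Guard the endpoint against accidental deletion; `protect` and the
    // commented-out `dependsOn` are standard CustomResourceOptions fields.
    const guarded = new digitalocean.DedicatedInference("guarded", {
        name: "my-inference-endpoint",
        region: "tor1",
        modelDeployments: [{
            modelSlug: "deepseek-r1-distill-qwen-14b",
            modelProvider: "digitalocean",
            accelerators: [{
                acceleratorSlug: "gpu-h100x1-80gb",
                scale: 1,
                type: "nvidia_h100",
            }],
        }],
    }, {
        protect: true,         // `pulumi destroy` will refuse to delete this resource
        // dependsOn: [vpc],   // hypothetical resource the endpoint should wait on
    });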

    DedicatedInference Resource Properties

    To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

    Inputs

    In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.

    The DedicatedInference resource accepts the following input properties:

    ModelDeployments List<Pulumi.DigitalOcean.Inputs.DedicatedInferenceModelDeployment>
    The list of model deployments to run on the dedicated inference endpoint. The fields of each modelDeployments block are documented in DedicatedInferenceModelDeployment under Supporting Types below.
    Region string
    The region slug where the dedicated inference endpoint will be deployed. Changing this forces a new resource.
    EnablePublicEndpoint bool
    Whether to enable a public HTTPS endpoint for the dedicated inference endpoint. Defaults to false. This field is immutable after creation and changing it forces a new resource.
    HuggingFaceToken string
    A HuggingFace token for accessing gated models.
    Name string
    A human-readable name for the dedicated inference endpoint.
    VpcUuid string
    The UUID of the VPC to deploy the dedicated inference endpoint into. Changing this forces a new resource.
    ModelDeployments []DedicatedInferenceModelDeploymentArgs
    The list of model deployments to run on the dedicated inference endpoint. The fields of each modelDeployments block are documented in DedicatedInferenceModelDeployment under Supporting Types below.
    Region string
    The region slug where the dedicated inference endpoint will be deployed. Changing this forces a new resource.
    EnablePublicEndpoint bool
    Whether to enable a public HTTPS endpoint for the dedicated inference endpoint. Defaults to false. This field is immutable after creation and changing it forces a new resource.
    HuggingFaceToken string
    A HuggingFace token for accessing gated models.
    Name string
    A human-readable name for the dedicated inference endpoint.
    VpcUuid string
    The UUID of the VPC to deploy the dedicated inference endpoint into. Changing this forces a new resource.
    modelDeployments List<DedicatedInferenceModelDeployment>
    The list of model deployments to run on the dedicated inference endpoint. The fields of each modelDeployments block are documented in DedicatedInferenceModelDeployment under Supporting Types below.
    region String
    The region slug where the dedicated inference endpoint will be deployed. Changing this forces a new resource.
    enablePublicEndpoint Boolean
    Whether to enable a public HTTPS endpoint for the dedicated inference endpoint. Defaults to false. This field is immutable after creation and changing it forces a new resource.
    huggingFaceToken String
    A HuggingFace token for accessing gated models.
    name String
    A human-readable name for the dedicated inference endpoint.
    vpcUuid String
    The UUID of the VPC to deploy the dedicated inference endpoint into. Changing this forces a new resource.
    modelDeployments DedicatedInferenceModelDeployment[]
    The list of model deployments to run on the dedicated inference endpoint. The fields of each modelDeployments block are documented in DedicatedInferenceModelDeployment under Supporting Types below.
    region string
    The region slug where the dedicated inference endpoint will be deployed. Changing this forces a new resource.
    enablePublicEndpoint boolean
    Whether to enable a public HTTPS endpoint for the dedicated inference endpoint. Defaults to false. This field is immutable after creation and changing it forces a new resource.
    huggingFaceToken string
    A HuggingFace token for accessing gated models.
    name string
    A human-readable name for the dedicated inference endpoint.
    vpcUuid string
    The UUID of the VPC to deploy the dedicated inference endpoint into. Changing this forces a new resource.
    model_deployments Sequence[DedicatedInferenceModelDeploymentArgs]
    The list of model deployments to run on the dedicated inference endpoint. The fields of each modelDeployments block are documented in DedicatedInferenceModelDeployment under Supporting Types below.
    region str
    The region slug where the dedicated inference endpoint will be deployed. Changing this forces a new resource.
    enable_public_endpoint bool
    Whether to enable a public HTTPS endpoint for the dedicated inference endpoint. Defaults to false. This field is immutable after creation and changing it forces a new resource.
    hugging_face_token str
    A HuggingFace token for accessing gated models.
    name str
    A human-readable name for the dedicated inference endpoint.
    vpc_uuid str
    The UUID of the VPC to deploy the dedicated inference endpoint into. Changing this forces a new resource.
    modelDeployments List<Property Map>
    The list of model deployments to run on the dedicated inference endpoint. The fields of each modelDeployments block are documented in DedicatedInferenceModelDeployment under Supporting Types below.
    region String
    The region slug where the dedicated inference endpoint will be deployed. Changing this forces a new resource.
    enablePublicEndpoint Boolean
    Whether to enable a public HTTPS endpoint for the dedicated inference endpoint. Defaults to false. This field is immutable after creation and changing it forces a new resource.
    huggingFaceToken String
    A HuggingFace token for accessing gated models.
    name String
    A human-readable name for the dedicated inference endpoint.
    vpcUuid String
    The UUID of the VPC to deploy the dedicated inference endpoint into. Changing this forces a new resource.
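
    The huggingFaceToken input is only needed for gated models. A minimal TypeScript sketch, assuming a gated Hugging Face deployment; the model slug and provider model ID are illustrative placeholders, and the token is read as a Pulumi secret so it stays encrypted in state:

    import * as pulumi from "@pulumi/pulumi";
    import * as digitalocean from "@pulumi/digitalocean";

    const config = new pulumi.Config();

    // Set once with: pulumi config set --secret huggingFaceToken hf_xxx
    const gated = new digitalocean.DedicatedInference("gated", {
        name: "my-gated-inference",
        region: "tor1",
        huggingFaceToken: config.requireSecret("huggingFaceToken"),
        modelDeployments: [{
            modelProvider: "hugging_face",      // see the supporting-type note below
            modelSlug: "example-gated-model",   // illustrative slug
            providerModelId: "example-org/example-gated-model", // required for hugging_face
            accelerators: [{
                acceleratorSlug: "gpu-h100x1-80gb",
                scale: 1,
                type: "nvidia_h100",
            }],
        }],
    });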

    Outputs

    All input properties are implicitly available as output properties. Additionally, the DedicatedInference resource produces the following output properties:

    CreatedAt string
    The date and time when the dedicated inference endpoint was created.
    Id string
    The provider-assigned unique ID for this managed resource.
    PrivateEndpointFqdn string
    The fully-qualified domain name of the private endpoint.
    PublicEndpointFqdn string
    The fully-qualified domain name of the public endpoint, if enabled.
    Status string
    The current status of the dedicated inference endpoint.
    UpdatedAt string
    The date and time when the dedicated inference endpoint was last updated.
    CreatedAt string
    The date and time when the dedicated inference endpoint was created.
    Id string
    The provider-assigned unique ID for this managed resource.
    PrivateEndpointFqdn string
    The fully-qualified domain name of the private endpoint.
    PublicEndpointFqdn string
    The fully-qualified domain name of the public endpoint, if enabled.
    Status string
    The current status of the dedicated inference endpoint.
    UpdatedAt string
    The date and time when the dedicated inference endpoint was last updated.
    createdAt String
    The date and time when the dedicated inference endpoint was created.
    id String
    The provider-assigned unique ID for this managed resource.
    privateEndpointFqdn String
    The fully-qualified domain name of the private endpoint.
    publicEndpointFqdn String
    The fully-qualified domain name of the public endpoint, if enabled.
    status String
    The current status of the dedicated inference endpoint.
    updatedAt String
    The date and time when the dedicated inference endpoint was last updated.
    createdAt string
    The date and time when the dedicated inference endpoint was created.
    id string
    The provider-assigned unique ID for this managed resource.
    privateEndpointFqdn string
    The fully-qualified domain name of the private endpoint.
    publicEndpointFqdn string
    The fully-qualified domain name of the public endpoint, if enabled.
    status string
    The current status of the dedicated inference endpoint.
    updatedAt string
    The date and time when the dedicated inference endpoint was last updated.
    created_at str
    The date and time when the dedicated inference endpoint was created.
    id str
    The provider-assigned unique ID for this managed resource.
    private_endpoint_fqdn str
    The fully-qualified domain name of the private endpoint.
    public_endpoint_fqdn str
    The fully-qualified domain name of the public endpoint, if enabled.
    status str
    The current status of the dedicated inference endpoint.
    updated_at str
    The date and time when the dedicated inference endpoint was last updated.
    createdAt String
    The date and time when the dedicated inference endpoint was created.
    id String
    The provider-assigned unique ID for this managed resource.
    privateEndpointFqdn String
    The fully-qualified domain name of the private endpoint.
    publicEndpointFqdn String
    The fully-qualified domain name of the public endpoint, if enabled.
    status String
    The current status of the dedicated inference endpoint.
    updatedAt String
    The date and time when the dedicated inference endpoint was last updated.
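
    Like any Pulumi outputs, these properties can be exported from the stack. A minimal TypeScript sketch, reusing the example resource from the first example above:

    // Surface the endpoint's lifecycle state and addresses as stack outputs.
    export const inferenceStatus = example.status;
    export const privateEndpoint = example.privateEndpointFqdn;
    // Only populated when enablePublicEndpoint is true.
    export const publicEndpoint = example.publicEndpointFqdn;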

    Look up Existing DedicatedInference Resource

    Get an existing DedicatedInference resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

    TypeScript

    public static get(name: string, id: Input<ID>, state?: DedicatedInferenceState, opts?: CustomResourceOptions): DedicatedInference

    Python

    @staticmethod
    def get(resource_name: str,
            id: str,
            opts: Optional[ResourceOptions] = None,
            created_at: Optional[str] = None,
            enable_public_endpoint: Optional[bool] = None,
            hugging_face_token: Optional[str] = None,
            model_deployments: Optional[Sequence[DedicatedInferenceModelDeploymentArgs]] = None,
            name: Optional[str] = None,
            private_endpoint_fqdn: Optional[str] = None,
            public_endpoint_fqdn: Optional[str] = None,
            region: Optional[str] = None,
            status: Optional[str] = None,
            updated_at: Optional[str] = None,
            vpc_uuid: Optional[str] = None) -> DedicatedInference

    Go

    func GetDedicatedInference(ctx *Context, name string, id IDInput, state *DedicatedInferenceState, opts ...ResourceOption) (*DedicatedInference, error)

    C#

    public static DedicatedInference Get(string name, Input<string> id, DedicatedInferenceState? state, CustomResourceOptions? opts = null)

    Java

    public static DedicatedInference get(String name, Output<String> id, DedicatedInferenceState state, CustomResourceOptions options)

    YAML

    resources:
      _:
        type: digitalocean:DedicatedInference
        get:
          id: ${id}
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    resource_name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    The following state arguments are supported:
    CreatedAt string
    The date and time when the dedicated inference endpoint was created.
    EnablePublicEndpoint bool
    Whether to enable a public HTTPS endpoint for the dedicated inference endpoint. Defaults to false. This field is immutable after creation and changing it forces a new resource.
    HuggingFaceToken string
    A HuggingFace token for accessing gated models.
    ModelDeployments List<Pulumi.DigitalOcean.Inputs.DedicatedInferenceModelDeployment>
    The list of model deployments to run on the dedicated inference endpoint. The fields of each modelDeployments block are documented in DedicatedInferenceModelDeployment under Supporting Types below.
    Name string
    A human-readable name for the dedicated inference endpoint.
    PrivateEndpointFqdn string
    The fully-qualified domain name of the private endpoint.
    PublicEndpointFqdn string
    The fully-qualified domain name of the public endpoint, if enabled.
    Region string
    The region slug where the dedicated inference endpoint will be deployed. Changing this forces a new resource.
    Status string
    The current status of the dedicated inference endpoint.
    UpdatedAt string
    The date and time when the dedicated inference endpoint was last updated.
    VpcUuid string
    The UUID of the VPC to deploy the dedicated inference endpoint into. Changing this forces a new resource.
    CreatedAt string
    The date and time when the dedicated inference endpoint was created.
    EnablePublicEndpoint bool
    Whether to enable a public HTTPS endpoint for the dedicated inference endpoint. Defaults to false. This field is immutable after creation and changing it forces a new resource.
    HuggingFaceToken string
    A HuggingFace token for accessing gated models.
    ModelDeployments []DedicatedInferenceModelDeploymentArgs
    The list of model deployments to run on the dedicated inference endpoint. The fields of each modelDeployments block are documented in DedicatedInferenceModelDeployment under Supporting Types below.
    Name string
    A human-readable name for the dedicated inference endpoint.
    PrivateEndpointFqdn string
    The fully-qualified domain name of the private endpoint.
    PublicEndpointFqdn string
    The fully-qualified domain name of the public endpoint, if enabled.
    Region string
    The region slug where the dedicated inference endpoint will be deployed. Changing this forces a new resource.
    Status string
    The current status of the dedicated inference endpoint.
    UpdatedAt string
    The date and time when the dedicated inference endpoint was last updated.
    VpcUuid string
    The UUID of the VPC to deploy the dedicated inference endpoint into. Changing this forces a new resource.
    createdAt String
    The date and time when the dedicated inference endpoint was created.
    enablePublicEndpoint Boolean
    Whether to enable a public HTTPS endpoint for the dedicated inference endpoint. Defaults to false. This field is immutable after creation and changing it forces a new resource.
    huggingFaceToken String
    A HuggingFace token for accessing gated models.
    modelDeployments List<DedicatedInferenceModelDeployment>
    The list of model deployments to run on the dedicated inference endpoint. The fields of each modelDeployments block are documented in DedicatedInferenceModelDeployment under Supporting Types below.
    name String
    A human-readable name for the dedicated inference endpoint.
    privateEndpointFqdn String
    The fully-qualified domain name of the private endpoint.
    publicEndpointFqdn String
    The fully-qualified domain name of the public endpoint, if enabled.
    region String
    The region slug where the dedicated inference endpoint will be deployed. Changing this forces a new resource.
    status String
    The current status of the dedicated inference endpoint.
    updatedAt String
    The date and time when the dedicated inference endpoint was last updated.
    vpcUuid String
    The UUID of the VPC to deploy the dedicated inference endpoint into. Changing this forces a new resource.
    createdAt string
    The date and time when the dedicated inference endpoint was created.
    enablePublicEndpoint boolean
    Whether to enable a public HTTPS endpoint for the dedicated inference endpoint. Defaults to false. This field is immutable after creation and changing it forces a new resource.
    huggingFaceToken string
    A HuggingFace token for accessing gated models.
    modelDeployments DedicatedInferenceModelDeployment[]
    The list of model deployments to run on the dedicated inference endpoint. The fields of each modelDeployments block are documented in DedicatedInferenceModelDeployment under Supporting Types below.
    name string
    A human-readable name for the dedicated inference endpoint.
    privateEndpointFqdn string
    The fully-qualified domain name of the private endpoint.
    publicEndpointFqdn string
    The fully-qualified domain name of the public endpoint, if enabled.
    region string
    The region slug where the dedicated inference endpoint will be deployed. Changing this forces a new resource.
    status string
    The current status of the dedicated inference endpoint.
    updatedAt string
    The date and time when the dedicated inference endpoint was last updated.
    vpcUuid string
    The UUID of the VPC to deploy the dedicated inference endpoint into. Changing this forces a new resource.
    created_at str
    The date and time when the dedicated inference endpoint was created.
    enable_public_endpoint bool
    Whether to enable a public HTTPS endpoint for the dedicated inference endpoint. Defaults to false. This field is immutable after creation and changing it forces a new resource.
    hugging_face_token str
    A HuggingFace token for accessing gated models.
    model_deployments Sequence[DedicatedInferenceModelDeploymentArgs]
    The list of model deployments to run on the dedicated inference endpoint. The fields of each modelDeployments block are documented in DedicatedInferenceModelDeployment under Supporting Types below.
    name str
    A human-readable name for the dedicated inference endpoint.
    private_endpoint_fqdn str
    The fully-qualified domain name of the private endpoint.
    public_endpoint_fqdn str
    The fully-qualified domain name of the public endpoint, if enabled.
    region str
    The region slug where the dedicated inference endpoint will be deployed. Changing this forces a new resource.
    status str
    The current status of the dedicated inference endpoint.
    updated_at str
    The date and time when the dedicated inference endpoint was last updated.
    vpc_uuid str
    The UUID of the VPC to deploy the dedicated inference endpoint into. Changing this forces a new resource.
    createdAt String
    The date and time when the dedicated inference endpoint was created.
    enablePublicEndpoint Boolean
    Whether to enable a public HTTPS endpoint for the dedicated inference endpoint. Defaults to false. This field is immutable after creation and changing it forces a new resource.
    huggingFaceToken String
    A HuggingFace token for accessing gated models.
    modelDeployments List<Property Map>
    The list of model deployments to run on the dedicated inference endpoint. The fields of each modelDeployments block are documented in DedicatedInferenceModelDeployment under Supporting Types below.
    name String
    A human-readable name for the dedicated inference endpoint.
    privateEndpointFqdn String
    The fully-qualified domain name of the private endpoint.
    publicEndpointFqdn String
    The fully-qualified domain name of the public endpoint, if enabled.
    region String
    The region slug where the dedicated inference endpoint will be deployed. Changing this forces a new resource.
    status String
    The current status of the dedicated inference endpoint.
    updatedAt String
    The date and time when the dedicated inference endpoint was last updated.
    vpcUuid String
    The UUID of the VPC to deploy the dedicated inference endpoint into. Changing this forces a new resource.
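
    In the SDKs this lookup is the static get method on the resource class. A minimal TypeScript sketch (the endpoint ID is a placeholder):

    import * as digitalocean from "@pulumi/digitalocean";

    // Read an existing endpoint's state by logical name and provider ID; no
    // state arguments are passed, so all properties come from the provider.
    const existing = digitalocean.DedicatedInference.get("existing", "endpoint-id");

    export const existingStatus = existing.status;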

    Supporting Types

    DedicatedInferenceModelDeployment, DedicatedInferenceModelDeploymentArgs

    Accelerators List<Pulumi.DigitalOcean.Inputs.DedicatedInferenceModelDeploymentAccelerator>
    The GPU accelerators to allocate for this model deployment. The fields of each accelerators block are documented in DedicatedInferenceModelDeploymentAccelerator below.
    ModelProvider string
    The provider of the model (e.g. digitalocean, huggingface).
    ModelSlug string
    The slug identifier for the model to deploy.
    ModelId string
    The unique ID of the model.
    ProviderModelId string
    The provider-specific model ID. Required when modelProvider is 'hugging_face', optional for 'modelcatalog'.
    Accelerators []DedicatedInferenceModelDeploymentAccelerator
    The GPU accelerators to allocate for this model deployment. The fields of each accelerators block are documented in DedicatedInferenceModelDeploymentAccelerator below.
    ModelProvider string
    The provider of the model (e.g. digitalocean, huggingface).
    ModelSlug string
    The slug identifier for the model to deploy.
    ModelId string
    The unique ID of the model.
    ProviderModelId string
    The provider-specific model ID. Required when modelProvider is 'hugging_face', optional for 'modelcatalog'.
    accelerators List<DedicatedInferenceModelDeploymentAccelerator>
    The GPU accelerators to allocate for this model deployment. The fields of each accelerators block are documented in DedicatedInferenceModelDeploymentAccelerator below.
    modelProvider String
    The provider of the model (e.g. digitalocean, huggingface).
    modelSlug String
    The slug identifier for the model to deploy.
    modelId String
    The unique ID of the model.
    providerModelId String
    The provider-specific model ID. Required when modelProvider is 'hugging_face', optional for 'modelcatalog'.
    accelerators DedicatedInferenceModelDeploymentAccelerator[]
    The GPU accelerators to allocate for this model deployment. The fields of each accelerators block are documented in DedicatedInferenceModelDeploymentAccelerator below.
    modelProvider string
    The provider of the model (e.g. digitalocean, huggingface).
    modelSlug string
    The slug identifier for the model to deploy.
    modelId string
    The unique ID of the model.
    providerModelId string
    The provider-specific model ID. Required when modelProvider is 'hugging_face', optional for 'modelcatalog'.
    accelerators Sequence[DedicatedInferenceModelDeploymentAccelerator]
    The GPU accelerators to allocate for this model deployment. The fields of each accelerators block are documented in DedicatedInferenceModelDeploymentAccelerator below.
    model_provider str
    The provider of the model (e.g. digitalocean, huggingface).
    model_slug str
    The slug identifier for the model to deploy.
    model_id str
    The unique ID of the model.
    provider_model_id str
    The provider-specific model ID. Required when modelProvider is 'hugging_face', optional for 'modelcatalog'.
    accelerators List<Property Map>
    The GPU accelerators to allocate for this model deployment. The fields of each accelerators block are documented in DedicatedInferenceModelDeploymentAccelerator below.
    modelProvider String
    The provider of the model (e.g. digitalocean, huggingface).
    modelSlug String
    The slug identifier for the model to deploy.
    modelId String
    The unique ID of the model.
    providerModelId String
    The provider-specific model ID. Required when modelProvider is 'hugging_face', optional for 'modelcatalog'.
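
    Because modelDeployments is a list, the schema allows more than one deployment per endpoint. A hedged TypeScript sketch; the second model slug is an illustrative placeholder, and regional capacity limits for multiple deployments are not documented here:

    import * as digitalocean from "@pulumi/digitalocean";

    const multi = new digitalocean.DedicatedInference("multi", {
        name: "my-multi-model-inference",
        region: "tor1",
        modelDeployments: [
            {
                modelSlug: "deepseek-r1-distill-qwen-14b",
                modelProvider: "digitalocean",
                accelerators: [{
                    acceleratorSlug: "gpu-h100x1-80gb",
                    scale: 1,
                    type: "nvidia_h100",
                }],
            },
            {
                modelSlug: "example-second-model", // illustrative placeholder
                modelProvider: "digitalocean",
                accelerators: [{
                    acceleratorSlug: "gpu-h100x1-80gb",
                    scale: 1,
                    type: "nvidia_h100",
                }],
            },
        ],
    });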

    DedicatedInferenceModelDeploymentAccelerator, DedicatedInferenceModelDeploymentAcceleratorArgs

    AcceleratorSlug string
    The slug identifier for the GPU accelerator type.
    Scale int
    The number of accelerator units to allocate. Must be at least 1.
    Type string
    The accelerator type.
    AcceleratorSlug string
    The slug identifier for the GPU accelerator type.
    Scale int
    The number of accelerator units to allocate. Must be at least 1.
    Type string
    The accelerator type.
    acceleratorSlug String
    The slug identifier for the GPU accelerator type.
    scale Integer
    The number of accelerator units to allocate. Must be at least 1.
    type String
    The accelerator type.
    acceleratorSlug string
    The slug identifier for the GPU accelerator type.
    scale number
    The number of accelerator units to allocate. Must be at least 1.
    type string
    The accelerator type.
    accelerator_slug str
    The slug identifier for the GPU accelerator type.
    scale int
    The number of accelerator units to allocate. Must be at least 1.
    type str
    The accelerator type.
    acceleratorSlug String
    The slug identifier for the GPU accelerator type.
    scale Number
    The number of accelerator units to allocate. Must be at least 1.
    type String
    The accelerator type.

    Import

    Dedicated inference endpoints can be imported using their id, e.g.

    $ pulumi import digitalocean:index/dedicatedInference:DedicatedInference example endpoint-id
    

    To learn more about importing existing cloud resources, see Importing resources.

    Package Details

    Repository
    DigitalOcean pulumi/pulumi-digitalocean
    License
    Apache-2.0
    Notes
    This Pulumi package is based on the digitalocean Terraform Provider.