Configure Azure Machine Learning Datastores

The azure-native:machinelearningservices:Datastore resource, part of the Pulumi Azure Native provider, registers external storage as a datastore within an Azure Machine Learning workspace, enabling ML jobs to reference data without hardcoding connection details. This guide focuses on three capabilities: Blob Storage connectivity with account keys, Data Lake Gen2 with service principal authentication, and Azure Files for shared storage.

Datastores reference existing Azure Machine Learning workspaces and storage resources. The examples are intentionally small. Combine them with your own workspace, storage accounts, and authentication configuration.

Connect to Blob Storage with account keys

Most ML workflows start by connecting to Azure Blob Storage for training data and model artifacts.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// Register an existing Blob Storage container as a datastore in the
// "my-aml-workspace" workspace. Values of "string" are placeholders.
const datastore = new azure_native.machinelearningservices.Datastore("datastore", {
    name: "string",
    properties: {
        // Storage account and container that identify the storage location.
        accountName: "string",
        containerName: "string",
        credentials: {
            credentialsType: "AccountKey",
            secrets: {
                // Storage account key. Placeholder only; do not commit a real key.
                key: "string",
                secretsType: "AccountKey",
            },
        },
        // Selects the Blob Storage datastore variant.
        datastoreType: "AzureBlob",
        description: "string",
        endpoint: "core.windows.net",
        protocol: "https",
        tags: {
            string: "string",
        },
    },
    resourceGroupName: "test-rg",
    // false keeps validation enabled when the datastore is created.
    skipValidation: false,
    workspaceName: "my-aml-workspace",
});
import pulumi
import pulumi_azure_native as azure_native

# Register an existing Blob Storage container as a datastore in the
# "my-aml-workspace" workspace. Values of "string" are placeholders.
datastore = azure_native.machinelearningservices.Datastore("datastore",
    name="string",
    properties={
        # Storage account and container that identify the storage location.
        "account_name": "string",
        "container_name": "string",
        "credentials": {
            "credentials_type": "AccountKey",
            "secrets": {
                # Storage account key. Placeholder only; do not commit a real key.
                "key": "string",
                "secrets_type": "AccountKey",
            },
        },
        # Selects the Blob Storage datastore variant.
        "datastore_type": "AzureBlob",
        "description": "string",
        "endpoint": "core.windows.net",
        "protocol": "https",
        "tags": {
            "string": "string",
        },
    },
    resource_group_name="test-rg",
    # False keeps validation enabled when the datastore is created.
    skip_validation=False,
    workspace_name="my-aml-workspace")
package main

import (
	machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers an existing Blob Storage container as a datastore in the
// "my-aml-workspace" workspace. Values of "string" are placeholders.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewDatastore(ctx, "datastore", &machinelearningservices.DatastoreArgs{
			Name: pulumi.String("string"),
			Properties: &machinelearningservices.AzureBlobDatastoreArgs{
				// Storage account and container that identify the storage location.
				AccountName:   pulumi.String("string"),
				ContainerName: pulumi.String("string"),
				// Input fields need the *Args structs with pulumi.String values;
				// the plain (output-shaped) structs do not implement pulumi.Input.
				Credentials: machinelearningservices.AccountKeyDatastoreCredentialsArgs{
					CredentialsType: pulumi.String("AccountKey"),
					Secrets: machinelearningservices.AccountKeyDatastoreSecretsArgs{
						// Storage account key. Placeholder only; do not commit a real key.
						Key:         pulumi.String("string"),
						SecretsType: pulumi.String("AccountKey"),
					},
				},
				// Selects the Blob Storage datastore variant.
				DatastoreType: pulumi.String("AzureBlob"),
				Description:   pulumi.String("string"),
				Endpoint:      pulumi.String("core.windows.net"),
				Protocol:      pulumi.String("https"),
				Tags: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
			},
			ResourceGroupName: pulumi.String("test-rg"),
			// false keeps validation enabled when the datastore is created.
			SkipValidation: pulumi.Bool(false),
			WorkspaceName:  pulumi.String("my-aml-workspace"),
		})
		// err is nil on success, so it can be returned directly.
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

// Register an existing Blob Storage container as a datastore in the
// "my-aml-workspace" workspace. Values of "string" are placeholders.
return await Deployment.RunAsync(() => 
{
    var datastore = new AzureNative.MachineLearningServices.Datastore("datastore", new()
    {
        Name = "string",
        Properties = new AzureNative.MachineLearningServices.Inputs.AzureBlobDatastoreArgs
        {
            // Storage account and container that identify the storage location.
            AccountName = "string",
            ContainerName = "string",
            Credentials = new AzureNative.MachineLearningServices.Inputs.AccountKeyDatastoreCredentialsArgs
            {
                CredentialsType = "AccountKey",
                Secrets = new AzureNative.MachineLearningServices.Inputs.AccountKeyDatastoreSecretsArgs
                {
                    // Storage account key. Placeholder only; do not commit a real key.
                    Key = "string",
                    SecretsType = "AccountKey",
                },
            },
            // Selects the Blob Storage datastore variant.
            DatastoreType = "AzureBlob",
            Description = "string",
            Endpoint = "core.windows.net",
            Protocol = "https",
            Tags = 
            {
                { "string", "string" },
            },
        },
        ResourceGroupName = "test-rg",
        // false keeps validation enabled when the datastore is created.
        SkipValidation = false,
        WorkspaceName = "my-aml-workspace",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.Datastore;
import com.pulumi.azurenative.machinelearningservices.DatastoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var datastore = new Datastore("datastore", DatastoreArgs.builder()
            .name("string")
            .properties(AzureBlobDatastoreArgs.builder()
                .accountName("string")
                .containerName("string")
                .credentials(AccountKeyDatastoreCredentialsArgs.builder()
                    .credentialsType("AccountKey")
                    .secrets(AccountKeyDatastoreSecretsArgs.builder()
                        .key("string")
                        .secretsType("AccountKey")
                        .build())
                    .build())
                .datastoreType("AzureBlob")
                .description("string")
                .endpoint("core.windows.net")
                .protocol("https")
                .tags(Map.of("string", "string"))
                .build())
            .resourceGroupName("test-rg")
            .skipValidation(false)
            .workspaceName("my-aml-workspace")
            .build());

    }
}
# Register an existing Blob Storage container as a datastore in the
# "my-aml-workspace" workspace. Values of "string" are placeholders.
resources:
  datastore:
    type: azure-native:machinelearningservices:Datastore
    properties:
      name: string
      properties:
        # Storage account and container that identify the storage location.
        accountName: string
        containerName: string
        credentials:
          credentialsType: AccountKey
          secrets:
            # Storage account key. Placeholder only; do not commit a real key.
            key: string
            secretsType: AccountKey
        # Selects the Blob Storage datastore variant.
        datastoreType: AzureBlob
        description: string
        endpoint: core.windows.net
        protocol: https
        tags:
          string: string
      resourceGroupName: test-rg
      # false keeps validation enabled when the datastore is created.
      skipValidation: false
      workspaceName: my-aml-workspace

The datastoreType property specifies “AzureBlob” to indicate Blob Storage. The accountName and containerName identify the storage location. The credentials block uses credentialsType “AccountKey” with the storage account key in the secrets section. This configuration allows ML jobs to read training data and write model outputs without embedding connection strings in code.

Connect to Data Lake Gen2 with service principals

Teams working with large-scale data lakes often use Azure Data Lake Storage Gen2 for hierarchical namespaces and fine-grained access control.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// Register a Data Lake Storage Gen2 filesystem as a datastore, authenticating
// with a service principal. Values of "string" are placeholders.
const datastore = new azure_native.machinelearningservices.Datastore("datastore", {
    name: "string",
    properties: {
        accountName: "string",
        credentials: {
            authorityUrl: "string",
            // Service principal (application) and tenant identifiers.
            clientId: "00000000-1111-2222-3333-444444444444",
            credentialsType: "ServicePrincipal",
            resourceUrl: "string",
            secrets: {
                // Client secret. Placeholder only; do not commit a real secret.
                clientSecret: "string",
                secretsType: "ServicePrincipal",
            },
            tenantId: "00000000-1111-2222-3333-444444444444",
        },
        // Selects the Data Lake Gen2 datastore variant.
        datastoreType: "AzureDataLakeGen2",
        description: "string",
        endpoint: "string",
        // Container (filesystem) within the Data Lake account.
        filesystem: "string",
        protocol: "string",
        tags: {
            string: "string",
        },
    },
    resourceGroupName: "test-rg",
    // false keeps validation enabled when the datastore is created.
    skipValidation: false,
    workspaceName: "my-aml-workspace",
});
import pulumi
import pulumi_azure_native as azure_native

# Register a Data Lake Storage Gen2 filesystem as a datastore, authenticating
# with a service principal. Values of "string" are placeholders.
datastore = azure_native.machinelearningservices.Datastore("datastore",
    name="string",
    properties={
        "account_name": "string",
        "credentials": {
            "authority_url": "string",
            # Service principal (application) and tenant identifiers.
            "client_id": "00000000-1111-2222-3333-444444444444",
            "credentials_type": "ServicePrincipal",
            "resource_url": "string",
            "secrets": {
                # Client secret. Placeholder only; do not commit a real secret.
                "client_secret": "string",
                "secrets_type": "ServicePrincipal",
            },
            "tenant_id": "00000000-1111-2222-3333-444444444444",
        },
        # Selects the Data Lake Gen2 datastore variant.
        "datastore_type": "AzureDataLakeGen2",
        "description": "string",
        "endpoint": "string",
        # Container (filesystem) within the Data Lake account.
        "filesystem": "string",
        "protocol": "string",
        "tags": {
            "string": "string",
        },
    },
    resource_group_name="test-rg",
    # False keeps validation enabled when the datastore is created.
    skip_validation=False,
    workspace_name="my-aml-workspace")
package main

import (
	machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers a Data Lake Storage Gen2 filesystem as a datastore,
// authenticating with a service principal. Values of "string" are placeholders.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewDatastore(ctx, "datastore", &machinelearningservices.DatastoreArgs{
			Name: pulumi.String("string"),
			Properties: &machinelearningservices.AzureDataLakeGen2DatastoreArgs{
				AccountName: pulumi.String("string"),
				// Input fields need the *Args structs with pulumi.String values;
				// the plain (output-shaped) structs do not implement pulumi.Input.
				Credentials: machinelearningservices.ServicePrincipalDatastoreCredentialsArgs{
					AuthorityUrl: pulumi.String("string"),
					// Service principal (application) and tenant identifiers.
					ClientId:        pulumi.String("00000000-1111-2222-3333-444444444444"),
					CredentialsType: pulumi.String("ServicePrincipal"),
					ResourceUrl:     pulumi.String("string"),
					Secrets: machinelearningservices.ServicePrincipalDatastoreSecretsArgs{
						// Client secret. Placeholder only; do not commit a real secret.
						ClientSecret: pulumi.String("string"),
						SecretsType:  pulumi.String("ServicePrincipal"),
					},
					TenantId: pulumi.String("00000000-1111-2222-3333-444444444444"),
				},
				// Selects the Data Lake Gen2 datastore variant.
				DatastoreType: pulumi.String("AzureDataLakeGen2"),
				Description:   pulumi.String("string"),
				Endpoint:      pulumi.String("string"),
				// Container (filesystem) within the Data Lake account.
				Filesystem: pulumi.String("string"),
				Protocol:   pulumi.String("string"),
				Tags: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
			},
			ResourceGroupName: pulumi.String("test-rg"),
			// false keeps validation enabled when the datastore is created.
			SkipValidation: pulumi.Bool(false),
			WorkspaceName:  pulumi.String("my-aml-workspace"),
		})
		// err is nil on success, so it can be returned directly.
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

// Register a Data Lake Storage Gen2 filesystem as a datastore, authenticating
// with a service principal. Values of "string" are placeholders.
return await Deployment.RunAsync(() => 
{
    var datastore = new AzureNative.MachineLearningServices.Datastore("datastore", new()
    {
        Name = "string",
        Properties = new AzureNative.MachineLearningServices.Inputs.AzureDataLakeGen2DatastoreArgs
        {
            AccountName = "string",
            Credentials = new AzureNative.MachineLearningServices.Inputs.ServicePrincipalDatastoreCredentialsArgs
            {
                AuthorityUrl = "string",
                // Service principal (application) and tenant identifiers.
                ClientId = "00000000-1111-2222-3333-444444444444",
                CredentialsType = "ServicePrincipal",
                ResourceUrl = "string",
                Secrets = new AzureNative.MachineLearningServices.Inputs.ServicePrincipalDatastoreSecretsArgs
                {
                    // Client secret. Placeholder only; do not commit a real secret.
                    ClientSecret = "string",
                    SecretsType = "ServicePrincipal",
                },
                TenantId = "00000000-1111-2222-3333-444444444444",
            },
            // Selects the Data Lake Gen2 datastore variant.
            DatastoreType = "AzureDataLakeGen2",
            Description = "string",
            Endpoint = "string",
            // Container (filesystem) within the Data Lake account.
            Filesystem = "string",
            Protocol = "string",
            Tags = 
            {
                { "string", "string" },
            },
        },
        ResourceGroupName = "test-rg",
        // false keeps validation enabled when the datastore is created.
        SkipValidation = false,
        WorkspaceName = "my-aml-workspace",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.Datastore;
import com.pulumi.azurenative.machinelearningservices.DatastoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var datastore = new Datastore("datastore", DatastoreArgs.builder()
            .name("string")
            .properties(AzureDataLakeGen2DatastoreArgs.builder()
                .accountName("string")
                .credentials(ServicePrincipalDatastoreCredentialsArgs.builder()
                    .authorityUrl("string")
                    .clientId("00000000-1111-2222-3333-444444444444")
                    .credentialsType("ServicePrincipal")
                    .resourceUrl("string")
                    .secrets(ServicePrincipalDatastoreSecretsArgs.builder()
                        .clientSecret("string")
                        .secretsType("ServicePrincipal")
                        .build())
                    .tenantId("00000000-1111-2222-3333-444444444444")
                    .build())
                .datastoreType("AzureDataLakeGen2")
                .description("string")
                .endpoint("string")
                .filesystem("string")
                .protocol("string")
                .tags(Map.of("string", "string"))
                .build())
            .resourceGroupName("test-rg")
            .skipValidation(false)
            .workspaceName("my-aml-workspace")
            .build());

    }
}
# Register a Data Lake Storage Gen2 filesystem as a datastore, authenticating
# with a service principal. Values of "string" are placeholders.
resources:
  datastore:
    type: azure-native:machinelearningservices:Datastore
    properties:
      name: string
      properties:
        accountName: string
        credentials:
          authorityUrl: string
          # Service principal (application) and tenant identifiers.
          clientId: 00000000-1111-2222-3333-444444444444
          credentialsType: ServicePrincipal
          resourceUrl: string
          secrets:
            # Client secret. Placeholder only; do not commit a real secret.
            clientSecret: string
            secretsType: ServicePrincipal
          tenantId: 00000000-1111-2222-3333-444444444444
        # Selects the Data Lake Gen2 datastore variant.
        datastoreType: AzureDataLakeGen2
        description: string
        endpoint: string
        # Container (filesystem) within the Data Lake account.
        filesystem: string
        protocol: string
        tags:
          string: string
      resourceGroupName: test-rg
      # false keeps validation enabled when the datastore is created.
      skipValidation: false
      workspaceName: my-aml-workspace

The datastoreType “AzureDataLakeGen2” enables Data Lake Gen2 features like hierarchical namespaces. The filesystem property specifies the container within the Data Lake account. Service principal authentication requires clientId and tenantId in the credentials block, along with the client secret; authorityUrl and resourceUrl are optional and only need to be set when the defaults do not apply. This approach provides more granular access control than account keys, allowing different service principals to access different parts of the data lake.

Connect to Azure Files for shared storage

Some ML pipelines need shared file storage accessible from multiple compute targets simultaneously.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// Register an existing Azure Files share as a datastore in the
// "my-aml-workspace" workspace. Values of "string" are placeholders.
const datastore = new azure_native.machinelearningservices.Datastore("datastore", {
    name: "string",
    properties: {
        accountName: "string",
        credentials: {
            credentialsType: "AccountKey",
            secrets: {
                // Storage account key. Placeholder only; do not commit a real key.
                key: "string",
                secretsType: "AccountKey",
            },
        },
        // Selects the Azure Files datastore variant.
        datastoreType: "AzureFile",
        description: "string",
        endpoint: "string",
        // File share to expose through the datastore.
        fileShareName: "string",
        protocol: "string",
        tags: {
            string: "string",
        },
    },
    resourceGroupName: "test-rg",
    // false keeps validation enabled when the datastore is created.
    skipValidation: false,
    workspaceName: "my-aml-workspace",
});
import pulumi
import pulumi_azure_native as azure_native

# Register an existing Azure Files share as a datastore in the
# "my-aml-workspace" workspace. Values of "string" are placeholders.
datastore = azure_native.machinelearningservices.Datastore("datastore",
    name="string",
    properties={
        "account_name": "string",
        "credentials": {
            "credentials_type": "AccountKey",
            "secrets": {
                # Storage account key. Placeholder only; do not commit a real key.
                "key": "string",
                "secrets_type": "AccountKey",
            },
        },
        # Selects the Azure Files datastore variant.
        "datastore_type": "AzureFile",
        "description": "string",
        "endpoint": "string",
        # File share to expose through the datastore.
        "file_share_name": "string",
        "protocol": "string",
        "tags": {
            "string": "string",
        },
    },
    resource_group_name="test-rg",
    # False keeps validation enabled when the datastore is created.
    skip_validation=False,
    workspace_name="my-aml-workspace")
package main

import (
	machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers an existing Azure Files share as a datastore in the
// "my-aml-workspace" workspace. Values of "string" are placeholders.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewDatastore(ctx, "datastore", &machinelearningservices.DatastoreArgs{
			Name: pulumi.String("string"),
			Properties: &machinelearningservices.AzureFileDatastoreArgs{
				AccountName: pulumi.String("string"),
				// Input fields need the *Args structs with pulumi.String values;
				// the plain (output-shaped) structs do not implement pulumi.Input.
				Credentials: machinelearningservices.AccountKeyDatastoreCredentialsArgs{
					CredentialsType: pulumi.String("AccountKey"),
					Secrets: machinelearningservices.AccountKeyDatastoreSecretsArgs{
						// Storage account key. Placeholder only; do not commit a real key.
						Key:         pulumi.String("string"),
						SecretsType: pulumi.String("AccountKey"),
					},
				},
				// Selects the Azure Files datastore variant.
				DatastoreType: pulumi.String("AzureFile"),
				Description:   pulumi.String("string"),
				Endpoint:      pulumi.String("string"),
				// File share to expose through the datastore.
				FileShareName: pulumi.String("string"),
				Protocol:      pulumi.String("string"),
				Tags: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
			},
			ResourceGroupName: pulumi.String("test-rg"),
			// false keeps validation enabled when the datastore is created.
			SkipValidation: pulumi.Bool(false),
			WorkspaceName:  pulumi.String("my-aml-workspace"),
		})
		// err is nil on success, so it can be returned directly.
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

// Register an existing Azure Files share as a datastore in the
// "my-aml-workspace" workspace. Values of "string" are placeholders.
return await Deployment.RunAsync(() => 
{
    var datastore = new AzureNative.MachineLearningServices.Datastore("datastore", new()
    {
        Name = "string",
        Properties = new AzureNative.MachineLearningServices.Inputs.AzureFileDatastoreArgs
        {
            AccountName = "string",
            Credentials = new AzureNative.MachineLearningServices.Inputs.AccountKeyDatastoreCredentialsArgs
            {
                CredentialsType = "AccountKey",
                Secrets = new AzureNative.MachineLearningServices.Inputs.AccountKeyDatastoreSecretsArgs
                {
                    // Storage account key. Placeholder only; do not commit a real key.
                    Key = "string",
                    SecretsType = "AccountKey",
                },
            },
            // Selects the Azure Files datastore variant.
            DatastoreType = "AzureFile",
            Description = "string",
            Endpoint = "string",
            // File share to expose through the datastore.
            FileShareName = "string",
            Protocol = "string",
            Tags = 
            {
                { "string", "string" },
            },
        },
        ResourceGroupName = "test-rg",
        // false keeps validation enabled when the datastore is created.
        SkipValidation = false,
        WorkspaceName = "my-aml-workspace",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.Datastore;
import com.pulumi.azurenative.machinelearningservices.DatastoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var datastore = new Datastore("datastore", DatastoreArgs.builder()
            .name("string")
            .properties(AzureFileDatastoreArgs.builder()
                .accountName("string")
                .credentials(AccountKeyDatastoreCredentialsArgs.builder()
                    .credentialsType("AccountKey")
                    .secrets(AccountKeyDatastoreSecretsArgs.builder()
                        .key("string")
                        .secretsType("AccountKey")
                        .build())
                    .build())
                .datastoreType("AzureFile")
                .description("string")
                .endpoint("string")
                .fileShareName("string")
                .protocol("string")
                .tags(Map.of("string", "string"))
                .build())
            .resourceGroupName("test-rg")
            .skipValidation(false)
            .workspaceName("my-aml-workspace")
            .build());

    }
}
# Register an existing Azure Files share as a datastore in the
# "my-aml-workspace" workspace. Values of "string" are placeholders.
resources:
  datastore:
    type: azure-native:machinelearningservices:Datastore
    properties:
      name: string
      properties:
        accountName: string
        credentials:
          credentialsType: AccountKey
          secrets:
            # Storage account key. Placeholder only; do not commit a real key.
            key: string
            secretsType: AccountKey
        # Selects the Azure Files datastore variant.
        datastoreType: AzureFile
        description: string
        endpoint: string
        # File share to expose through the datastore.
        fileShareName: string
        protocol: string
        tags:
          string: string
      resourceGroupName: test-rg
      # false keeps validation enabled when the datastore is created.
      skipValidation: false
      workspaceName: my-aml-workspace

The datastoreType “AzureFile” enables Azure Files connectivity. The fileShareName property identifies the SMB-compatible file share. Azure Files allows multiple compute instances to mount the same storage concurrently, useful for scenarios where training jobs need to share intermediate results or configuration files during execution.

Beyond these examples

These snippets focus on specific datastore features: Blob Storage and Data Lake Gen2 connectivity, account key and service principal authentication, and Azure Files integration. They’re intentionally minimal rather than full ML pipeline configurations.

The examples reference pre-existing infrastructure such as Azure Machine Learning workspaces, Storage Accounts, Data Lake Gen2 accounts or Azure Files shares, service principals with appropriate permissions (for Gen2 example), and resource groups. They focus on registering the datastore rather than provisioning the underlying storage.

To keep things focused, common datastore patterns are omitted, including:

  • Data Lake Gen1 connectivity (older generation)
  • Skipping validation (skipValidation: true; the examples leave it at false)
  • Non-default endpoints and protocols (for example, sovereign clouds)
  • Managed identity authentication

These omissions are intentional: the goal is to illustrate how each datastore type is wired, not provide drop-in storage modules. See the Datastore resource reference for all available configuration options.

Let's configure Azure Machine Learning Datastores

Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.

Try Pulumi Cloud for FREE

Frequently Asked Questions

Datastore Types & Configuration
What types of datastores can I create?
You can create four types of datastores: AzureDataLakeGen1, AzureDataLakeGen2, AzureFile, and AzureBlob. Each type requires different properties (e.g., AzureBlob needs containerName, while AzureDataLakeGen2 needs filesystem).
What's the difference between the datastore types?

Each datastore type connects to different Azure storage services:

  • AzureBlob - Azure Blob Storage (requires accountName, containerName)
  • AzureFile - Azure File Storage (requires accountName, fileShareName)
  • AzureDataLakeGen1 - Data Lake Gen1 (requires storeName)
  • AzureDataLakeGen2 - Data Lake Gen2 (requires accountName, filesystem)
Authentication & Credentials
What credential types are supported?
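The examples use two credential types: ServicePrincipal (with clientId, tenantId, and clientSecret, plus optional authorityUrl and resourceUrl) and AccountKey (with the storage account key). The resource accepts other credential types as well, such as Certificate, Sas, and None; see the Datastore resource reference for the full list.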
When should I use ServicePrincipal vs AccountKey credentials?
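The examples pair ServicePrincipal credentials with the Azure Data Lake Gen2 datastore and AccountKey credentials with the Azure File and Azure Blob datastores. ServicePrincipal is also the usual choice for Azure Data Lake Gen1 datastores, although no Gen1 example is shown in this guide.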
Resource Management
Which properties can't I change after creating a datastore?
The properties name, resourceGroupName, and workspaceName are immutable and require resource replacement if changed.
How do I access different API versions of this resource?
Generate a local SDK package using pulumi package add azure-native machinelearningservices [ApiVersion]. Many versions are available, including 2022-05-01, 2023-04-01, 2024-04-01, and 2025-12-01.
How do I import an existing datastore into Pulumi?
Use the import command: pulumi import azure-native:machinelearningservices:Datastore string /subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.MachineLearningServices/workspaces/{workspaceName}/datastores/{name}
Validation & Deployment
What does the skipValidation flag do?
Setting skipValidation to true skips validation during datastore creation. Examples show it set to false for standard validation.

Using a different cloud?

Explore analytics guides for other cloud providers: