Configure Azure Machine Learning Datastores

The azure-native:machinelearningservices:Datastore resource, part of the Pulumi Azure Native provider, registers external storage as a datastore within an Azure Machine Learning workspace. This allows training jobs and pipelines to reference data by logical name rather than connection strings. This guide focuses on three capabilities: Blob Storage connectivity with account keys, Data Lake Gen2 with service principal authentication, and Azure Files integration.

Datastores reference existing storage accounts, containers, and authentication credentials. The examples are intentionally small. Combine them with your own storage infrastructure and workspace configuration.

Connect to Blob Storage with account keys

Most ML workflows start by connecting to Azure Blob Storage for training data and model artifacts.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// Account-key credentials for the storage account; `as const` keeps the
// discriminator strings as literal types.
const blobCredentials = {
    credentialsType: "AccountKey",
    secrets: {
        secretsType: "AccountKey",
        key: "string",
    },
} as const;

// Register an Azure Blob Storage container as a datastore in the workspace.
const datastore = new azure_native.machinelearningservices.Datastore("datastore", {
    // Where the datastore lives.
    resourceGroupName: "test-rg",
    workspaceName: "my-aml-workspace",
    name: "string",
    skipValidation: false,
    // What the datastore points at.
    datastoreProperties: {
        datastoreType: "AzureBlob",
        accountName: "string",
        containerName: "string",
        endpoint: "core.windows.net",
        protocol: "https",
        description: "string",
        credentials: blobCredentials,
        tags: {
            string: "string",
        },
    },
});
import pulumi
import pulumi_azure_native as azure_native

# Account-key credentials for the storage account.
blob_credentials = {
    "credentials_type": "AccountKey",
    "secrets": {
        "secrets_type": "AccountKey",
        "key": "string",
    },
}

# Blob-specific settings: the account and container behind the datastore.
blob_properties = {
    "datastore_type": "AzureBlob",
    "account_name": "string",
    "container_name": "string",
    "endpoint": "core.windows.net",
    "protocol": "https",
    "description": "string",
    "credentials": blob_credentials,
    "tags": {
        "string": "string",
    },
}

# Register the container as a datastore in the target workspace.
datastore = azure_native.machinelearningservices.Datastore(
    "datastore",
    resource_group_name="test-rg",
    workspace_name="my-aml-workspace",
    name="string",
    skip_validation=False,
    datastore_properties=blob_properties,
)
package main

import (
	machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers an Azure Blob Storage container as a datastore in an Azure
// Machine Learning workspace, authenticating with a storage account key.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewDatastore(ctx, "datastore", &machinelearningservices.DatastoreArgs{
			DatastoreProperties: &machinelearningservices.AzureBlobDatastoreArgs{
				AccountName:   pulumi.String("string"),
				ContainerName: pulumi.String("string"),
				// Use the *Args input types with pulumi.String values, matching
				// the sibling fields, instead of the plain structs with raw strings.
				Credentials: &machinelearningservices.AccountKeyDatastoreCredentialsArgs{
					CredentialsType: pulumi.String("AccountKey"),
					Secrets: &machinelearningservices.AccountKeyDatastoreSecretsArgs{
						Key:         pulumi.String("string"),
						SecretsType: pulumi.String("AccountKey"),
					},
				},
				DatastoreType: pulumi.String("AzureBlob"),
				Description:   pulumi.String("string"),
				Endpoint:      pulumi.String("core.windows.net"),
				Protocol:      pulumi.String("https"),
				Tags: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
			},
			Name:              pulumi.String("string"),
			ResourceGroupName: pulumi.String("test-rg"),
			SkipValidation:    pulumi.Bool(false),
			WorkspaceName:     pulumi.String("my-aml-workspace"),
		})
		// Propagate the registration error (nil on success) to the Pulumi runtime.
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

return await Deployment.RunAsync(() =>
{
    // Account-key credentials for the storage account.
    var blobCredentials = new AzureNative.MachineLearningServices.Inputs.AccountKeyDatastoreCredentialsArgs
    {
        CredentialsType = "AccountKey",
        Secrets = new AzureNative.MachineLearningServices.Inputs.AccountKeyDatastoreSecretsArgs
        {
            SecretsType = "AccountKey",
            Key = "string",
        },
    };

    // Blob-specific settings: the account and container behind the datastore.
    var blobProperties = new AzureNative.MachineLearningServices.Inputs.AzureBlobDatastoreArgs
    {
        DatastoreType = "AzureBlob",
        AccountName = "string",
        ContainerName = "string",
        Endpoint = "core.windows.net",
        Protocol = "https",
        Description = "string",
        Credentials = blobCredentials,
        Tags =
        {
            { "string", "string" },
        },
    };

    // Register the container as a datastore in the target workspace.
    var datastore = new AzureNative.MachineLearningServices.Datastore("datastore", new AzureNative.MachineLearningServices.DatastoreArgs
    {
        ResourceGroupName = "test-rg",
        WorkspaceName = "my-aml-workspace",
        Name = "string",
        SkipValidation = false,
        DatastoreProperties = blobProperties,
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.Datastore;
import com.pulumi.azurenative.machinelearningservices.DatastoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var datastore = new Datastore("datastore", DatastoreArgs.builder()
            .datastoreProperties(AzureBlobDatastoreArgs.builder()
                .accountName("string")
                .containerName("string")
                .credentials(AccountKeyDatastoreCredentialsArgs.builder()
                    .credentialsType("AccountKey")
                    .secrets(AccountKeyDatastoreSecretsArgs.builder()
                        .key("string")
                        .secretsType("AccountKey")
                        .build())
                    .build())
                .datastoreType("AzureBlob")
                .description("string")
                .endpoint("core.windows.net")
                .protocol("https")
                .tags(Map.of("string", "string"))
                .build())
            .name("string")
            .resourceGroupName("test-rg")
            .skipValidation(false)
            .workspaceName("my-aml-workspace")
            .build());

    }
}
resources:
  # Registers an Azure Blob Storage container as a datastore in the workspace.
  datastore:
    type: "azure-native:machinelearningservices:Datastore"
    properties:
      # Where the datastore lives.
      resourceGroupName: "test-rg"
      workspaceName: "my-aml-workspace"
      name: "string"
      skipValidation: false
      # What the datastore points at.
      datastoreProperties:
        datastoreType: "AzureBlob"
        accountName: "string"
        containerName: "string"
        endpoint: "core.windows.net"
        protocol: "https"
        description: "string"
        credentials:
          credentialsType: "AccountKey"
          secrets:
            secretsType: "AccountKey"
            key: "string"
        tags: { "string": "string" }

When you create the datastore, Azure ML validates the connection and stores the credentials securely. The datastoreType property specifies “AzureBlob”, while accountName and containerName identify the storage location. The credentials block uses “AccountKey” authentication, embedding the storage account key in the secrets property. Training jobs can then reference this datastore by name without handling connection strings directly.

Connect to Data Lake Gen2 with service principal

Data Lake Gen2 provides hierarchical namespace and enterprise-grade security for large-scale analytics. Service principal authentication enables automated access without storing account keys.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// Service principal credentials; `as const` keeps the discriminator strings
// as literal types.
const servicePrincipalCredentials = {
    credentialsType: "ServicePrincipal",
    tenantId: "00000000-1111-2222-3333-444444444444",
    clientId: "00000000-1111-2222-3333-444444444444",
    authorityUrl: "string",
    resourceUrl: "string",
    secrets: {
        secretsType: "ServicePrincipal",
        clientSecret: "string",
    },
} as const;

// Register a Data Lake Gen2 filesystem as a datastore in the workspace.
const datastore = new azure_native.machinelearningservices.Datastore("datastore", {
    // Where the datastore lives.
    resourceGroupName: "test-rg",
    workspaceName: "my-aml-workspace",
    name: "string",
    skipValidation: false,
    // What the datastore points at.
    datastoreProperties: {
        datastoreType: "AzureDataLakeGen2",
        accountName: "string",
        filesystem: "string",
        endpoint: "string",
        protocol: "string",
        description: "string",
        credentials: servicePrincipalCredentials,
        tags: {
            string: "string",
        },
    },
});
import pulumi
import pulumi_azure_native as azure_native

# Service principal credentials for the Data Lake Gen2 account.
service_principal_credentials = {
    "credentials_type": "ServicePrincipal",
    "tenant_id": "00000000-1111-2222-3333-444444444444",
    "client_id": "00000000-1111-2222-3333-444444444444",
    "authority_url": "string",
    "resource_url": "string",
    "secrets": {
        "secrets_type": "ServicePrincipal",
        "client_secret": "string",
    },
}

# Gen2-specific settings: the account and filesystem behind the datastore.
gen2_properties = {
    "datastore_type": "AzureDataLakeGen2",
    "account_name": "string",
    "filesystem": "string",
    "endpoint": "string",
    "protocol": "string",
    "description": "string",
    "credentials": service_principal_credentials,
    "tags": {
        "string": "string",
    },
}

# Register the filesystem as a datastore in the target workspace.
datastore = azure_native.machinelearningservices.Datastore(
    "datastore",
    resource_group_name="test-rg",
    workspace_name="my-aml-workspace",
    name="string",
    skip_validation=False,
    datastore_properties=gen2_properties,
)
package main

import (
	machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers a Data Lake Gen2 filesystem as a datastore in an Azure
// Machine Learning workspace, authenticating with a service principal.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewDatastore(ctx, "datastore", &machinelearningservices.DatastoreArgs{
			DatastoreProperties: &machinelearningservices.AzureDataLakeGen2DatastoreArgs{
				AccountName: pulumi.String("string"),
				// Use the *Args input types with pulumi.String values, matching
				// the sibling fields, instead of the plain structs with raw strings.
				Credentials: &machinelearningservices.ServicePrincipalDatastoreCredentialsArgs{
					AuthorityUrl:    pulumi.String("string"),
					ClientId:        pulumi.String("00000000-1111-2222-3333-444444444444"),
					CredentialsType: pulumi.String("ServicePrincipal"),
					ResourceUrl:     pulumi.String("string"),
					Secrets: &machinelearningservices.ServicePrincipalDatastoreSecretsArgs{
						ClientSecret: pulumi.String("string"),
						SecretsType:  pulumi.String("ServicePrincipal"),
					},
					TenantId: pulumi.String("00000000-1111-2222-3333-444444444444"),
				},
				DatastoreType: pulumi.String("AzureDataLakeGen2"),
				Description:   pulumi.String("string"),
				Endpoint:      pulumi.String("string"),
				Filesystem:    pulumi.String("string"),
				Protocol:      pulumi.String("string"),
				Tags: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
			},
			Name:              pulumi.String("string"),
			ResourceGroupName: pulumi.String("test-rg"),
			SkipValidation:    pulumi.Bool(false),
			WorkspaceName:     pulumi.String("my-aml-workspace"),
		})
		// Propagate the registration error (nil on success) to the Pulumi runtime.
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

return await Deployment.RunAsync(() =>
{
    // Service principal credentials for the Data Lake Gen2 account.
    var servicePrincipalCredentials = new AzureNative.MachineLearningServices.Inputs.ServicePrincipalDatastoreCredentialsArgs
    {
        CredentialsType = "ServicePrincipal",
        TenantId = "00000000-1111-2222-3333-444444444444",
        ClientId = "00000000-1111-2222-3333-444444444444",
        AuthorityUrl = "string",
        ResourceUrl = "string",
        Secrets = new AzureNative.MachineLearningServices.Inputs.ServicePrincipalDatastoreSecretsArgs
        {
            SecretsType = "ServicePrincipal",
            ClientSecret = "string",
        },
    };

    // Gen2-specific settings: the account and filesystem behind the datastore.
    var gen2Properties = new AzureNative.MachineLearningServices.Inputs.AzureDataLakeGen2DatastoreArgs
    {
        DatastoreType = "AzureDataLakeGen2",
        AccountName = "string",
        Filesystem = "string",
        Endpoint = "string",
        Protocol = "string",
        Description = "string",
        Credentials = servicePrincipalCredentials,
        Tags =
        {
            { "string", "string" },
        },
    };

    // Register the filesystem as a datastore in the target workspace.
    var datastore = new AzureNative.MachineLearningServices.Datastore("datastore", new AzureNative.MachineLearningServices.DatastoreArgs
    {
        ResourceGroupName = "test-rg",
        WorkspaceName = "my-aml-workspace",
        Name = "string",
        SkipValidation = false,
        DatastoreProperties = gen2Properties,
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.Datastore;
import com.pulumi.azurenative.machinelearningservices.DatastoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var datastore = new Datastore("datastore", DatastoreArgs.builder()
            .datastoreProperties(AzureDataLakeGen2DatastoreArgs.builder()
                .accountName("string")
                .credentials(ServicePrincipalDatastoreCredentialsArgs.builder()
                    .authorityUrl("string")
                    .clientId("00000000-1111-2222-3333-444444444444")
                    .credentialsType("ServicePrincipal")
                    .resourceUrl("string")
                    .secrets(ServicePrincipalDatastoreSecretsArgs.builder()
                        .clientSecret("string")
                        .secretsType("ServicePrincipal")
                        .build())
                    .tenantId("00000000-1111-2222-3333-444444444444")
                    .build())
                .datastoreType("AzureDataLakeGen2")
                .description("string")
                .endpoint("string")
                .filesystem("string")
                .protocol("string")
                .tags(Map.of("string", "string"))
                .build())
            .name("string")
            .resourceGroupName("test-rg")
            .skipValidation(false)
            .workspaceName("my-aml-workspace")
            .build());

    }
}
resources:
  # Registers a Data Lake Gen2 filesystem as a datastore in the workspace.
  datastore:
    type: "azure-native:machinelearningservices:Datastore"
    properties:
      # Where the datastore lives.
      resourceGroupName: "test-rg"
      workspaceName: "my-aml-workspace"
      name: "string"
      skipValidation: false
      # What the datastore points at.
      datastoreProperties:
        datastoreType: "AzureDataLakeGen2"
        accountName: "string"
        filesystem: "string"
        endpoint: "string"
        protocol: "string"
        description: "string"
        credentials:
          credentialsType: "ServicePrincipal"
          tenantId: "00000000-1111-2222-3333-444444444444"
          clientId: "00000000-1111-2222-3333-444444444444"
          authorityUrl: "string"
          resourceUrl: "string"
          secrets:
            secretsType: "ServicePrincipal"
            clientSecret: "string"
        tags: { "string": "string" }

The datastoreType switches to “AzureDataLakeGen2”, and the filesystem property replaces containerName to reference the Gen2 hierarchical namespace. The credentials block uses “ServicePrincipal” authentication, requiring clientId, tenantId, authorityUrl, and resourceUrl. The clientSecret in the secrets block authenticates the service principal. This approach supports role-based access control on the storage account; because the clientSecret is stored in the datastore, update the datastore with the new value whenever you rotate that secret.

Connect to Azure Files with account keys

Azure Files provides SMB-compatible file shares that training jobs can mount. This is useful for shared configuration files or datasets that need file system semantics.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// Account-key credentials for the storage account; `as const` keeps the
// discriminator strings as literal types.
const fileShareCredentials = {
    credentialsType: "AccountKey",
    secrets: {
        secretsType: "AccountKey",
        key: "string",
    },
} as const;

// Register an Azure Files share as a datastore in the workspace.
const datastore = new azure_native.machinelearningservices.Datastore("datastore", {
    // Where the datastore lives.
    resourceGroupName: "test-rg",
    workspaceName: "my-aml-workspace",
    name: "string",
    skipValidation: false,
    // What the datastore points at.
    datastoreProperties: {
        datastoreType: "AzureFile",
        accountName: "string",
        fileShareName: "string",
        endpoint: "string",
        protocol: "string",
        description: "string",
        credentials: fileShareCredentials,
        tags: {
            string: "string",
        },
    },
});
import pulumi
import pulumi_azure_native as azure_native

# Account-key credentials for the storage account.
file_share_credentials = {
    "credentials_type": "AccountKey",
    "secrets": {
        "secrets_type": "AccountKey",
        "key": "string",
    },
}

# Azure Files settings: the account and file share behind the datastore.
file_share_properties = {
    "datastore_type": "AzureFile",
    "account_name": "string",
    "file_share_name": "string",
    "endpoint": "string",
    "protocol": "string",
    "description": "string",
    "credentials": file_share_credentials,
    "tags": {
        "string": "string",
    },
}

# Register the file share as a datastore in the target workspace.
datastore = azure_native.machinelearningservices.Datastore(
    "datastore",
    resource_group_name="test-rg",
    workspace_name="my-aml-workspace",
    name="string",
    skip_validation=False,
    datastore_properties=file_share_properties,
)
package main

import (
	machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers an Azure Files share as a datastore in an Azure Machine
// Learning workspace, authenticating with a storage account key.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewDatastore(ctx, "datastore", &machinelearningservices.DatastoreArgs{
			DatastoreProperties: &machinelearningservices.AzureFileDatastoreArgs{
				AccountName: pulumi.String("string"),
				// Use the *Args input types with pulumi.String values, matching
				// the sibling fields, instead of the plain structs with raw strings.
				Credentials: &machinelearningservices.AccountKeyDatastoreCredentialsArgs{
					CredentialsType: pulumi.String("AccountKey"),
					Secrets: &machinelearningservices.AccountKeyDatastoreSecretsArgs{
						Key:         pulumi.String("string"),
						SecretsType: pulumi.String("AccountKey"),
					},
				},
				DatastoreType: pulumi.String("AzureFile"),
				Description:   pulumi.String("string"),
				Endpoint:      pulumi.String("string"),
				FileShareName: pulumi.String("string"),
				Protocol:      pulumi.String("string"),
				Tags: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
			},
			Name:              pulumi.String("string"),
			ResourceGroupName: pulumi.String("test-rg"),
			SkipValidation:    pulumi.Bool(false),
			WorkspaceName:     pulumi.String("my-aml-workspace"),
		})
		// Propagate the registration error (nil on success) to the Pulumi runtime.
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

return await Deployment.RunAsync(() =>
{
    // Account-key credentials for the storage account.
    var fileShareCredentials = new AzureNative.MachineLearningServices.Inputs.AccountKeyDatastoreCredentialsArgs
    {
        CredentialsType = "AccountKey",
        Secrets = new AzureNative.MachineLearningServices.Inputs.AccountKeyDatastoreSecretsArgs
        {
            SecretsType = "AccountKey",
            Key = "string",
        },
    };

    // Azure Files settings: the account and file share behind the datastore.
    var fileShareProperties = new AzureNative.MachineLearningServices.Inputs.AzureFileDatastoreArgs
    {
        DatastoreType = "AzureFile",
        AccountName = "string",
        FileShareName = "string",
        Endpoint = "string",
        Protocol = "string",
        Description = "string",
        Credentials = fileShareCredentials,
        Tags =
        {
            { "string", "string" },
        },
    };

    // Register the file share as a datastore in the target workspace.
    var datastore = new AzureNative.MachineLearningServices.Datastore("datastore", new AzureNative.MachineLearningServices.DatastoreArgs
    {
        ResourceGroupName = "test-rg",
        WorkspaceName = "my-aml-workspace",
        Name = "string",
        SkipValidation = false,
        DatastoreProperties = fileShareProperties,
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.Datastore;
import com.pulumi.azurenative.machinelearningservices.DatastoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var datastore = new Datastore("datastore", DatastoreArgs.builder()
            .datastoreProperties(AzureFileDatastoreArgs.builder()
                .accountName("string")
                .credentials(AccountKeyDatastoreCredentialsArgs.builder()
                    .credentialsType("AccountKey")
                    .secrets(AccountKeyDatastoreSecretsArgs.builder()
                        .key("string")
                        .secretsType("AccountKey")
                        .build())
                    .build())
                .datastoreType("AzureFile")
                .description("string")
                .endpoint("string")
                .fileShareName("string")
                .protocol("string")
                .tags(Map.of("string", "string"))
                .build())
            .name("string")
            .resourceGroupName("test-rg")
            .skipValidation(false)
            .workspaceName("my-aml-workspace")
            .build());

    }
}
resources:
  # Registers an Azure Files share as a datastore in the workspace.
  datastore:
    type: "azure-native:machinelearningservices:Datastore"
    properties:
      # Where the datastore lives.
      resourceGroupName: "test-rg"
      workspaceName: "my-aml-workspace"
      name: "string"
      skipValidation: false
      # What the datastore points at.
      datastoreProperties:
        datastoreType: "AzureFile"
        accountName: "string"
        fileShareName: "string"
        endpoint: "string"
        protocol: "string"
        description: "string"
        credentials:
          credentialsType: "AccountKey"
          secrets:
            secretsType: "AccountKey"
            key: "string"
        tags: { "string": "string" }

The datastoreType becomes “AzureFile”, and fileShareName identifies the share within the storage account. The credentials block uses “AccountKey” authentication, similar to Blob Storage. Azure ML mounts the file share during training, making files accessible through standard file I/O operations rather than blob APIs.

Beyond these examples

These snippets focus on specific datastore features: Blob Storage and Data Lake Gen2 connectivity, account key and service principal authentication, and Azure Files integration. They’re intentionally minimal rather than full ML pipeline configurations.

The examples reference pre-existing infrastructure: an Azure Machine Learning workspace and its resource group; storage accounts (Blob, Data Lake Gen2, Files); the containers, filesystems, or file shares inside them; and, for Gen2, a service principal with client credentials. They focus on registering storage rather than provisioning the underlying infrastructure.

To keep things focused, common datastore patterns are omitted, including:

  • Data Lake Gen1 connectivity (older generation)
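  • Validation controls (skipValidation is set to false in every example; skipping validation is not shown)
  • Custom endpoints and protocols (the endpoint and protocol fields appear only with default or placeholder values)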
  • Managed identity authentication

These omissions are intentional: the goal is to illustrate how each storage type is wired, not provide drop-in data pipeline modules. See the Datastore resource reference for all available configuration options.

Let's configure Azure Machine Learning Datastores

Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.

Try Pulumi Cloud for FREE

Frequently Asked Questions

Datastore Types & Configuration
What types of datastores can I create?
You can create four types: AzureDataLakeGen1, AzureDataLakeGen2, AzureFile, and AzureBlob. Specify the type using the datastoreType field within datastoreProperties.
What's required for each datastore type?

Each type has specific requirements:

  • AzureDataLakeGen1: storeName
  • AzureDataLakeGen2: accountName, filesystem
  • AzureFile: accountName, fileShareName
  • AzureBlob: accountName, containerName
Authentication & Credentials
What authentication methods are available?
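The examples in this guide use two credential types: ServicePrincipal (for Data Lake Gen1 and Gen2) and AccountKey (for File and Blob storage). Set the credentialsType field accordingly. Other authentication options, such as managed identity, are not covered here; see the Datastore resource reference.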
What do I need to configure ServicePrincipal authentication?
ServicePrincipal requires authorityUrl, clientId, resourceUrl, tenantId, and a clientSecret within the secrets object. Set credentialsType to ServicePrincipal and secretsType to ServicePrincipal.
What do I need to configure AccountKey authentication?
AccountKey requires a key field within the secrets object. Set credentialsType to AccountKey and secretsType to AccountKey.
Resource Management
Which properties can't be changed after creating a datastore?
The name, resourceGroupName, and workspaceName properties are immutable and cannot be modified after creation.
How do I use a specific Azure API version?
Generate a local SDK package using the CLI command pulumi package add azure-native machinelearningservices [ApiVersion]. The default version is 2025-09-01.

Using a different cloud?

Explore analytics guides for other cloud providers: