The azure-native:machinelearningservices:Datastore resource, part of the Pulumi Azure Native provider, registers external storage as a datastore within an Azure Machine Learning workspace. This allows training jobs and pipelines to reference data by logical name rather than connection strings. This guide focuses on three capabilities: Blob Storage connectivity with account keys, Data Lake Gen2 with service principal authentication, and Azure Files integration.
Datastores reference existing storage accounts, containers, and authentication credentials. The examples are intentionally small. Combine them with your own storage infrastructure and workspace configuration.
Connect to Blob Storage with account keys
Most ML workflows start by connecting to Azure Blob Storage for training data and model artifacts.
import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";
// Registers an existing Blob Storage container as a datastore in the
// workspace, authenticating with the storage account key.
const datastore = new azure_native.machinelearningservices.Datastore("datastore", {
    // Where the datastore is registered.
    resourceGroupName: "test-rg",
    workspaceName: "my-aml-workspace",
    name: "string",
    skipValidation: false,
    // What the datastore points at and how it authenticates.
    datastoreProperties: {
        datastoreType: "AzureBlob",
        accountName: "string",
        containerName: "string",
        endpoint: "core.windows.net",
        protocol: "https",
        description: "string",
        tags: {
            string: "string",
        },
        credentials: {
            credentialsType: "AccountKey",
            secrets: {
                secretsType: "AccountKey",
                key: "string",
            },
        },
    },
});
import pulumi
import pulumi_azure_native as azure_native
# Account-key credentials used to reach the storage account.
account_key_credentials = {
    "credentials_type": "AccountKey",
    "secrets": {
        "key": "string",
        "secrets_type": "AccountKey",
    },
}

# Blob-specific settings: which account/container to expose and how to reach it.
blob_properties = {
    "account_name": "string",
    "container_name": "string",
    "credentials": account_key_credentials,
    "datastore_type": "AzureBlob",
    "description": "string",
    "endpoint": "core.windows.net",
    "protocol": "https",
    "tags": {
        "string": "string",
    },
}

# Register the container as a datastore in the workspace.
datastore = azure_native.machinelearningservices.Datastore(
    "datastore",
    datastore_properties=blob_properties,
    name="string",
    resource_group_name="test-rg",
    skip_validation=False,
    workspace_name="my-aml-workspace",
)
package main
import (
machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main registers an existing Blob Storage container as a datastore in the
// workspace, authenticating with the storage account key.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewDatastore(ctx, "datastore", &machinelearningservices.DatastoreArgs{
			DatastoreProperties: &machinelearningservices.AzureBlobDatastoreArgs{
				AccountName:   pulumi.String("string"),
				ContainerName: pulumi.String("string"),
				// Resource inputs take the *Args input types with pulumi.String
				// values, matching every other field in this struct; the plain
				// (non-Args) structs with bare string literals are not inputs.
				Credentials: &machinelearningservices.AccountKeyDatastoreCredentialsArgs{
					CredentialsType: pulumi.String("AccountKey"),
					Secrets: &machinelearningservices.AccountKeyDatastoreSecretsArgs{
						Key:         pulumi.String("string"),
						SecretsType: pulumi.String("AccountKey"),
					},
				},
				DatastoreType: pulumi.String("AzureBlob"),
				Description:   pulumi.String("string"),
				Endpoint:      pulumi.String("core.windows.net"),
				Protocol:      pulumi.String("https"),
				Tags: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
			},
			Name:              pulumi.String("string"),
			ResourceGroupName: pulumi.String("test-rg"),
			SkipValidation:    pulumi.Bool(false),
			WorkspaceName:     pulumi.String("my-aml-workspace"),
		})
		// err is nil on success, so it can be returned directly.
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;
return await Deployment.RunAsync(() =>
{
    // Account-key credentials used to reach the storage account.
    var accountKeyCredentials = new AzureNative.MachineLearningServices.Inputs.AccountKeyDatastoreCredentialsArgs
    {
        CredentialsType = "AccountKey",
        Secrets = new AzureNative.MachineLearningServices.Inputs.AccountKeyDatastoreSecretsArgs
        {
            Key = "string",
            SecretsType = "AccountKey",
        },
    };

    // Blob-specific settings: which account/container to expose and how to reach it.
    var blobProperties = new AzureNative.MachineLearningServices.Inputs.AzureBlobDatastoreArgs
    {
        AccountName = "string",
        ContainerName = "string",
        Credentials = accountKeyCredentials,
        DatastoreType = "AzureBlob",
        Description = "string",
        Endpoint = "core.windows.net",
        Protocol = "https",
        Tags =
        {
            { "string", "string" },
        },
    };

    // Register the container as a datastore in the workspace.
    var datastore = new AzureNative.MachineLearningServices.Datastore("datastore", new()
    {
        DatastoreProperties = blobProperties,
        Name = "string",
        ResourceGroupName = "test-rg",
        SkipValidation = false,
        WorkspaceName = "my-aml-workspace",
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.Datastore;
import com.pulumi.azurenative.machinelearningservices.DatastoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var datastore = new Datastore("datastore", DatastoreArgs.builder()
.datastoreProperties(AzureBlobDatastoreArgs.builder()
.accountName("string")
.containerName("string")
.credentials(AccountKeyDatastoreCredentialsArgs.builder()
.credentialsType("AccountKey")
.secrets(AccountKeyDatastoreSecretsArgs.builder()
.key("string")
.secretsType("AccountKey")
.build())
.build())
.datastoreType("AzureBlob")
.description("string")
.endpoint("core.windows.net")
.protocol("https")
.tags(Map.of("string", "string"))
.build())
.name("string")
.resourceGroupName("test-rg")
.skipValidation(false)
.workspaceName("my-aml-workspace")
.build());
}
}
# Registers an existing Blob Storage container as a datastore in the
# workspace, authenticating with the storage account key.
resources:
  datastore:
    type: azure-native:machinelearningservices:Datastore
    properties:
      # What the datastore points at and how it authenticates.
      datastoreProperties:
        accountName: string
        containerName: string
        credentials:
          credentialsType: AccountKey
          secrets:
            key: string
            secretsType: AccountKey
        datastoreType: AzureBlob
        description: string
        endpoint: core.windows.net
        protocol: https
        tags:
          string: string
      # Where the datastore is registered.
      name: string
      resourceGroupName: test-rg
      skipValidation: false
      workspaceName: my-aml-workspace
When you create the datastore, Azure ML validates the connection and stores the credentials securely. The datastoreType property specifies “AzureBlob”, while accountName and containerName identify the storage location. The credentials block uses “AccountKey” authentication, embedding the storage account key in the secrets property. Training jobs can then reference this datastore by name without handling connection strings directly.
Connect to Data Lake Gen2 with service principal
Data Lake Gen2 provides hierarchical namespace and enterprise-grade security for large-scale analytics. Service principal authentication enables automated access without storing account keys.
import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";
// Registers an existing Data Lake Gen2 filesystem as a datastore in the
// workspace, authenticating as a service principal.
const datastore = new azure_native.machinelearningservices.Datastore("datastore", {
    // Where the datastore is registered.
    resourceGroupName: "test-rg",
    workspaceName: "my-aml-workspace",
    name: "string",
    skipValidation: false,
    // What the datastore points at and how it authenticates.
    datastoreProperties: {
        datastoreType: "AzureDataLakeGen2",
        accountName: "string",
        filesystem: "string",
        endpoint: "string",
        protocol: "string",
        description: "string",
        tags: {
            string: "string",
        },
        credentials: {
            credentialsType: "ServicePrincipal",
            tenantId: "00000000-1111-2222-3333-444444444444",
            clientId: "00000000-1111-2222-3333-444444444444",
            authorityUrl: "string",
            resourceUrl: "string",
            secrets: {
                secretsType: "ServicePrincipal",
                clientSecret: "string",
            },
        },
    },
});
import pulumi
import pulumi_azure_native as azure_native
# Service principal identity and secret used to reach the storage account.
service_principal_credentials = {
    "authority_url": "string",
    "client_id": "00000000-1111-2222-3333-444444444444",
    "credentials_type": "ServicePrincipal",
    "resource_url": "string",
    "secrets": {
        "client_secret": "string",
        "secrets_type": "ServicePrincipal",
    },
    "tenant_id": "00000000-1111-2222-3333-444444444444",
}

# Gen2-specific settings: which account/filesystem to expose and how to reach it.
gen2_properties = {
    "account_name": "string",
    "credentials": service_principal_credentials,
    "datastore_type": "AzureDataLakeGen2",
    "description": "string",
    "endpoint": "string",
    "filesystem": "string",
    "protocol": "string",
    "tags": {
        "string": "string",
    },
}

# Register the filesystem as a datastore in the workspace.
datastore = azure_native.machinelearningservices.Datastore(
    "datastore",
    datastore_properties=gen2_properties,
    name="string",
    resource_group_name="test-rg",
    skip_validation=False,
    workspace_name="my-aml-workspace",
)
package main
import (
machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main registers an existing Data Lake Gen2 filesystem as a datastore in the
// workspace, authenticating as a service principal.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewDatastore(ctx, "datastore", &machinelearningservices.DatastoreArgs{
			DatastoreProperties: &machinelearningservices.AzureDataLakeGen2DatastoreArgs{
				AccountName: pulumi.String("string"),
				// Resource inputs take the *Args input types with pulumi.String
				// values, matching every other field in this struct; the plain
				// (non-Args) structs with bare string literals are not inputs.
				Credentials: &machinelearningservices.ServicePrincipalDatastoreCredentialsArgs{
					AuthorityUrl:    pulumi.String("string"),
					ClientId:        pulumi.String("00000000-1111-2222-3333-444444444444"),
					CredentialsType: pulumi.String("ServicePrincipal"),
					ResourceUrl:     pulumi.String("string"),
					Secrets: &machinelearningservices.ServicePrincipalDatastoreSecretsArgs{
						ClientSecret: pulumi.String("string"),
						SecretsType:  pulumi.String("ServicePrincipal"),
					},
					TenantId: pulumi.String("00000000-1111-2222-3333-444444444444"),
				},
				DatastoreType: pulumi.String("AzureDataLakeGen2"),
				Description:   pulumi.String("string"),
				Endpoint:      pulumi.String("string"),
				Filesystem:    pulumi.String("string"),
				Protocol:      pulumi.String("string"),
				Tags: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
			},
			Name:              pulumi.String("string"),
			ResourceGroupName: pulumi.String("test-rg"),
			SkipValidation:    pulumi.Bool(false),
			WorkspaceName:     pulumi.String("my-aml-workspace"),
		})
		// err is nil on success, so it can be returned directly.
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;
return await Deployment.RunAsync(() =>
{
    // Service principal identity and secret used to reach the storage account.
    var servicePrincipalCredentials = new AzureNative.MachineLearningServices.Inputs.ServicePrincipalDatastoreCredentialsArgs
    {
        AuthorityUrl = "string",
        ClientId = "00000000-1111-2222-3333-444444444444",
        CredentialsType = "ServicePrincipal",
        ResourceUrl = "string",
        Secrets = new AzureNative.MachineLearningServices.Inputs.ServicePrincipalDatastoreSecretsArgs
        {
            ClientSecret = "string",
            SecretsType = "ServicePrincipal",
        },
        TenantId = "00000000-1111-2222-3333-444444444444",
    };

    // Gen2-specific settings: which account/filesystem to expose and how to reach it.
    var gen2Properties = new AzureNative.MachineLearningServices.Inputs.AzureDataLakeGen2DatastoreArgs
    {
        AccountName = "string",
        Credentials = servicePrincipalCredentials,
        DatastoreType = "AzureDataLakeGen2",
        Description = "string",
        Endpoint = "string",
        Filesystem = "string",
        Protocol = "string",
        Tags =
        {
            { "string", "string" },
        },
    };

    // Register the filesystem as a datastore in the workspace.
    var datastore = new AzureNative.MachineLearningServices.Datastore("datastore", new()
    {
        DatastoreProperties = gen2Properties,
        Name = "string",
        ResourceGroupName = "test-rg",
        SkipValidation = false,
        WorkspaceName = "my-aml-workspace",
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.Datastore;
import com.pulumi.azurenative.machinelearningservices.DatastoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var datastore = new Datastore("datastore", DatastoreArgs.builder()
.datastoreProperties(AzureDataLakeGen2DatastoreArgs.builder()
.accountName("string")
.credentials(ServicePrincipalDatastoreCredentialsArgs.builder()
.authorityUrl("string")
.clientId("00000000-1111-2222-3333-444444444444")
.credentialsType("ServicePrincipal")
.resourceUrl("string")
.secrets(ServicePrincipalDatastoreSecretsArgs.builder()
.clientSecret("string")
.secretsType("ServicePrincipal")
.build())
.tenantId("00000000-1111-2222-3333-444444444444")
.build())
.datastoreType("AzureDataLakeGen2")
.description("string")
.endpoint("string")
.filesystem("string")
.protocol("string")
.tags(Map.of("string", "string"))
.build())
.name("string")
.resourceGroupName("test-rg")
.skipValidation(false)
.workspaceName("my-aml-workspace")
.build());
}
}
# Registers an existing Data Lake Gen2 filesystem as a datastore in the
# workspace, authenticating as a service principal.
resources:
  datastore:
    type: azure-native:machinelearningservices:Datastore
    properties:
      # What the datastore points at and how it authenticates.
      datastoreProperties:
        accountName: string
        credentials:
          authorityUrl: string
          clientId: 00000000-1111-2222-3333-444444444444
          credentialsType: ServicePrincipal
          resourceUrl: string
          secrets:
            clientSecret: string
            secretsType: ServicePrincipal
          tenantId: 00000000-1111-2222-3333-444444444444
        datastoreType: AzureDataLakeGen2
        description: string
        endpoint: string
        filesystem: string
        protocol: string
        tags:
          string: string
      # Where the datastore is registered.
      name: string
      resourceGroupName: test-rg
      skipValidation: false
      workspaceName: my-aml-workspace
The datastoreType switches to “AzureDataLakeGen2”, and the filesystem property replaces containerName to reference the Gen2 hierarchical namespace. The credentials block uses “ServicePrincipal” authentication, requiring clientId, tenantId, authorityUrl, and resourceUrl. The clientSecret in the secrets block authenticates the service principal. This approach supports role-based access control and credential rotation without updating the datastore.
Connect to Azure Files with account keys
Azure Files provides SMB-compatible file shares that training jobs can mount. This is useful for shared configuration files or datasets that need file system semantics.
import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";
// Registers an existing Azure Files share as a datastore in the workspace,
// authenticating with the storage account key.
const datastore = new azure_native.machinelearningservices.Datastore("datastore", {
    // Where the datastore is registered.
    resourceGroupName: "test-rg",
    workspaceName: "my-aml-workspace",
    name: "string",
    skipValidation: false,
    // What the datastore points at and how it authenticates.
    datastoreProperties: {
        datastoreType: "AzureFile",
        accountName: "string",
        fileShareName: "string",
        endpoint: "string",
        protocol: "string",
        description: "string",
        tags: {
            string: "string",
        },
        credentials: {
            credentialsType: "AccountKey",
            secrets: {
                secretsType: "AccountKey",
                key: "string",
            },
        },
    },
});
import pulumi
import pulumi_azure_native as azure_native
# Account-key credentials used to reach the storage account.
account_key_credentials = {
    "credentials_type": "AccountKey",
    "secrets": {
        "key": "string",
        "secrets_type": "AccountKey",
    },
}

# File-share-specific settings: which account/share to expose and how to reach it.
file_share_properties = {
    "account_name": "string",
    "credentials": account_key_credentials,
    "datastore_type": "AzureFile",
    "description": "string",
    "endpoint": "string",
    "file_share_name": "string",
    "protocol": "string",
    "tags": {
        "string": "string",
    },
}

# Register the file share as a datastore in the workspace.
datastore = azure_native.machinelearningservices.Datastore(
    "datastore",
    datastore_properties=file_share_properties,
    name="string",
    resource_group_name="test-rg",
    skip_validation=False,
    workspace_name="my-aml-workspace",
)
package main
import (
machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main registers an existing Azure Files share as a datastore in the
// workspace, authenticating with the storage account key.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewDatastore(ctx, "datastore", &machinelearningservices.DatastoreArgs{
			DatastoreProperties: &machinelearningservices.AzureFileDatastoreArgs{
				AccountName: pulumi.String("string"),
				// Resource inputs take the *Args input types with pulumi.String
				// values, matching every other field in this struct; the plain
				// (non-Args) structs with bare string literals are not inputs.
				Credentials: &machinelearningservices.AccountKeyDatastoreCredentialsArgs{
					CredentialsType: pulumi.String("AccountKey"),
					Secrets: &machinelearningservices.AccountKeyDatastoreSecretsArgs{
						Key:         pulumi.String("string"),
						SecretsType: pulumi.String("AccountKey"),
					},
				},
				DatastoreType: pulumi.String("AzureFile"),
				Description:   pulumi.String("string"),
				Endpoint:      pulumi.String("string"),
				FileShareName: pulumi.String("string"),
				Protocol:      pulumi.String("string"),
				Tags: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
			},
			Name:              pulumi.String("string"),
			ResourceGroupName: pulumi.String("test-rg"),
			SkipValidation:    pulumi.Bool(false),
			WorkspaceName:     pulumi.String("my-aml-workspace"),
		})
		// err is nil on success, so it can be returned directly.
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;
return await Deployment.RunAsync(() =>
{
    // Account-key credentials used to reach the storage account.
    var accountKeyCredentials = new AzureNative.MachineLearningServices.Inputs.AccountKeyDatastoreCredentialsArgs
    {
        CredentialsType = "AccountKey",
        Secrets = new AzureNative.MachineLearningServices.Inputs.AccountKeyDatastoreSecretsArgs
        {
            Key = "string",
            SecretsType = "AccountKey",
        },
    };

    // File-share-specific settings: which account/share to expose and how to reach it.
    var fileShareProperties = new AzureNative.MachineLearningServices.Inputs.AzureFileDatastoreArgs
    {
        AccountName = "string",
        Credentials = accountKeyCredentials,
        DatastoreType = "AzureFile",
        Description = "string",
        Endpoint = "string",
        FileShareName = "string",
        Protocol = "string",
        Tags =
        {
            { "string", "string" },
        },
    };

    // Register the file share as a datastore in the workspace.
    var datastore = new AzureNative.MachineLearningServices.Datastore("datastore", new()
    {
        DatastoreProperties = fileShareProperties,
        Name = "string",
        ResourceGroupName = "test-rg",
        SkipValidation = false,
        WorkspaceName = "my-aml-workspace",
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.Datastore;
import com.pulumi.azurenative.machinelearningservices.DatastoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var datastore = new Datastore("datastore", DatastoreArgs.builder()
.datastoreProperties(AzureFileDatastoreArgs.builder()
.accountName("string")
.credentials(AccountKeyDatastoreCredentialsArgs.builder()
.credentialsType("AccountKey")
.secrets(AccountKeyDatastoreSecretsArgs.builder()
.key("string")
.secretsType("AccountKey")
.build())
.build())
.datastoreType("AzureFile")
.description("string")
.endpoint("string")
.fileShareName("string")
.protocol("string")
.tags(Map.of("string", "string"))
.build())
.name("string")
.resourceGroupName("test-rg")
.skipValidation(false)
.workspaceName("my-aml-workspace")
.build());
}
}
# Registers an existing Azure Files share as a datastore in the workspace,
# authenticating with the storage account key.
resources:
  datastore:
    type: azure-native:machinelearningservices:Datastore
    properties:
      # What the datastore points at and how it authenticates.
      datastoreProperties:
        accountName: string
        credentials:
          credentialsType: AccountKey
          secrets:
            key: string
            secretsType: AccountKey
        datastoreType: AzureFile
        description: string
        endpoint: string
        fileShareName: string
        protocol: string
        tags:
          string: string
      # Where the datastore is registered.
      name: string
      resourceGroupName: test-rg
      skipValidation: false
      workspaceName: my-aml-workspace
The datastoreType becomes “AzureFile”, and fileShareName identifies the share within the storage account. The credentials block uses “AccountKey” authentication, similar to Blob Storage. Azure ML mounts the file share during training, making files accessible through standard file I/O operations rather than blob APIs.
Beyond these examples
These snippets focus on specific datastore features: Blob Storage and Data Lake Gen2 connectivity, account key and service principal authentication, and Azure Files integration. They’re intentionally minimal rather than full ML pipeline configurations.
The examples reference pre-existing infrastructure such as Azure Machine Learning workspace and resource group, storage accounts (Blob, Data Lake Gen2, Files), containers, filesystems, or file shares, and service principals with client credentials (for Gen2). They focus on registering storage rather than provisioning the underlying infrastructure.
To keep things focused, common datastore patterns are omitted or appear only with default or placeholder values, including:
- Data Lake Gen1 connectivity (older generation)
- Validation controls (skipValidation)
- Custom endpoints and protocols
- Managed identity authentication
These omissions are intentional: the goal is to illustrate how each storage type is wired, not provide drop-in data pipeline modules. See the Datastore resource reference for all available configuration options.
Let's configure Azure Machine Learning Datastores
Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.
Try Pulumi Cloud for FREE
Frequently Asked Questions
Datastore Types & Configuration
Datastore types include AzureDataLakeGen1, AzureDataLakeGen2, AzureFile, and AzureBlob. Specify the type using the datastoreType field within datastoreProperties. Each type has specific requirements:
- AzureDataLakeGen1: storeName
- AzureDataLakeGen2: accountName, filesystem
- AzureFile: accountName, fileShareName
- AzureBlob: accountName, containerName
Authentication & Credentials
Credential types include ServicePrincipal (for Data Lake Gen1 and Gen2) and AccountKey (for File and Blob storage). Set the credentialsType field accordingly.
Service principal authentication requires authorityUrl, clientId, resourceUrl, tenantId, and a clientSecret within the secrets object. Set credentialsType to ServicePrincipal and secretsType to ServicePrincipal.
Account key authentication requires the key field within the secrets object. Set credentialsType to AccountKey and secretsType to AccountKey.
Resource Management
The name, resourceGroupName, and workspaceName properties are immutable and cannot be modified after creation.
To use a different API version, generate a local SDK package with pulumi package add azure-native machinelearningservices [ApiVersion]. The default version is 2025-09-01.