The azure-native:machinelearningservices:Datastore resource, part of the Pulumi Azure Native provider, registers external storage as a datastore within an Azure Machine Learning workspace, enabling ML jobs to reference data without hardcoding connection details. This guide focuses on three capabilities: Blob Storage connectivity with account keys, Data Lake Gen2 with service principal authentication, and Azure Files for shared storage.
Datastores reference existing Azure Machine Learning workspaces and storage resources. The examples are intentionally small. Combine them with your own workspace, storage accounts, and authentication configuration.
Connect to Blob Storage with account keys
Most ML workflows start by connecting to Azure Blob Storage for training data and model artifacts.
import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";
// Arguments for registering an existing Blob Storage container as a datastore.
// The explicit annotation checks the object literal against the same type the
// Datastore constructor expects for its second parameter.
const blobDatastoreArgs: azure_native.machinelearningservices.DatastoreArgs = {
    name: "string",
    properties: {
        accountName: "string",
        containerName: "string",
        // Authenticate against the storage account with its account key.
        credentials: {
            credentialsType: "AccountKey",
            secrets: {
                key: "string",
                secretsType: "AccountKey",
            },
        },
        datastoreType: "AzureBlob",
        description: "string",
        endpoint: "core.windows.net",
        protocol: "https",
        tags: {
            string: "string",
        },
    },
    resourceGroupName: "test-rg",
    skipValidation: false,
    workspaceName: "my-aml-workspace",
};

// Register the datastore in the existing workspace.
const datastore = new azure_native.machinelearningservices.Datastore("datastore", blobDatastoreArgs);
import pulumi
import pulumi_azure_native as azure_native
# Account-key credentials for the storage account.
blob_credentials = {
    "credentials_type": "AccountKey",
    "secrets": {
        "key": "string",
        "secrets_type": "AccountKey",
    },
}

# Blob-specific settings: which account and container the datastore points at.
blob_properties = {
    "account_name": "string",
    "container_name": "string",
    "credentials": blob_credentials,
    "datastore_type": "AzureBlob",
    "description": "string",
    "endpoint": "core.windows.net",
    "protocol": "https",
    "tags": {
        "string": "string",
    },
}

# Register the datastore in the existing workspace.
datastore = azure_native.machinelearningservices.Datastore(
    "datastore",
    name="string",
    properties=blob_properties,
    resource_group_name="test-rg",
    skip_validation=False,
    workspace_name="my-aml-workspace",
)
package main
import (
machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main registers an existing Azure Blob Storage container as a datastore in an
// Azure Machine Learning workspace.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewDatastore(ctx, "datastore", &machinelearningservices.DatastoreArgs{
			Name: pulumi.String("string"),
			Properties: &machinelearningservices.AzureBlobDatastoreArgs{
				AccountName:   pulumi.String("string"),
				ContainerName: pulumi.String("string"),
				// Use the ...Args input variants with pulumi-wrapped values, like
				// every other field here; the plain output structs carry raw Go
				// strings and are not resource inputs.
				// NOTE(review): confirm field types against the SDK version in use.
				Credentials: machinelearningservices.AccountKeyDatastoreCredentialsArgs{
					CredentialsType: pulumi.String("AccountKey"),
					Secrets: machinelearningservices.AccountKeyDatastoreSecretsArgs{
						Key:         pulumi.String("string"),
						SecretsType: pulumi.String("AccountKey"),
					},
				},
				DatastoreType: pulumi.String("AzureBlob"),
				Description:   pulumi.String("string"),
				Endpoint:      pulumi.String("core.windows.net"),
				Protocol:      pulumi.String("https"),
				Tags: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
			},
			ResourceGroupName: pulumi.String("test-rg"),
			SkipValidation:    pulumi.Bool(false),
			WorkspaceName:     pulumi.String("my-aml-workspace"),
		})
		// Propagate any registration error to the Pulumi engine; nil on success.
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;
// Program entry point: registers an existing Blob Storage container as a
// datastore in an Azure Machine Learning workspace.
return await Deployment.RunAsync(() =>
{
    // Account-key credentials for the storage account.
    var accountKeyCredentials = new AzureNative.MachineLearningServices.Inputs.AccountKeyDatastoreCredentialsArgs
    {
        CredentialsType = "AccountKey",
        Secrets = new AzureNative.MachineLearningServices.Inputs.AccountKeyDatastoreSecretsArgs
        {
            Key = "string",
            SecretsType = "AccountKey",
        },
    };

    // Blob-specific settings: which account and container the datastore points at.
    var blobProperties = new AzureNative.MachineLearningServices.Inputs.AzureBlobDatastoreArgs
    {
        AccountName = "string",
        ContainerName = "string",
        Credentials = accountKeyCredentials,
        DatastoreType = "AzureBlob",
        Description = "string",
        Endpoint = "core.windows.net",
        Protocol = "https",
        Tags =
        {
            { "string", "string" },
        },
    };

    // Register the datastore in the existing workspace.
    var datastore = new AzureNative.MachineLearningServices.Datastore("datastore", new()
    {
        Name = "string",
        Properties = blobProperties,
        ResourceGroupName = "test-rg",
        SkipValidation = false,
        WorkspaceName = "my-aml-workspace",
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.Datastore;
import com.pulumi.azurenative.machinelearningservices.DatastoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var datastore = new Datastore("datastore", DatastoreArgs.builder()
.name("string")
.properties(AzureBlobDatastoreArgs.builder()
.accountName("string")
.containerName("string")
.credentials(AccountKeyDatastoreCredentialsArgs.builder()
.credentialsType("AccountKey")
.secrets(AccountKeyDatastoreSecretsArgs.builder()
.key("string")
.secretsType("AccountKey")
.build())
.build())
.datastoreType("AzureBlob")
.description("string")
.endpoint("core.windows.net")
.protocol("https")
.tags(Map.of("string", "string"))
.build())
.resourceGroupName("test-rg")
.skipValidation(false)
.workspaceName("my-aml-workspace")
.build());
}
}
# Registers an existing Blob Storage container as a datastore in an
# Azure Machine Learning workspace.
resources:
  datastore:
    type: azure-native:machinelearningservices:Datastore
    properties:
      name: string
      # Blob-specific datastore settings (nested under the resource's inputs).
      properties:
        accountName: string
        containerName: string
        credentials:
          credentialsType: AccountKey
          secrets:
            key: string
            secretsType: AccountKey
        datastoreType: AzureBlob
        description: string
        endpoint: core.windows.net
        protocol: https
        tags:
          string: string
      resourceGroupName: test-rg
      skipValidation: false
      workspaceName: my-aml-workspace
The datastoreType property specifies “AzureBlob” to indicate Blob Storage. The accountName and containerName identify the storage location. The credentials block uses credentialsType “AccountKey” with the storage account key in the secrets section. This configuration allows ML jobs to read training data and write model outputs without embedding connection strings in code.
Connect to Data Lake Gen2 with service principals
Teams working with large-scale data lakes often use Azure Data Lake Storage Gen2 for hierarchical namespaces and fine-grained access control.
import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";
// Arguments for registering a Data Lake Gen2 filesystem as a datastore.
// The explicit annotation checks the object literal against the same type the
// Datastore constructor expects for its second parameter.
const lakeDatastoreArgs: azure_native.machinelearningservices.DatastoreArgs = {
    name: "string",
    properties: {
        accountName: "string",
        // Authenticate as a service principal rather than with an account key.
        credentials: {
            authorityUrl: "string",
            clientId: "00000000-1111-2222-3333-444444444444",
            credentialsType: "ServicePrincipal",
            resourceUrl: "string",
            secrets: {
                clientSecret: "string",
                secretsType: "ServicePrincipal",
            },
            tenantId: "00000000-1111-2222-3333-444444444444",
        },
        datastoreType: "AzureDataLakeGen2",
        description: "string",
        endpoint: "string",
        filesystem: "string",
        protocol: "string",
        tags: {
            string: "string",
        },
    },
    resourceGroupName: "test-rg",
    skipValidation: false,
    workspaceName: "my-aml-workspace",
};

// Register the datastore in the existing workspace.
const datastore = new azure_native.machinelearningservices.Datastore("datastore", lakeDatastoreArgs);
import pulumi
import pulumi_azure_native as azure_native
# Service principal identity used to reach the Data Lake account.
service_principal_credentials = {
    "authority_url": "string",
    "client_id": "00000000-1111-2222-3333-444444444444",
    "credentials_type": "ServicePrincipal",
    "resource_url": "string",
    "secrets": {
        "client_secret": "string",
        "secrets_type": "ServicePrincipal",
    },
    "tenant_id": "00000000-1111-2222-3333-444444444444",
}

# Gen2-specific settings: which account and filesystem the datastore points at.
lake_properties = {
    "account_name": "string",
    "credentials": service_principal_credentials,
    "datastore_type": "AzureDataLakeGen2",
    "description": "string",
    "endpoint": "string",
    "filesystem": "string",
    "protocol": "string",
    "tags": {
        "string": "string",
    },
}

# Register the datastore in the existing workspace.
datastore = azure_native.machinelearningservices.Datastore(
    "datastore",
    name="string",
    properties=lake_properties,
    resource_group_name="test-rg",
    skip_validation=False,
    workspace_name="my-aml-workspace",
)
package main
import (
machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main registers an existing Data Lake Storage Gen2 filesystem as a datastore
// in an Azure Machine Learning workspace, authenticating as a service principal.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewDatastore(ctx, "datastore", &machinelearningservices.DatastoreArgs{
			Name: pulumi.String("string"),
			Properties: &machinelearningservices.AzureDataLakeGen2DatastoreArgs{
				AccountName: pulumi.String("string"),
				// Use the ...Args input variants with pulumi-wrapped values, like
				// every other field here; the plain output structs carry raw Go
				// strings and are not resource inputs.
				// NOTE(review): confirm field types against the SDK version in use.
				Credentials: machinelearningservices.ServicePrincipalDatastoreCredentialsArgs{
					AuthorityUrl:    pulumi.String("string"),
					ClientId:        pulumi.String("00000000-1111-2222-3333-444444444444"),
					CredentialsType: pulumi.String("ServicePrincipal"),
					ResourceUrl:     pulumi.String("string"),
					Secrets: machinelearningservices.ServicePrincipalDatastoreSecretsArgs{
						ClientSecret: pulumi.String("string"),
						SecretsType:  pulumi.String("ServicePrincipal"),
					},
					TenantId: pulumi.String("00000000-1111-2222-3333-444444444444"),
				},
				DatastoreType: pulumi.String("AzureDataLakeGen2"),
				Description:   pulumi.String("string"),
				Endpoint:      pulumi.String("string"),
				Filesystem:    pulumi.String("string"),
				Protocol:      pulumi.String("string"),
				Tags: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
			},
			ResourceGroupName: pulumi.String("test-rg"),
			SkipValidation:    pulumi.Bool(false),
			WorkspaceName:     pulumi.String("my-aml-workspace"),
		})
		// Propagate any registration error to the Pulumi engine; nil on success.
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;
// Program entry point: registers an existing Data Lake Gen2 filesystem as a
// datastore in an Azure Machine Learning workspace.
return await Deployment.RunAsync(() =>
{
    // Service principal identity used to reach the Data Lake account.
    var servicePrincipalCredentials = new AzureNative.MachineLearningServices.Inputs.ServicePrincipalDatastoreCredentialsArgs
    {
        AuthorityUrl = "string",
        ClientId = "00000000-1111-2222-3333-444444444444",
        CredentialsType = "ServicePrincipal",
        ResourceUrl = "string",
        Secrets = new AzureNative.MachineLearningServices.Inputs.ServicePrincipalDatastoreSecretsArgs
        {
            ClientSecret = "string",
            SecretsType = "ServicePrincipal",
        },
        TenantId = "00000000-1111-2222-3333-444444444444",
    };

    // Gen2-specific settings: which account and filesystem the datastore points at.
    var lakeProperties = new AzureNative.MachineLearningServices.Inputs.AzureDataLakeGen2DatastoreArgs
    {
        AccountName = "string",
        Credentials = servicePrincipalCredentials,
        DatastoreType = "AzureDataLakeGen2",
        Description = "string",
        Endpoint = "string",
        Filesystem = "string",
        Protocol = "string",
        Tags =
        {
            { "string", "string" },
        },
    };

    // Register the datastore in the existing workspace.
    var datastore = new AzureNative.MachineLearningServices.Datastore("datastore", new()
    {
        Name = "string",
        Properties = lakeProperties,
        ResourceGroupName = "test-rg",
        SkipValidation = false,
        WorkspaceName = "my-aml-workspace",
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.Datastore;
import com.pulumi.azurenative.machinelearningservices.DatastoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var datastore = new Datastore("datastore", DatastoreArgs.builder()
.name("string")
.properties(AzureDataLakeGen2DatastoreArgs.builder()
.accountName("string")
.credentials(ServicePrincipalDatastoreCredentialsArgs.builder()
.authorityUrl("string")
.clientId("00000000-1111-2222-3333-444444444444")
.credentialsType("ServicePrincipal")
.resourceUrl("string")
.secrets(ServicePrincipalDatastoreSecretsArgs.builder()
.clientSecret("string")
.secretsType("ServicePrincipal")
.build())
.tenantId("00000000-1111-2222-3333-444444444444")
.build())
.datastoreType("AzureDataLakeGen2")
.description("string")
.endpoint("string")
.filesystem("string")
.protocol("string")
.tags(Map.of("string", "string"))
.build())
.resourceGroupName("test-rg")
.skipValidation(false)
.workspaceName("my-aml-workspace")
.build());
}
}
# Registers an existing Data Lake Storage Gen2 filesystem as a datastore in an
# Azure Machine Learning workspace, authenticating as a service principal.
resources:
  datastore:
    type: azure-native:machinelearningservices:Datastore
    properties:
      name: string
      # Gen2-specific datastore settings (nested under the resource's inputs).
      properties:
        accountName: string
        credentials:
          authorityUrl: string
          clientId: 00000000-1111-2222-3333-444444444444
          credentialsType: ServicePrincipal
          resourceUrl: string
          secrets:
            clientSecret: string
            secretsType: ServicePrincipal
          tenantId: 00000000-1111-2222-3333-444444444444
        datastoreType: AzureDataLakeGen2
        description: string
        endpoint: string
        filesystem: string
        protocol: string
        tags:
          string: string
      resourceGroupName: test-rg
      skipValidation: false
      workspaceName: my-aml-workspace
The datastoreType “AzureDataLakeGen2” enables Data Lake Gen2 features like hierarchical namespaces. The filesystem property specifies the container within the Data Lake account. Service principal authentication requires clientId, tenantId, authorityUrl, and resourceUrl in the credentials block, along with the client secret. This approach provides more granular access control than account keys, allowing different service principals to access different parts of the data lake.
Connect to Azure Files for shared storage
Some ML pipelines need shared file storage accessible from multiple compute targets simultaneously.
import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";
// Arguments for registering an Azure Files share as a datastore.
// The explicit annotation checks the object literal against the same type the
// Datastore constructor expects for its second parameter.
const fileDatastoreArgs: azure_native.machinelearningservices.DatastoreArgs = {
    name: "string",
    properties: {
        accountName: "string",
        // Authenticate against the storage account with its account key.
        credentials: {
            credentialsType: "AccountKey",
            secrets: {
                key: "string",
                secretsType: "AccountKey",
            },
        },
        datastoreType: "AzureFile",
        description: "string",
        endpoint: "string",
        fileShareName: "string",
        protocol: "string",
        tags: {
            string: "string",
        },
    },
    resourceGroupName: "test-rg",
    skipValidation: false,
    workspaceName: "my-aml-workspace",
};

// Register the datastore in the existing workspace.
const datastore = new azure_native.machinelearningservices.Datastore("datastore", fileDatastoreArgs);
import pulumi
import pulumi_azure_native as azure_native
# Account-key credentials for the storage account that hosts the file share.
file_credentials = {
    "credentials_type": "AccountKey",
    "secrets": {
        "key": "string",
        "secrets_type": "AccountKey",
    },
}

# Azure Files-specific settings: which account and share the datastore points at.
file_properties = {
    "account_name": "string",
    "credentials": file_credentials,
    "datastore_type": "AzureFile",
    "description": "string",
    "endpoint": "string",
    "file_share_name": "string",
    "protocol": "string",
    "tags": {
        "string": "string",
    },
}

# Register the datastore in the existing workspace.
datastore = azure_native.machinelearningservices.Datastore(
    "datastore",
    name="string",
    properties=file_properties,
    resource_group_name="test-rg",
    skip_validation=False,
    workspace_name="my-aml-workspace",
)
package main
import (
machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main registers an existing Azure Files share as a datastore in an
// Azure Machine Learning workspace.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewDatastore(ctx, "datastore", &machinelearningservices.DatastoreArgs{
			Name: pulumi.String("string"),
			Properties: &machinelearningservices.AzureFileDatastoreArgs{
				AccountName: pulumi.String("string"),
				// Use the ...Args input variants with pulumi-wrapped values, like
				// every other field here; the plain output structs carry raw Go
				// strings and are not resource inputs.
				// NOTE(review): confirm field types against the SDK version in use.
				Credentials: machinelearningservices.AccountKeyDatastoreCredentialsArgs{
					CredentialsType: pulumi.String("AccountKey"),
					Secrets: machinelearningservices.AccountKeyDatastoreSecretsArgs{
						Key:         pulumi.String("string"),
						SecretsType: pulumi.String("AccountKey"),
					},
				},
				DatastoreType: pulumi.String("AzureFile"),
				Description:   pulumi.String("string"),
				Endpoint:      pulumi.String("string"),
				FileShareName: pulumi.String("string"),
				Protocol:      pulumi.String("string"),
				Tags: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
			},
			ResourceGroupName: pulumi.String("test-rg"),
			SkipValidation:    pulumi.Bool(false),
			WorkspaceName:     pulumi.String("my-aml-workspace"),
		})
		// Propagate any registration error to the Pulumi engine; nil on success.
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;
// Program entry point: registers an existing Azure Files share as a
// datastore in an Azure Machine Learning workspace.
return await Deployment.RunAsync(() =>
{
    // Account-key credentials for the storage account that hosts the file share.
    var accountKeyCredentials = new AzureNative.MachineLearningServices.Inputs.AccountKeyDatastoreCredentialsArgs
    {
        CredentialsType = "AccountKey",
        Secrets = new AzureNative.MachineLearningServices.Inputs.AccountKeyDatastoreSecretsArgs
        {
            Key = "string",
            SecretsType = "AccountKey",
        },
    };

    // Azure Files-specific settings: which account and share the datastore points at.
    var fileProperties = new AzureNative.MachineLearningServices.Inputs.AzureFileDatastoreArgs
    {
        AccountName = "string",
        Credentials = accountKeyCredentials,
        DatastoreType = "AzureFile",
        Description = "string",
        Endpoint = "string",
        FileShareName = "string",
        Protocol = "string",
        Tags =
        {
            { "string", "string" },
        },
    };

    // Register the datastore in the existing workspace.
    var datastore = new AzureNative.MachineLearningServices.Datastore("datastore", new()
    {
        Name = "string",
        Properties = fileProperties,
        ResourceGroupName = "test-rg",
        SkipValidation = false,
        WorkspaceName = "my-aml-workspace",
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.Datastore;
import com.pulumi.azurenative.machinelearningservices.DatastoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var datastore = new Datastore("datastore", DatastoreArgs.builder()
.name("string")
.properties(AzureFileDatastoreArgs.builder()
.accountName("string")
.credentials(AccountKeyDatastoreCredentialsArgs.builder()
.credentialsType("AccountKey")
.secrets(AccountKeyDatastoreSecretsArgs.builder()
.key("string")
.secretsType("AccountKey")
.build())
.build())
.datastoreType("AzureFile")
.description("string")
.endpoint("string")
.fileShareName("string")
.protocol("string")
.tags(Map.of("string", "string"))
.build())
.resourceGroupName("test-rg")
.skipValidation(false)
.workspaceName("my-aml-workspace")
.build());
}
}
# Registers an existing Azure Files share as a datastore in an
# Azure Machine Learning workspace.
resources:
  datastore:
    type: azure-native:machinelearningservices:Datastore
    properties:
      name: string
      # Azure Files-specific datastore settings (nested under the resource's inputs).
      properties:
        accountName: string
        credentials:
          credentialsType: AccountKey
          secrets:
            key: string
            secretsType: AccountKey
        datastoreType: AzureFile
        description: string
        endpoint: string
        fileShareName: string
        protocol: string
        tags:
          string: string
      resourceGroupName: test-rg
      skipValidation: false
      workspaceName: my-aml-workspace
The datastoreType “AzureFile” enables Azure Files connectivity. The fileShareName property identifies the SMB-compatible file share. Azure Files allows multiple compute instances to mount the same storage concurrently, useful for scenarios where training jobs need to share intermediate results or configuration files during execution.
Beyond these examples
These snippets focus on specific datastore features: Blob Storage and Data Lake Gen2 connectivity, account key and service principal authentication, and Azure Files integration. They’re intentionally minimal rather than full ML pipeline configurations.
The examples reference pre-existing infrastructure such as Azure Machine Learning workspaces, Storage Accounts, Data Lake Gen2 accounts or Azure Files shares, service principals with appropriate permissions (for the Gen2 example), and resource groups. They focus on registering the datastore rather than provisioning the underlying storage.
To keep things focused, common datastore patterns are omitted, including:
- Data Lake Gen1 connectivity (older generation)
- Validation controls (skipValidation)
- Custom endpoints and protocols
- Managed identity authentication
These omissions are intentional: the goal is to illustrate how each datastore type is wired, not provide drop-in storage modules. See the Datastore resource reference for all available configuration options.
Let's configure Azure Machine Learning Datastores
Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.
Try Pulumi Cloud for FREE
Frequently Asked Questions
Datastore Types & Configuration
The supported datastore types are AzureDataLakeGen1, AzureDataLakeGen2, AzureFile, and AzureBlob. Each type requires different properties (e.g., AzureBlob needs containerName, while AzureDataLakeGen2 needs filesystem).
Each datastore type connects to a different Azure storage service:
- AzureBlob - Azure Blob Storage (requires accountName, containerName)
- AzureFile - Azure File Storage (requires accountName, fileShareName)
- AzureDataLakeGen1 - Data Lake Gen1 (requires storeName)
- AzureDataLakeGen2 - Data Lake Gen2 (requires accountName, filesystem)
Authentication & Credentials
The examples use two credential types: ServicePrincipal (with clientId, tenantId, authorityUrl, resourceUrl, and clientSecret) and AccountKey (with the storage account key).
Use ServicePrincipal for Azure Data Lake Gen1 and Gen2 datastores (as shown in examples). Use AccountKey for Azure File and Azure Blob datastores (as shown in examples).
Resource Management
The name, resourceGroupName, and workspaceName properties are immutable and require resource replacement if changed.
To use a different API version, generate a local SDK with: pulumi package add azure-native machinelearningservices [ApiVersion]. Many versions are available, including 2022-05-01, 2023-04-01, 2024-04-01, and 2025-12-01.
To import an existing datastore, run: pulumi import azure-native:machinelearningservices:Datastore string /subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.MachineLearningServices/workspaces/{workspaceName}/datastores/{name}
Validation & Deployment
Setting skipValidation to true skips validation during datastore creation. The examples set it to false for standard validation.