Configure Azure Machine Learning Datastores

The azure-native:machinelearningservices:MachineLearningDatastore resource, part of the Pulumi Azure Native provider, registers external storage systems as named datastores within an Azure ML workspace. This guide focuses on three capabilities: Blob Storage and ADLS Gen2 connections, service principal and account key authentication, and SQL database and Databricks File System integration.

Datastores reference existing Azure ML workspaces and storage infrastructure that must be provisioned separately. The examples are intentionally small. Combine them with your own workspace, storage accounts, and authentication configuration.

Connect to Azure Blob Storage with account keys

Most ML workflows start by connecting to Blob Storage for training data and model artifacts.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// Registers an existing Blob container as the datastore "blobDatastore" in the
// "acjain-mleastUS2" workspace, authenticating with the storage account key.
// NOTE(review): accountKey is a literal sample value; real programs would
// presumably load it from Pulumi config secrets — confirm for your stack.
const machineLearningDatastore = new azure_native.machinelearningservices.MachineLearningDatastore("machineLearningDatastore", {
    accountKey: "wddrfewfewsgewgrrwegwreg",
    accountName: "acjainmleastus9484093746",
    containerName: "azureml-blobstore-5da947c5-53aa-41a5-bb2b-074074e73b7",
    dataStoreType: azure_native.machinelearningservices.DatastoreTypeArm.Blob,
    datastoreName: "blobDatastore",
    resourceGroupName: "acjain-mleastUS2",
    workspaceName: "acjain-mleastUS2",
});
import pulumi
import pulumi_azure_native as azure_native

# Registers an existing Blob container as the datastore "blobDatastore" in the
# "acjain-mleastUS2" workspace, authenticating with the storage account key.
# NOTE(review): account_key is a literal sample value; real programs would
# presumably load it from Pulumi config secrets — confirm for your stack.
machine_learning_datastore = azure_native.machinelearningservices.MachineLearningDatastore("machineLearningDatastore",
    account_key="wddrfewfewsgewgrrwegwreg",
    account_name="acjainmleastus9484093746",
    container_name="azureml-blobstore-5da947c5-53aa-41a5-bb2b-074074e73b7",
    data_store_type=azure_native.machinelearningservices.DatastoreTypeArm.BLOB,
    datastore_name="blobDatastore",
    resource_group_name="acjain-mleastUS2",
    workspace_name="acjain-mleastUS2")
package main

import (
	machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that registers an existing Blob container as the
// datastore "blobDatastore" in the "acjain-mleastUS2" workspace, authenticating
// with the storage account key.
// NOTE(review): AccountKey is a literal sample value; real programs would
// presumably load it from Pulumi config secrets — confirm for your stack.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewMachineLearningDatastore(ctx, "machineLearningDatastore", &machinelearningservices.MachineLearningDatastoreArgs{
			AccountKey:        pulumi.String("wddrfewfewsgewgrrwegwreg"),
			AccountName:       pulumi.String("acjainmleastus9484093746"),
			ContainerName:     pulumi.String("azureml-blobstore-5da947c5-53aa-41a5-bb2b-074074e73b7"),
			DataStoreType:     pulumi.String(machinelearningservices.DatastoreTypeArmBlob),
			DatastoreName:     pulumi.String("blobDatastore"),
			ResourceGroupName: pulumi.String("acjain-mleastUS2"),
			WorkspaceName:     pulumi.String("acjain-mleastUS2"),
		})
		// Propagate a registration failure to the Pulumi engine.
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

// Registers an existing Blob container as the datastore "blobDatastore" in the
// "acjain-mleastUS2" workspace, authenticating with the storage account key.
// NOTE(review): AccountKey is a literal sample value; real programs would
// presumably load it from Pulumi config secrets — confirm for your stack.
return await Deployment.RunAsync(() => 
{
    var machineLearningDatastore = new AzureNative.MachineLearningServices.MachineLearningDatastore("machineLearningDatastore", new()
    {
        AccountKey = "wddrfewfewsgewgrrwegwreg",
        AccountName = "acjainmleastus9484093746",
        ContainerName = "azureml-blobstore-5da947c5-53aa-41a5-bb2b-074074e73b7",
        DataStoreType = AzureNative.MachineLearningServices.DatastoreTypeArm.Blob,
        DatastoreName = "blobDatastore",
        ResourceGroupName = "acjain-mleastUS2",
        WorkspaceName = "acjain-mleastUS2",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.MachineLearningDatastore;
import com.pulumi.azurenative.machinelearningservices.MachineLearningDatastoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that registers an existing Blob container as the datastore
 * "blobDatastore" in the "acjain-mleastUS2" workspace, authenticating with the
 * storage account key.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the datastore resource; invoked by {@code Pulumi.run}. */
    public static void stack(Context ctx) {
        // NOTE(review): accountKey is a literal sample value; real programs would
        // presumably load it from Pulumi config secrets — confirm for your stack.
        var machineLearningDatastore = new MachineLearningDatastore("machineLearningDatastore", MachineLearningDatastoreArgs.builder()
            .accountKey("wddrfewfewsgewgrrwegwreg")
            .accountName("acjainmleastus9484093746")
            .containerName("azureml-blobstore-5da947c5-53aa-41a5-bb2b-074074e73b7")
            .dataStoreType("blob")
            .datastoreName("blobDatastore")
            .resourceGroupName("acjain-mleastUS2")
            .workspaceName("acjain-mleastUS2")
            .build());

    }
}
# Registers an existing Blob container as the datastore "blobDatastore" in the
# "acjain-mleastUS2" workspace, authenticating with the storage account key.
# NOTE(review): accountKey is a literal sample value; real programs would
# presumably supply it as a Pulumi config secret — confirm for your stack.
resources:
  machineLearningDatastore:
    type: azure-native:machinelearningservices:MachineLearningDatastore
    properties:
      accountKey: wddrfewfewsgewgrrwegwreg
      accountName: acjainmleastus9484093746
      containerName: azureml-blobstore-5da947c5-53aa-41a5-bb2b-074074e73b7
      dataStoreType: blob
      datastoreName: blobDatastore
      resourceGroupName: acjain-mleastUS2
      workspaceName: acjain-mleastUS2

The dataStoreType property specifies “blob” for Azure Blob Storage. The accountName and accountKey authenticate to the storage account, while containerName identifies the specific container. The workspaceName and resourceGroupName scope the datastore to your ML workspace.

Connect to ADLS Gen2 with service principal authentication

Data lake scenarios often require hierarchical namespace features and service principal authentication for enterprise security policies.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// Registers the "testfs1" file system of an ADLS Gen2 account as the datastore
// "adlsgen2Datastore", authenticating with clientId / clientSecret / tenantId.
// NOTE(review): clientSecret is a literal sample value; real programs would
// presumably load it from Pulumi config secrets — confirm for your stack.
const machineLearningDatastore = new azure_native.machinelearningservices.MachineLearningDatastore("machineLearningDatastore", {
    accountName: "nicksadlsgen2storage",
    clientId: "233d7008-b157-4354-88d1-ba191f06a900",
    clientSecret: "vdegbvedgeg",
    dataStoreType: azure_native.machinelearningservices.DatastoreTypeArm.Adls_gen2,
    datastoreName: "adlsgen2Datastore",
    fileSystem: "testfs1",
    resourceGroupName: "acjain-mleastUS2",
    tenantId: "72f988bf-86f1-41af-91ab-2d7cd011db47",
    workspaceName: "acjain-mleastUS2",
});
import pulumi
import pulumi_azure_native as azure_native

# Registers the "testfs1" file system of an ADLS Gen2 account as the datastore
# "adlsgen2Datastore", authenticating with client_id / client_secret / tenant_id.
# NOTE(review): client_secret is a literal sample value; real programs would
# presumably load it from Pulumi config secrets — confirm for your stack.
machine_learning_datastore = azure_native.machinelearningservices.MachineLearningDatastore("machineLearningDatastore",
    account_name="nicksadlsgen2storage",
    client_id="233d7008-b157-4354-88d1-ba191f06a900",
    client_secret="vdegbvedgeg",
    data_store_type=azure_native.machinelearningservices.DatastoreTypeArm.ADLS_GEN2,
    datastore_name="adlsgen2Datastore",
    file_system="testfs1",
    resource_group_name="acjain-mleastUS2",
    tenant_id="72f988bf-86f1-41af-91ab-2d7cd011db47",
    workspace_name="acjain-mleastUS2")
package main

import (
	machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that registers the "testfs1" file system of an
// ADLS Gen2 account as the datastore "adlsgen2Datastore", authenticating with
// ClientId / ClientSecret / TenantId.
// NOTE(review): ClientSecret is a literal sample value; real programs would
// presumably load it from Pulumi config secrets — confirm for your stack.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewMachineLearningDatastore(ctx, "machineLearningDatastore", &machinelearningservices.MachineLearningDatastoreArgs{
			AccountName:       pulumi.String("nicksadlsgen2storage"),
			ClientId:          pulumi.String("233d7008-b157-4354-88d1-ba191f06a900"),
			ClientSecret:      pulumi.String("vdegbvedgeg"),
			DataStoreType:     pulumi.String(machinelearningservices.DatastoreTypeArm_Adls_Gen2),
			DatastoreName:     pulumi.String("adlsgen2Datastore"),
			FileSystem:        pulumi.String("testfs1"),
			ResourceGroupName: pulumi.String("acjain-mleastUS2"),
			TenantId:          pulumi.String("72f988bf-86f1-41af-91ab-2d7cd011db47"),
			WorkspaceName:     pulumi.String("acjain-mleastUS2"),
		})
		// Propagate a registration failure to the Pulumi engine.
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

// Registers the "testfs1" file system of an ADLS Gen2 account as the datastore
// "adlsgen2Datastore", authenticating with ClientId / ClientSecret / TenantId.
// NOTE(review): ClientSecret is a literal sample value; real programs would
// presumably load it from Pulumi config secrets — confirm for your stack.
return await Deployment.RunAsync(() => 
{
    var machineLearningDatastore = new AzureNative.MachineLearningServices.MachineLearningDatastore("machineLearningDatastore", new()
    {
        AccountName = "nicksadlsgen2storage",
        ClientId = "233d7008-b157-4354-88d1-ba191f06a900",
        ClientSecret = "vdegbvedgeg",
        DataStoreType = AzureNative.MachineLearningServices.DatastoreTypeArm.Adls_gen2,
        DatastoreName = "adlsgen2Datastore",
        FileSystem = "testfs1",
        ResourceGroupName = "acjain-mleastUS2",
        TenantId = "72f988bf-86f1-41af-91ab-2d7cd011db47",
        WorkspaceName = "acjain-mleastUS2",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.MachineLearningDatastore;
import com.pulumi.azurenative.machinelearningservices.MachineLearningDatastoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that registers the "testfs1" file system of an ADLS Gen2
 * account as the datastore "adlsgen2Datastore", authenticating with
 * clientId / clientSecret / tenantId.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the datastore resource; invoked by {@code Pulumi.run}. */
    public static void stack(Context ctx) {
        // NOTE(review): clientSecret is a literal sample value; real programs would
        // presumably load it from Pulumi config secrets — confirm for your stack.
        var machineLearningDatastore = new MachineLearningDatastore("machineLearningDatastore", MachineLearningDatastoreArgs.builder()
            .accountName("nicksadlsgen2storage")
            .clientId("233d7008-b157-4354-88d1-ba191f06a900")
            .clientSecret("vdegbvedgeg")
            .dataStoreType("adls-gen2")
            .datastoreName("adlsgen2Datastore")
            .fileSystem("testfs1")
            .resourceGroupName("acjain-mleastUS2")
            .tenantId("72f988bf-86f1-41af-91ab-2d7cd011db47")
            .workspaceName("acjain-mleastUS2")
            .build());

    }
}
# Registers the "testfs1" file system of an ADLS Gen2 account as the datastore
# "adlsgen2Datastore", authenticating with clientId / clientSecret / tenantId.
# NOTE(review): clientSecret is a literal sample value; real programs would
# presumably supply it as a Pulumi config secret — confirm for your stack.
resources:
  machineLearningDatastore:
    type: azure-native:machinelearningservices:MachineLearningDatastore
    properties:
      accountName: nicksadlsgen2storage
      clientId: 233d7008-b157-4354-88d1-ba191f06a900
      clientSecret: vdegbvedgeg
      dataStoreType: adls-gen2
      datastoreName: adlsgen2Datastore
      fileSystem: testfs1
      resourceGroupName: acjain-mleastUS2
      tenantId: 72f988bf-86f1-41af-91ab-2d7cd011db47
      workspaceName: acjain-mleastUS2

The dataStoreType switches to “adls-gen2” for Azure Data Lake Storage Gen2. The fileSystem property identifies the data lake container. Instead of account keys, this configuration uses clientId, clientSecret, and tenantId for Azure AD service principal authentication, which integrates with enterprise identity policies.

Connect to Azure SQL Database for structured data

ML pipelines that work with relational data need database connections for feature engineering and model training.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// Registers database "dataset" on server "dataset-azsql-srv" as the datastore
// "sqlDatastore", authenticating with a SQL user name and password.
// "<password>" is a placeholder to be replaced with the real credential.
const machineLearningDatastore = new azure_native.machinelearningservices.MachineLearningDatastore("machineLearningDatastore", {
    dataStoreType: azure_native.machinelearningservices.DatastoreTypeArm.Sqldb,
    databaseName: "dataset",
    datastoreName: "sqlDatastore",
    password: "<password>",
    resourceGroupName: "acjain-mleastUS2",
    serverName: "dataset-azsql-srv",
    userName: "demo_user",
    workspaceName: "acjain-mleastUS2",
});
import pulumi
import pulumi_azure_native as azure_native

# Registers database "dataset" on server "dataset-azsql-srv" as the datastore
# "sqlDatastore", authenticating with a SQL user name and password.
# "<password>" is a placeholder to be replaced with the real credential.
machine_learning_datastore = azure_native.machinelearningservices.MachineLearningDatastore("machineLearningDatastore",
    data_store_type=azure_native.machinelearningservices.DatastoreTypeArm.SQLDB,
    database_name="dataset",
    datastore_name="sqlDatastore",
    password="<password>",
    resource_group_name="acjain-mleastUS2",
    server_name="dataset-azsql-srv",
    user_name="demo_user",
    workspace_name="acjain-mleastUS2")
package main

import (
	machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that registers database "dataset" on server
// "dataset-azsql-srv" as the datastore "sqlDatastore", authenticating with a
// SQL user name and password. "<password>" is a placeholder to be replaced
// with the real credential.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewMachineLearningDatastore(ctx, "machineLearningDatastore", &machinelearningservices.MachineLearningDatastoreArgs{
			DataStoreType:     pulumi.String(machinelearningservices.DatastoreTypeArmSqldb),
			DatabaseName:      pulumi.String("dataset"),
			DatastoreName:     pulumi.String("sqlDatastore"),
			Password:          pulumi.String("<password>"),
			ResourceGroupName: pulumi.String("acjain-mleastUS2"),
			ServerName:        pulumi.String("dataset-azsql-srv"),
			UserName:          pulumi.String("demo_user"),
			WorkspaceName:     pulumi.String("acjain-mleastUS2"),
		})
		// Propagate a registration failure to the Pulumi engine.
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

// Registers database "dataset" on server "dataset-azsql-srv" as the datastore
// "sqlDatastore", authenticating with a SQL user name and password.
// "<password>" is a placeholder to be replaced with the real credential.
return await Deployment.RunAsync(() => 
{
    var machineLearningDatastore = new AzureNative.MachineLearningServices.MachineLearningDatastore("machineLearningDatastore", new()
    {
        DataStoreType = AzureNative.MachineLearningServices.DatastoreTypeArm.Sqldb,
        DatabaseName = "dataset",
        DatastoreName = "sqlDatastore",
        Password = "<password>",
        ResourceGroupName = "acjain-mleastUS2",
        ServerName = "dataset-azsql-srv",
        UserName = "demo_user",
        WorkspaceName = "acjain-mleastUS2",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.MachineLearningDatastore;
import com.pulumi.azurenative.machinelearningservices.MachineLearningDatastoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that registers database "dataset" on server
 * "dataset-azsql-srv" as the datastore "sqlDatastore", authenticating with a
 * SQL user name and password.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the datastore resource; invoked by {@code Pulumi.run}. */
    public static void stack(Context ctx) {
        // "<password>" is a placeholder to be replaced with the real credential.
        var machineLearningDatastore = new MachineLearningDatastore("machineLearningDatastore", MachineLearningDatastoreArgs.builder()
            .dataStoreType("sqldb")
            .databaseName("dataset")
            .datastoreName("sqlDatastore")
            .password("<password>")
            .resourceGroupName("acjain-mleastUS2")
            .serverName("dataset-azsql-srv")
            .userName("demo_user")
            .workspaceName("acjain-mleastUS2")
            .build());

    }
}
# Registers database "dataset" on server "dataset-azsql-srv" as the datastore
# "sqlDatastore", authenticating with a SQL user name and password.
# "<password>" is a placeholder to be replaced with the real credential.
resources:
  machineLearningDatastore:
    type: azure-native:machinelearningservices:MachineLearningDatastore
    properties:
      dataStoreType: sqldb
      databaseName: dataset
      datastoreName: sqlDatastore
      password: <password>
      resourceGroupName: acjain-mleastUS2
      serverName: dataset-azsql-srv
      userName: demo_user
      workspaceName: acjain-mleastUS2

The dataStoreType becomes “sqldb” for Azure SQL Database. The serverName and databaseName identify the database, while userName and password provide authentication. This enables ML experiments to query and load structured training data directly from SQL tables.

Connect to Databricks File System for Spark workflows

Teams using Databricks for distributed processing can register DBFS as a datastore to integrate Spark-based feature engineering with Azure ML pipelines.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// Registers a Databricks File System datastore in the "acjain-mleastUS2"
// workspace. Only the type, name, resource group, and workspace are set.
// The datastore is named "dbfsDatastore" so the name matches its type.
const machineLearningDatastore = new azure_native.machinelearningservices.MachineLearningDatastore("machineLearningDatastore", {
    dataStoreType: azure_native.machinelearningservices.DatastoreTypeArm.Dbfs,
    datastoreName: "dbfsDatastore",
    resourceGroupName: "acjain-mleastUS2",
    workspaceName: "acjain-mleastUS2",
});
import pulumi
import pulumi_azure_native as azure_native

# Registers a Databricks File System datastore in the "acjain-mleastUS2"
# workspace. Only the type, name, resource group, and workspace are set.
# The datastore is named "dbfsDatastore" so the name matches its type.
machine_learning_datastore = azure_native.machinelearningservices.MachineLearningDatastore("machineLearningDatastore",
    data_store_type=azure_native.machinelearningservices.DatastoreTypeArm.DBFS,
    datastore_name="dbfsDatastore",
    resource_group_name="acjain-mleastUS2",
    workspace_name="acjain-mleastUS2")
package main

import (
	machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that registers a Databricks File System datastore
// in the "acjain-mleastUS2" workspace. Only the type, name, resource group, and
// workspace are set. The datastore is named "dbfsDatastore" so the name matches
// its type.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewMachineLearningDatastore(ctx, "machineLearningDatastore", &machinelearningservices.MachineLearningDatastoreArgs{
			DataStoreType:     pulumi.String(machinelearningservices.DatastoreTypeArmDbfs),
			DatastoreName:     pulumi.String("dbfsDatastore"),
			ResourceGroupName: pulumi.String("acjain-mleastUS2"),
			WorkspaceName:     pulumi.String("acjain-mleastUS2"),
		})
		// Propagate a registration failure to the Pulumi engine.
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

// Registers a Databricks File System datastore in the "acjain-mleastUS2"
// workspace. Only the type, name, resource group, and workspace are set.
// The datastore is named "dbfsDatastore" so the name matches its type.
return await Deployment.RunAsync(() => 
{
    var machineLearningDatastore = new AzureNative.MachineLearningServices.MachineLearningDatastore("machineLearningDatastore", new()
    {
        DataStoreType = AzureNative.MachineLearningServices.DatastoreTypeArm.Dbfs,
        DatastoreName = "dbfsDatastore",
        ResourceGroupName = "acjain-mleastUS2",
        WorkspaceName = "acjain-mleastUS2",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.MachineLearningDatastore;
import com.pulumi.azurenative.machinelearningservices.MachineLearningDatastoreArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that registers a Databricks File System datastore in the
 * "acjain-mleastUS2" workspace. Only the type, name, resource group, and
 * workspace are set.
 */
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /** Declares the datastore resource; invoked by {@code Pulumi.run}. */
    public static void stack(Context ctx) {
        // Named "dbfsDatastore" so the datastore name matches its type.
        var machineLearningDatastore = new MachineLearningDatastore("machineLearningDatastore", MachineLearningDatastoreArgs.builder()
            .dataStoreType("dbfs")
            .datastoreName("dbfsDatastore")
            .resourceGroupName("acjain-mleastUS2")
            .workspaceName("acjain-mleastUS2")
            .build());

    }
}
# Registers a Databricks File System datastore in the "acjain-mleastUS2"
# workspace. Only the type, name, resource group, and workspace are set.
# The datastore is named "dbfsDatastore" so the name matches its type.
resources:
  machineLearningDatastore:
    type: azure-native:machinelearningservices:MachineLearningDatastore
    properties:
      dataStoreType: dbfs
      datastoreName: dbfsDatastore
      resourceGroupName: acjain-mleastUS2
      workspaceName: acjain-mleastUS2

The dataStoreType “dbfs” registers Databricks File System. This minimal configuration sets only the datastore type and name alongside the workspace and resource group references, as DBFS authentication is handled through the Databricks workspace integration.

Beyond these examples

These snippets focus on specific datastore features: storage backend types (Blob, ADLS Gen2, SQL, DBFS) and authentication methods (account keys, service principals, database credentials). They’re intentionally minimal rather than full ML pipeline configurations.

The examples reference pre-existing infrastructure: an Azure ML workspace; the storage accounts, data lakes, or databases being registered; and, for Azure AD authentication, a service principal. They focus on registering datastores rather than provisioning the underlying storage.

To keep things focused, common datastore patterns are omitted, including:

  • SAS token authentication (sasToken)
  • Workspace managed identity authentication (workspaceSystemAssignedIdentity)
  • Validation controls (skipValidation, includeSecret)
  • Cross-subscription and cross-resource-group references
  • SSL enforcement and custom endpoints for databases

These omissions are intentional: the goal is to illustrate how each datastore type is wired, not provide drop-in ML infrastructure modules. See the MachineLearningDatastore resource reference for all available configuration options.

Let's configure Azure Machine Learning Datastores

Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.

Try Pulumi Cloud for FREE

Frequently Asked Questions

Datastore Types & Configuration
What datastore types are supported?
Eight types are supported: ADLS (adls), ADLS Gen2 (adls-gen2), Blob Storage (blob), Databricks File System (dbfs), Azure Files (file), MySQL (mysqldb), PostgreSQL (psqldb), and SQL Database (sqldb).
How do I create a Databricks File System datastore?
Set dataStoreType to dbfs. DBFS datastores require minimal configuration beyond the standard workspaceName, resourceGroupName, and datastoreName parameters.
Can I skip validation when creating a datastore?
Yes, set skipValidation to true to skip validation that ensures data can be loaded from the dataset before registration.
Authentication & Security
How do I authenticate to ADLS or ADLS Gen2 datastores?
Use service principal authentication by providing clientId, clientSecret, and tenantId. For ADLS, also specify storeName. For ADLS Gen2, specify accountName and fileSystem.
How do I authenticate to Blob or File datastores?
Use accountKey for authentication along with accountName. For Blob storage, specify containerName. For File storage, specify shareName.
What's the difference between userId and userName for database datastores?
MySQL and PostgreSQL use userId with the format user@server (e.g., demo_user@dataset-mysql-srv), while SQL databases use userName without the server suffix.
What are the default security settings?
SSL enforcement is enabled by default (enforceSSL: true), and datastore secrets are included in responses by default (includeSecret: true).
Immutability & Updates
What properties can't be changed after creating a datastore?
Three properties are immutable: datastoreName, workspaceName, and resourceGroupName. Changes to these require recreating the resource.

Using a different cloud?

Explore analytics guides for other cloud providers: