Configure Azure Machine Learning Jobs

The azure-native:machinelearningservices:Job resource, part of the Pulumi Azure Native provider, defines Azure Machine Learning jobs: training runs, hyperparameter sweeps, and multi-step pipelines. This guide focuses on three capabilities: command jobs with distributed training, hyperparameter sweep configuration, and pipeline workflow orchestration.

Jobs execute within Azure ML workspaces and reference compute clusters, registered environments, and uploaded code assets. The examples are intentionally small. Combine them with your own workspace infrastructure, compute targets, and data assets.

Run a command job with distributed training

Teams training models start with command jobs that execute scripts on compute clusters, specifying code, runtime, and resources.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// Command job: executes `command` from the uploaded code asset (`codeId`) on
// the compute cluster `computeId`, with TensorFlow distributed training.
// All "string" values are generated placeholders - substitute real names/IDs.
const job = new azure_native.machinelearningservices.Job("job", {
    id: "string",
    jobBaseProperties: {
        codeId: "string",
        command: "string",
        computeId: "string",
        description: "string",
        displayName: "string",
        // Fan the run out across TensorFlow workers and parameter servers.
        distribution: {
            distributionType: "TensorFlow",
            parameterServerCount: 1,
            workerCount: 1,
        },
        environmentId: "string",
        // Passed to the training script's process environment at runtime.
        environmentVariables: {
            string: "string",
        },
        experimentName: "string",
        // AMLToken: run under the workspace-issued Azure ML token identity.
        identity: {
            identityType: "AMLToken",
        },
        inputs: {
            string: {
                description: "string",
                jobInputType: "literal", // value passed directly, not a data reference
                value: "string",
            },
        },
        jobType: "Command", // discriminator: marks jobBaseProperties as a command job
        limits: {
            jobLimitsType: "Command",
            timeout: "PT5M", // ISO-8601 duration: 5 minutes
        },
        outputs: {
            string: {
                description: "string",
                jobOutputType: "uri_file",
                mode: azure_native.machinelearningservices.OutputDeliveryMode.ReadWriteMount,
                uri: "string",
            },
        },
        properties: {
            string: "string",
        },
        // Instance size and count requested from the compute cluster.
        resources: {
            instanceCount: 1,
            instanceType: "string",
            properties: {
                string: {
                    // Opaque provider property bag; the GUID key is a placeholder.
                    "e6b6493e-7d5e-4db3-be1e-306ec641327e": null,
                },
            },
        },
        // Endpoints exposed by the running job (placeholder values).
        services: {
            string: {
                endpoint: "string",
                jobServiceType: "string",
                port: 1,
                properties: {
                    string: "string",
                },
            },
        },
        tags: {
            string: "string",
        },
    },
    resourceGroupName: "test-rg",
    workspaceName: "my-aml-workspace",
});
import pulumi
import pulumi_azure_native as azure_native

# Command job: executes `command` from the uploaded code asset on a compute
# cluster with TensorFlow distributed training. All "string" values are
# generated placeholders - substitute real names and ARM resource IDs.
job = azure_native.machinelearningservices.Job("job",
    id="string",
    job_base_properties={
        "code_id": "string",
        "command": "string",
        "compute_id": "string",
        "description": "string",
        "display_name": "string",
        # Fan the run out across TensorFlow workers and parameter servers.
        "distribution": {
            "distribution_type": "TensorFlow",
            "parameter_server_count": 1,
            "worker_count": 1,
        },
        "environment_id": "string",
        # Passed to the training script's process environment at runtime.
        "environment_variables": {
            "string": "string",
        },
        "experiment_name": "string",
        # AMLToken: run under the workspace-issued Azure ML token identity.
        "identity": {
            "identity_type": "AMLToken",
        },
        "inputs": {
            "string": {
                "description": "string",
                "job_input_type": "literal",  # value passed directly, not a data reference
                "value": "string",
            },
        },
        "job_type": "Command",  # discriminator: marks job_base_properties as a command job
        "limits": {
            "job_limits_type": "Command",
            "timeout": "PT5M",  # ISO-8601 duration: 5 minutes
        },
        "outputs": {
            "string": {
                "description": "string",
                "job_output_type": "uri_file",
                "mode": azure_native.machinelearningservices.OutputDeliveryMode.READ_WRITE_MOUNT,
                "uri": "string",
            },
        },
        "properties": {
            "string": "string",
        },
        # Instance size and count requested from the compute cluster.
        "resources": {
            "instance_count": 1,
            "instance_type": "string",
            "properties": {
                "string": {
                    # Opaque provider property bag; the GUID key is a placeholder.
                    "e6b6493e-7d5e-4db3-be1e-306ec641327e": None,
                },
            },
        },
        # Endpoints exposed by the running job (placeholder values).
        "services": {
            "string": {
                "endpoint": "string",
                "job_service_type": "string",
                "port": 1,
                "properties": {
                    "string": "string",
                },
            },
        },
        "tags": {
            "string": "string",
        },
    },
    resource_group_name="test-rg",
    workspace_name="my-aml-workspace")
package main

import (
	machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Command job: executes Command from the uploaded code asset on a
		// compute cluster with TensorFlow distributed training. All "string"
		// values are generated placeholders - substitute real names/IDs.
		_, err := machinelearningservices.NewJob(ctx, "job", &machinelearningservices.JobArgs{
			Id: pulumi.String("string"),
			JobBaseProperties: &machinelearningservices.CommandJobArgs{
				CodeId:      pulumi.String("string"),
				Command:     pulumi.String("string"),
				ComputeId:   pulumi.String("string"),
				Description: pulumi.String("string"),
				DisplayName: pulumi.String("string"),
				// Fan the run out across TensorFlow workers and parameter servers.
				Distribution: machinelearningservices.TensorFlow{
					DistributionType:     "TensorFlow",
					ParameterServerCount: 1,
					WorkerCount:          1,
				},
				EnvironmentId: pulumi.String("string"),
				// Passed to the training script's process environment at runtime.
				EnvironmentVariables: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
				ExperimentName: pulumi.String("string"),
				// AMLToken: run under the workspace-issued Azure ML token identity.
				Identity: machinelearningservices.AmlToken{
					IdentityType: "AMLToken",
				},
				Inputs: pulumi.Map{
					"string": machinelearningservices.LiteralJobInput{
						Description:  "string",
						JobInputType: "literal", // value passed directly, not a data reference
						Value:        "string",
					},
				},
				JobType: pulumi.String("Command"), // discriminator for the job kind
				Limits: &machinelearningservices.CommandJobLimitsArgs{
					JobLimitsType: pulumi.String("Command"),
					Timeout:       pulumi.String("PT5M"), // ISO-8601 duration: 5 minutes
				},
				Outputs: pulumi.Map{
					"string": machinelearningservices.UriFileJobOutput{
						Description:   "string",
						JobOutputType: "uri_file",
						Mode:          machinelearningservices.OutputDeliveryModeReadWriteMount,
						Uri:           "string",
					},
				},
				Properties: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
				// Instance size and count requested from the compute cluster.
				Resources: &machinelearningservices.JobResourceConfigurationArgs{
					InstanceCount: pulumi.Int(1),
					InstanceType:  pulumi.String("string"),
					Properties: pulumi.Map{
						// Opaque provider property bag; the GUID key is a placeholder.
						"string": pulumi.Any(map[string]interface{}{
							"e6b6493e-7d5e-4db3-be1e-306ec641327e": nil,
						}),
					},
				},
				// Endpoints exposed by the running job (placeholder values).
				Services: machinelearningservices.JobServiceMap{
					"string": &machinelearningservices.JobServiceArgs{
						Endpoint:       pulumi.String("string"),
						JobServiceType: pulumi.String("string"),
						Port:           pulumi.Int(1),
						Properties: pulumi.StringMap{
							"string": pulumi.String("string"),
						},
					},
				},
				Tags: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
			},
			ResourceGroupName: pulumi.String("test-rg"),
			WorkspaceName:     pulumi.String("my-aml-workspace"),
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

return await Deployment.RunAsync(() => 
{
    // Command job: executes Command from the uploaded code asset on a compute
    // cluster with TensorFlow distributed training. All "string" values are
    // generated placeholders - substitute real names and ARM resource IDs.
    var job = new AzureNative.MachineLearningServices.Job("job", new()
    {
        Id = "string",
        JobBaseProperties = new AzureNative.MachineLearningServices.Inputs.CommandJobArgs
        {
            CodeId = "string",
            Command = "string",
            ComputeId = "string",
            Description = "string",
            DisplayName = "string",
            // Fan the run out across TensorFlow workers and parameter servers.
            Distribution = new AzureNative.MachineLearningServices.Inputs.TensorFlowArgs
            {
                DistributionType = "TensorFlow",
                ParameterServerCount = 1,
                WorkerCount = 1,
            },
            EnvironmentId = "string",
            // Passed to the training script's process environment at runtime.
            EnvironmentVariables = 
            {
                { "string", "string" },
            },
            ExperimentName = "string",
            // AMLToken: run under the workspace-issued Azure ML token identity.
            Identity = new AzureNative.MachineLearningServices.Inputs.AmlTokenArgs
            {
                IdentityType = "AMLToken",
            },
            Inputs = 
            {
                { "string", new AzureNative.MachineLearningServices.Inputs.LiteralJobInputArgs
                {
                    Description = "string",
                    JobInputType = "literal", // value passed directly, not a data reference
                    Value = "string",
                } },
            },
            JobType = "Command", // discriminator: marks JobBaseProperties as a command job
            Limits = new AzureNative.MachineLearningServices.Inputs.CommandJobLimitsArgs
            {
                JobLimitsType = "Command",
                Timeout = "PT5M", // ISO-8601 duration: 5 minutes
            },
            Outputs = 
            {
                { "string", new AzureNative.MachineLearningServices.Inputs.UriFileJobOutputArgs
                {
                    Description = "string",
                    JobOutputType = "uri_file",
                    Mode = AzureNative.MachineLearningServices.OutputDeliveryMode.ReadWriteMount,
                    Uri = "string",
                } },
            },
            Properties = 
            {
                { "string", "string" },
            },
            // Instance size and count requested from the compute cluster.
            Resources = new AzureNative.MachineLearningServices.Inputs.JobResourceConfigurationArgs
            {
                InstanceCount = 1,
                InstanceType = "string",
                Properties = 
                {
                    // Opaque provider property bag; the GUID key is a placeholder.
                    { "string", new Dictionary<string, object?>
                    {
                        ["e6b6493e-7d5e-4db3-be1e-306ec641327e"] = null,
                    } },
                },
            },
            // Endpoints exposed by the running job (placeholder values).
            Services = 
            {
                { "string", new AzureNative.MachineLearningServices.Inputs.JobServiceArgs
                {
                    Endpoint = "string",
                    JobServiceType = "string",
                    Port = 1,
                    Properties = 
                    {
                        { "string", "string" },
                    },
                } },
            },
            Tags = 
            {
                { "string", "string" },
            },
        },
        ResourceGroupName = "test-rg",
        WorkspaceName = "my-aml-workspace",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.Job;
import com.pulumi.azurenative.machinelearningservices.JobArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Creates an Azure ML command job: executes {@code command} from the
     * uploaded code asset on a compute cluster with TensorFlow distributed
     * training. All "string" values are placeholders - substitute real IDs.
     *
     * NOTE(review): the {@code *Args} builder types used below
     * (CommandJobArgs, TensorFlowArgs, AmlTokenArgs, ...) live in
     * {@code com.pulumi.azurenative.machinelearningservices.inputs} and must
     * be imported for this file to compile - confirm against the SDK.
     */
    public static void stack(Context ctx) {
        var job = new Job("job", JobArgs.builder()
            .id("string")
            .jobBaseProperties(CommandJobArgs.builder()
                .codeId("string")
                .command("string")
                .computeId("string")
                .description("string")
                .displayName("string")
                // Fan the run out across TensorFlow workers and parameter servers.
                .distribution(TensorFlowArgs.builder()
                    .distributionType("TensorFlow")
                    .parameterServerCount(1)
                    .workerCount(1)
                    .build())
                .environmentId("string")
                // Passed to the training script's process environment at runtime.
                .environmentVariables(Map.of("string", "string"))
                .experimentName("string")
                // AMLToken: run under the workspace-issued Azure ML token identity.
                .identity(AmlTokenArgs.builder()
                    .identityType("AMLToken")
                    .build())
                .inputs(Map.of("string", LiteralJobInputArgs.builder()
                    .description("string")
                    .jobInputType("literal") // value passed directly, not a data reference
                    .value("string")
                    .build()))
                .jobType("Command") // discriminator: marks jobBaseProperties as a command job
                // Use the typed builder, consistent with the other nested
                // properties, instead of a raw Map.
                .limits(CommandJobLimitsArgs.builder()
                    .jobLimitsType("Command")
                    .timeout("PT5M") // ISO-8601 duration: 5 minutes
                    .build())
                .outputs(Map.of("string", UriFileJobOutputArgs.builder()
                    .description("string")
                    .jobOutputType("uri_file")
                    .mode("ReadWriteMount")
                    .uri("string")
                    .build()))
                .properties(Map.of("string", "string"))
                // Instance size and count requested from the compute cluster.
                .resources(JobResourceConfigurationArgs.builder()
                    .instanceCount(1)
                    .instanceType("string")
                    // Map.of rejects null values (throws NullPointerException at
                    // runtime), so the null-valued entry must use singletonMap.
                    .properties(Map.of("string", java.util.Collections.singletonMap("e6b6493e-7d5e-4db3-be1e-306ec641327e", null)))
                    .build())
                // Endpoints exposed by the running job (placeholder values).
                .services(Map.of("string", JobServiceArgs.builder()
                    .endpoint("string")
                    .jobServiceType("string")
                    .port(1)
                    .properties(Map.of("string", "string"))
                    .build()))
                .tags(Map.of("string", "string"))
                .build())
            .resourceGroupName("test-rg")
            .workspaceName("my-aml-workspace")
            .build());

    }
}
# Command job: executes `command` from the uploaded code asset on a compute
# cluster with TensorFlow distributed training. All "string" values are
# generated placeholders - substitute real names and ARM resource IDs.
resources:
  job:
    type: azure-native:machinelearningservices:Job
    properties:
      id: string
      jobBaseProperties:
        codeId: string
        command: string
        computeId: string
        description: string
        displayName: string
        # Fan the run out across TensorFlow workers and parameter servers.
        distribution:
          distributionType: TensorFlow
          parameterServerCount: 1
          workerCount: 1
        environmentId: string
        # Passed to the training script's process environment at runtime.
        environmentVariables:
          string: string
        experimentName: string
        # AMLToken: run under the workspace-issued Azure ML token identity.
        identity:
          identityType: AMLToken
        inputs:
          string:
            description: string
            jobInputType: literal # value passed directly, not a data reference
            value: string
        jobType: Command # discriminator: marks jobBaseProperties as a command job
        limits:
          jobLimitsType: Command
          timeout: PT5M # ISO-8601 duration: 5 minutes
        outputs:
          string:
            description: string
            jobOutputType: uri_file
            mode: ReadWriteMount
            uri: string
        properties:
          string: string
        # Instance size and count requested from the compute cluster.
        resources:
          instanceCount: 1
          instanceType: string
          properties:
            string:
              # Opaque provider property bag; the GUID key is a placeholder.
              e6b6493e-7d5e-4db3-be1e-306ec641327e: null
        # Endpoints exposed by the running job (placeholder values).
        services:
          string:
            endpoint: string
            jobServiceType: string
            port: 1
            properties:
              string: string
        tags:
          string: string
      resourceGroupName: test-rg
      workspaceName: my-aml-workspace

The command property defines the script to execute, while codeId points to your uploaded code asset. The distribution property configures TensorFlow distributed training across multiple workers and parameter servers. The resources block specifies instance type and count for the compute cluster. Environment variables pass configuration to your training script at runtime.

Optimize hyperparameters with sweep jobs

Model tuning requires testing multiple hyperparameter combinations. Sweep jobs automate this by running trials across a search space with early termination.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// Sweep job: repeatedly runs the `trial` command over `searchSpace`,
// optimizing `objective` with Grid sampling; MedianStopping terminates
// underperforming trials early. "string" values are placeholders.
const job = new azure_native.machinelearningservices.Job("job", {
    id: "string",
    jobBaseProperties: {
        computeId: "string",
        description: "string",
        displayName: "string",
        // Stop trials whose metric falls below the running median.
        earlyTermination: {
            delayEvaluation: 1,
            evaluationInterval: 1,
            policyType: "MedianStopping",
        },
        experimentName: "string",
        jobType: "Sweep", // discriminator: marks jobBaseProperties as a sweep job
        // Caps on trial count, concurrency, and per-trial runtime.
        limits: {
            jobLimitsType: "Sweep",
            maxConcurrentTrials: 1,
            maxTotalTrials: 1,
            trialTimeout: "PT1S", // ISO-8601 duration: 1 second
        },
        // Metric to optimize and the direction (minimize here).
        objective: {
            goal: azure_native.machinelearningservices.Goal.Minimize,
            primaryMetric: "string",
        },
        properties: {
            string: "string",
        },
        // Grid sampling: exhaustively enumerate searchSpace combinations.
        samplingAlgorithm: {
            samplingAlgorithmType: "Grid",
        },
        // Hyperparameter search space (empty placeholder here).
        searchSpace: {
            string: {},
        },
        services: {
            string: {
                endpoint: "string",
                jobServiceType: "string",
                port: 1,
                properties: {
                    string: "string",
                },
            },
        },
        tags: {
            string: "string",
        },
        // The training command executed for each hyperparameter combination.
        trial: {
            codeId: "string",
            command: "string",
            distribution: {
                distributionType: "Mpi",
                processCountPerInstance: 1,
            },
            environmentId: "string",
            environmentVariables: {
                string: "string",
            },
            resources: {
                instanceCount: 1,
                instanceType: "string",
                properties: {
                    string: {
                        // Opaque provider property bag; the GUID key is a placeholder.
                        "e6b6493e-7d5e-4db3-be1e-306ec641327e": null,
                    },
                },
            },
        },
    },
    resourceGroupName: "test-rg",
    workspaceName: "my-aml-workspace",
});
import pulumi
import pulumi_azure_native as azure_native

# Sweep job: repeatedly runs the `trial` command over `search_space`,
# optimizing `objective` with Grid sampling; MedianStopping terminates
# underperforming trials early. "string" values are placeholders.
job = azure_native.machinelearningservices.Job("job",
    id="string",
    job_base_properties={
        "compute_id": "string",
        "description": "string",
        "display_name": "string",
        # Stop trials whose metric falls below the running median.
        "early_termination": {
            "delay_evaluation": 1,
            "evaluation_interval": 1,
            "policy_type": "MedianStopping",
        },
        "experiment_name": "string",
        "job_type": "Sweep",  # discriminator: marks job_base_properties as a sweep job
        # Caps on trial count, concurrency, and per-trial runtime.
        "limits": {
            "job_limits_type": "Sweep",
            "max_concurrent_trials": 1,
            "max_total_trials": 1,
            "trial_timeout": "PT1S",  # ISO-8601 duration: 1 second
        },
        # Metric to optimize and the direction (minimize here).
        "objective": {
            "goal": azure_native.machinelearningservices.Goal.MINIMIZE,
            "primary_metric": "string",
        },
        "properties": {
            "string": "string",
        },
        # Grid sampling: exhaustively enumerate search-space combinations.
        "sampling_algorithm": {
            "sampling_algorithm_type": "Grid",
        },
        # Hyperparameter search space (empty placeholder here).
        "search_space": {
            "string": {},
        },
        "services": {
            "string": {
                "endpoint": "string",
                "job_service_type": "string",
                "port": 1,
                "properties": {
                    "string": "string",
                },
            },
        },
        "tags": {
            "string": "string",
        },
        # The training command executed for each hyperparameter combination.
        "trial": {
            "code_id": "string",
            "command": "string",
            "distribution": {
                "distribution_type": "Mpi",
                "process_count_per_instance": 1,
            },
            "environment_id": "string",
            "environment_variables": {
                "string": "string",
            },
            "resources": {
                "instance_count": 1,
                "instance_type": "string",
                "properties": {
                    "string": {
                        # Opaque provider property bag; the GUID key is a placeholder.
                        "e6b6493e-7d5e-4db3-be1e-306ec641327e": None,
                    },
                },
            },
        },
    },
    resource_group_name="test-rg",
    workspace_name="my-aml-workspace")
package main

import (
	machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Sweep job: repeatedly runs the Trial command over SearchSpace,
		// optimizing Objective with Grid sampling; MedianStopping terminates
		// underperforming trials early. "string" values are placeholders.
		_, err := machinelearningservices.NewJob(ctx, "job", &machinelearningservices.JobArgs{
			Id: pulumi.String("string"),
			JobBaseProperties: &machinelearningservices.SweepJobArgs{
				ComputeId:   pulumi.String("string"),
				Description: pulumi.String("string"),
				DisplayName: pulumi.String("string"),
				// Stop trials whose metric falls below the running median.
				EarlyTermination: machinelearningservices.MedianStoppingPolicy{
					DelayEvaluation:    1,
					EvaluationInterval: 1,
					PolicyType:         "MedianStopping",
				},
				ExperimentName: pulumi.String("string"),
				JobType:        pulumi.String("Sweep"), // discriminator for the job kind
				// Caps on trial count, concurrency, and per-trial runtime.
				Limits: &machinelearningservices.SweepJobLimitsArgs{
					JobLimitsType:       pulumi.String("Sweep"),
					MaxConcurrentTrials: pulumi.Int(1),
					MaxTotalTrials:      pulumi.Int(1),
					TrialTimeout:        pulumi.String("PT1S"), // ISO-8601 duration: 1 second
				},
				// Metric to optimize and the direction (minimize here).
				Objective: &machinelearningservices.ObjectiveArgs{
					Goal:          pulumi.String(machinelearningservices.GoalMinimize),
					PrimaryMetric: pulumi.String("string"),
				},
				Properties: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
				// Grid sampling: exhaustively enumerate search-space combinations.
				SamplingAlgorithm: machinelearningservices.GridSamplingAlgorithm{
					SamplingAlgorithmType: "Grid",
				},
				// Hyperparameter search space (empty placeholder here).
				SearchSpace: pulumi.Any(map[string]interface{}{
					"string": map[string]interface{}{},
				}),
				Services: machinelearningservices.JobServiceMap{
					"string": &machinelearningservices.JobServiceArgs{
						Endpoint:       pulumi.String("string"),
						JobServiceType: pulumi.String("string"),
						Port:           pulumi.Int(1),
						Properties: pulumi.StringMap{
							"string": pulumi.String("string"),
						},
					},
				},
				Tags: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
				// The training command executed for each hyperparameter combination.
				Trial: &machinelearningservices.TrialComponentArgs{
					CodeId:  pulumi.String("string"),
					Command: pulumi.String("string"),
					Distribution: machinelearningservices.Mpi{
						DistributionType:        "Mpi",
						ProcessCountPerInstance: 1,
					},
					EnvironmentId: pulumi.String("string"),
					EnvironmentVariables: pulumi.StringMap{
						"string": pulumi.String("string"),
					},
					Resources: &machinelearningservices.JobResourceConfigurationArgs{
						InstanceCount: pulumi.Int(1),
						InstanceType:  pulumi.String("string"),
						Properties: pulumi.Map{
							// Opaque provider property bag; the GUID key is a placeholder.
							"string": pulumi.Any(map[string]interface{}{
								"e6b6493e-7d5e-4db3-be1e-306ec641327e": nil,
							}),
						},
					},
				},
			},
			ResourceGroupName: pulumi.String("test-rg"),
			WorkspaceName:     pulumi.String("my-aml-workspace"),
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

return await Deployment.RunAsync(() => 
{
    // Sweep job: repeatedly runs the Trial command over SearchSpace,
    // optimizing Objective with Grid sampling; MedianStopping terminates
    // underperforming trials early. "string" values are placeholders.
    var job = new AzureNative.MachineLearningServices.Job("job", new()
    {
        Id = "string",
        JobBaseProperties = new AzureNative.MachineLearningServices.Inputs.SweepJobArgs
        {
            ComputeId = "string",
            Description = "string",
            DisplayName = "string",
            // Stop trials whose metric falls below the running median.
            EarlyTermination = new AzureNative.MachineLearningServices.Inputs.MedianStoppingPolicyArgs
            {
                DelayEvaluation = 1,
                EvaluationInterval = 1,
                PolicyType = "MedianStopping",
            },
            ExperimentName = "string",
            JobType = "Sweep", // discriminator: marks JobBaseProperties as a sweep job
            // Caps on trial count, concurrency, and per-trial runtime.
            Limits = new AzureNative.MachineLearningServices.Inputs.SweepJobLimitsArgs
            {
                JobLimitsType = "Sweep",
                MaxConcurrentTrials = 1,
                MaxTotalTrials = 1,
                TrialTimeout = "PT1S", // ISO-8601 duration: 1 second
            },
            // Metric to optimize and the direction (minimize here).
            Objective = new AzureNative.MachineLearningServices.Inputs.ObjectiveArgs
            {
                Goal = AzureNative.MachineLearningServices.Goal.Minimize,
                PrimaryMetric = "string",
            },
            Properties = 
            {
                { "string", "string" },
            },
            // Grid sampling: exhaustively enumerate search-space combinations.
            SamplingAlgorithm = new AzureNative.MachineLearningServices.Inputs.GridSamplingAlgorithmArgs
            {
                SamplingAlgorithmType = "Grid",
            },
            // Hyperparameter search space (empty placeholder here).
            SearchSpace = new Dictionary<string, object?>
            {
                ["string"] = new Dictionary<string, object?>
                {
                },
            },
            Services = 
            {
                { "string", new AzureNative.MachineLearningServices.Inputs.JobServiceArgs
                {
                    Endpoint = "string",
                    JobServiceType = "string",
                    Port = 1,
                    Properties = 
                    {
                        { "string", "string" },
                    },
                } },
            },
            Tags = 
            {
                { "string", "string" },
            },
            // The training command executed for each hyperparameter combination.
            Trial = new AzureNative.MachineLearningServices.Inputs.TrialComponentArgs
            {
                CodeId = "string",
                Command = "string",
                Distribution = new AzureNative.MachineLearningServices.Inputs.MpiArgs
                {
                    DistributionType = "Mpi",
                    ProcessCountPerInstance = 1,
                },
                EnvironmentId = "string",
                EnvironmentVariables = 
                {
                    { "string", "string" },
                },
                Resources = new AzureNative.MachineLearningServices.Inputs.JobResourceConfigurationArgs
                {
                    InstanceCount = 1,
                    InstanceType = "string",
                    Properties = 
                    {
                        // Opaque provider property bag; the GUID key is a placeholder.
                        { "string", new Dictionary<string, object?>
                        {
                            ["e6b6493e-7d5e-4db3-be1e-306ec641327e"] = null,
                        } },
                    },
                },
            },
        },
        ResourceGroupName = "test-rg",
        WorkspaceName = "my-aml-workspace",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.Job;
import com.pulumi.azurenative.machinelearningservices.JobArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Creates an Azure ML sweep job: repeatedly runs the trial command over
     * the search space, optimizing the objective with Grid sampling;
     * MedianStopping terminates underperforming trials early.
     * All "string" values are placeholders - substitute real IDs.
     *
     * NOTE(review): the {@code *Args} builder types used below (SweepJobArgs,
     * MedianStoppingPolicyArgs, ...) live in
     * {@code com.pulumi.azurenative.machinelearningservices.inputs} and must
     * be imported for this file to compile - confirm against the SDK.
     */
    public static void stack(Context ctx) {
        var job = new Job("job", JobArgs.builder()
            .id("string")
            .jobBaseProperties(SweepJobArgs.builder()
                .computeId("string")
                .description("string")
                .displayName("string")
                // Stop trials whose metric falls below the running median.
                .earlyTermination(MedianStoppingPolicyArgs.builder()
                    .delayEvaluation(1)
                    .evaluationInterval(1)
                    .policyType("MedianStopping")
                    .build())
                .experimentName("string")
                .jobType("Sweep") // discriminator: marks jobBaseProperties as a sweep job
                // Use the typed builder, consistent with the other nested
                // properties, instead of a raw Map.
                .limits(SweepJobLimitsArgs.builder()
                    .jobLimitsType("Sweep")
                    .maxConcurrentTrials(1)
                    .maxTotalTrials(1)
                    .trialTimeout("PT1S") // ISO-8601 duration: 1 second
                    .build())
                // Metric to optimize and the direction (minimize here).
                .objective(ObjectiveArgs.builder()
                    .goal("Minimize")
                    .primaryMetric("string")
                    .build())
                .properties(Map.of("string", "string"))
                // Grid sampling: exhaustively enumerate search-space combinations.
                .samplingAlgorithm(GridSamplingAlgorithmArgs.builder()
                    .samplingAlgorithmType("Grid")
                    .build())
                // Hyperparameter search space (empty placeholder here).
                .searchSpace(Map.of("string", Map.ofEntries(
                )))
                .services(Map.of("string", JobServiceArgs.builder()
                    .endpoint("string")
                    .jobServiceType("string")
                    .port(1)
                    .properties(Map.of("string", "string"))
                    .build()))
                .tags(Map.of("string", "string"))
                // The training command executed for each hyperparameter combination.
                .trial(TrialComponentArgs.builder()
                    .codeId("string")
                    .command("string")
                    .distribution(MpiArgs.builder()
                        .distributionType("Mpi")
                        .processCountPerInstance(1)
                        .build())
                    .environmentId("string")
                    .environmentVariables(Map.of("string", "string"))
                    .resources(JobResourceConfigurationArgs.builder()
                        .instanceCount(1)
                        .instanceType("string")
                        // Map.of rejects null values (throws NullPointerException
                        // at runtime), so the null-valued entry must use singletonMap.
                        .properties(Map.of("string", java.util.Collections.singletonMap("e6b6493e-7d5e-4db3-be1e-306ec641327e", null)))
                        .build())
                    .build())
                .build())
            .resourceGroupName("test-rg")
            .workspaceName("my-aml-workspace")
            .build());

    }
}
# Sweep job: repeatedly runs the `trial` command over `searchSpace`,
# optimizing `objective` with Grid sampling; MedianStopping terminates
# underperforming trials early. "string" values are placeholders.
resources:
  job:
    type: azure-native:machinelearningservices:Job
    properties:
      id: string
      jobBaseProperties:
        computeId: string
        description: string
        displayName: string
        # Stop trials whose metric falls below the running median.
        earlyTermination:
          delayEvaluation: 1
          evaluationInterval: 1
          policyType: MedianStopping
        experimentName: string
        jobType: Sweep # discriminator: marks jobBaseProperties as a sweep job
        # Caps on trial count, concurrency, and per-trial runtime.
        limits:
          jobLimitsType: Sweep
          maxConcurrentTrials: 1
          maxTotalTrials: 1
          trialTimeout: PT1S # ISO-8601 duration: 1 second
        # Metric to optimize and the direction (minimize here).
        objective:
          goal: Minimize
          primaryMetric: string
        properties:
          string: string
        # Grid sampling: exhaustively enumerate search-space combinations.
        samplingAlgorithm:
          samplingAlgorithmType: Grid
        # Hyperparameter search space (empty placeholder here).
        searchSpace:
          string: {}
        services:
          string:
            endpoint: string
            jobServiceType: string
            port: 1
            properties:
              string: string
        tags:
          string: string
        # The training command executed for each hyperparameter combination.
        trial:
          codeId: string
          command: string
          distribution:
            distributionType: Mpi
            processCountPerInstance: 1
          environmentId: string
          environmentVariables:
            string: string
          resources:
            instanceCount: 1
            instanceType: string
            properties:
              string:
                # Opaque provider property bag; the GUID key is a placeholder.
                e6b6493e-7d5e-4db3-be1e-306ec641327e: null
      resourceGroupName: test-rg
      workspaceName: my-aml-workspace

The objective property defines what metric to optimize and whether to minimize or maximize it. The samplingAlgorithm determines how to explore the searchSpace (grid, random, or Bayesian). The earlyTermination policy stops underperforming trials early using median stopping rules, saving compute costs. The trial component defines the training script that runs for each hyperparameter combination, while limits controls maximum trials and concurrency.

Orchestrate multi-step workflows with pipeline jobs

Complex ML workflows chain multiple steps like data preparation, training, and evaluation. Pipeline jobs coordinate these steps and manage data flow.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// Minimal pipeline job: jobBaseProperties is discriminated by jobType ("Pipeline").
// Placeholder "string" values stand in for real resource IDs and names.
const job = new azure_native.machinelearningservices.Job("job", {
    id: "string",
    jobBaseProperties: {
        computeId: "string",
        description: "string",
        displayName: "string",
        experimentName: "string",
        // Named inputs; "literal" passes the value through unchanged.
        inputs: {
            string: {
                description: "string",
                jobInputType: "literal",
                value: "string",
            },
        },
        jobType: "Pipeline",
        // uri_file output with Upload mode: delivered after the job completes.
        outputs: {
            string: {
                description: "string",
                jobOutputType: "uri_file",
                mode: azure_native.machinelearningservices.OutputDeliveryMode.Upload,
                uri: "string",
            },
        },
        properties: {
            string: "string",
        },
        // Endpoints (keyed by name) the job exposes while running.
        services: {
            string: {
                endpoint: "string",
                jobServiceType: "string",
                port: 1,
                properties: {
                    string: "string",
                },
            },
        },
        // Pipeline-level settings; empty in this minimal example.
        settings: {},
        tags: {
            string: "string",
        },
    },
    // Immutable after creation: changing these recreates the job.
    resourceGroupName: "test-rg",
    workspaceName: "my-aml-workspace",
});
import pulumi
import pulumi_azure_native as azure_native

# Pipeline job payload; discriminated by "job_type": "Pipeline".
# Placeholder "string" values stand in for real resource IDs and names.
pipeline_properties = {
    "compute_id": "string",
    "description": "string",
    "display_name": "string",
    "experiment_name": "string",
    # Named inputs; "literal" passes the value through unchanged.
    "inputs": {
        "string": {
            "description": "string",
            "job_input_type": "literal",
            "value": "string",
        },
    },
    "job_type": "Pipeline",
    # uri_file output with Upload mode: delivered after the job completes.
    "outputs": {
        "string": {
            "description": "string",
            "job_output_type": "uri_file",
            "mode": azure_native.machinelearningservices.OutputDeliveryMode.UPLOAD,
            "uri": "string",
        },
    },
    "properties": {
        "string": "string",
    },
    # Endpoints (keyed by name) the job exposes while running.
    "services": {
        "string": {
            "endpoint": "string",
            "job_service_type": "string",
            "port": 1,
            "properties": {
                "string": "string",
            },
        },
    },
    # Pipeline-level settings; empty in this minimal example.
    "settings": {},
    "tags": {
        "string": "string",
    },
}

job = azure_native.machinelearningservices.Job(
    "job",
    id="string",
    job_base_properties=pipeline_properties,
    resource_group_name="test-rg",
    workspace_name="my-aml-workspace",
)
package main

import (
	machinelearningservices "github.com/pulumi/pulumi-azure-native-sdk/machinelearningservices/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := machinelearningservices.NewJob(ctx, "job", &machinelearningservices.JobArgs{
			Id: pulumi.String("string"),
			JobBaseProperties: &machinelearningservices.PipelineJobArgs{
				ComputeId:      pulumi.String("string"),
				Description:    pulumi.String("string"),
				DisplayName:    pulumi.String("string"),
				ExperimentName: pulumi.String("string"),
				Inputs: pulumi.Map{
					"string": machinelearningservices.LiteralJobInput{
						Description:  "string",
						JobInputType: "literal",
						Value:        "string",
					},
				},
				JobType: pulumi.String("Pipeline"),
				Outputs: pulumi.Map{
					"string": machinelearningservices.UriFileJobOutput{
						Description:   "string",
						JobOutputType: "uri_file",
						Mode:          machinelearningservices.OutputDeliveryModeUpload,
						Uri:           "string",
					},
				},
				Properties: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
				Services: machinelearningservices.JobServiceMap{
					"string": &machinelearningservices.JobServiceArgs{
						Endpoint:       pulumi.String("string"),
						JobServiceType: pulumi.String("string"),
						Port:           pulumi.Int(1),
						Properties: pulumi.StringMap{
							"string": pulumi.String("string"),
						},
					},
				},
				Settings: pulumi.Any(map[string]interface{}{}),
				Tags: pulumi.StringMap{
					"string": pulumi.String("string"),
				},
			},
			ResourceGroupName: pulumi.String("test-rg"),
			WorkspaceName:     pulumi.String("my-aml-workspace"),
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

return await Deployment.RunAsync(() => 
{
    // Minimal pipeline job: JobBaseProperties is discriminated by JobType ("Pipeline").
    // Placeholder "string" values stand in for real resource IDs and names.
    var job = new AzureNative.MachineLearningServices.Job("job", new()
    {
        Id = "string",
        JobBaseProperties = new AzureNative.MachineLearningServices.Inputs.PipelineJobArgs
        {
            ComputeId = "string",
            Description = "string",
            DisplayName = "string",
            ExperimentName = "string",
            // Named inputs; "literal" passes the value through unchanged.
            Inputs = 
            {
                { "string", new AzureNative.MachineLearningServices.Inputs.LiteralJobInputArgs
                {
                    Description = "string",
                    JobInputType = "literal",
                    Value = "string",
                } },
            },
            JobType = "Pipeline",
            // uri_file output with Upload mode: delivered after the job completes.
            Outputs = 
            {
                { "string", new AzureNative.MachineLearningServices.Inputs.UriFileJobOutputArgs
                {
                    Description = "string",
                    JobOutputType = "uri_file",
                    Mode = AzureNative.MachineLearningServices.OutputDeliveryMode.Upload,
                    Uri = "string",
                } },
            },
            Properties = 
            {
                { "string", "string" },
            },
            // Endpoints (keyed by name) the job exposes while running.
            Services = 
            {
                { "string", new AzureNative.MachineLearningServices.Inputs.JobServiceArgs
                {
                    Endpoint = "string",
                    JobServiceType = "string",
                    Port = 1,
                    Properties = 
                    {
                        { "string", "string" },
                    },
                } },
            },
            // Pipeline-level settings; omitted (null) in this minimal example.
            Settings = null,
            Tags = 
            {
                { "string", "string" },
            },
        },
        ResourceGroupName = "test-rg",
        WorkspaceName = "my-aml-workspace",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.machinelearningservices.Job;
import com.pulumi.azurenative.machinelearningservices.JobArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var job = new Job("job", JobArgs.builder()
            .id("string")
            .jobBaseProperties(PipelineJobArgs.builder()
                .computeId("string")
                .description("string")
                .displayName("string")
                .experimentName("string")
                .inputs(Map.of("string", LiteralJobInputArgs.builder()
                    .description("string")
                    .jobInputType("literal")
                    .value("string")
                    .build()))
                .jobType("Pipeline")
                .outputs(Map.of("string", UriFileJobOutputArgs.builder()
                    .description("string")
                    .jobOutputType("uri_file")
                    .mode("Upload")
                    .uri("string")
                    .build()))
                .properties(Map.of("string", "string"))
                .services(Map.of("string", JobServiceArgs.builder()
                    .endpoint("string")
                    .jobServiceType("string")
                    .port(1)
                    .properties(Map.of("string", "string"))
                    .build()))
                .settings(Map.ofEntries(
                ))
                .tags(Map.of("string", "string"))
                .build())
            .resourceGroupName("test-rg")
            .workspaceName("my-aml-workspace")
            .build());

    }
}
resources:
  job:
    type: azure-native:machinelearningservices:Job
    properties:
      id: string
      jobBaseProperties:
        computeId: string
        description: string
        displayName: string
        experimentName: string
        # Named inputs; "literal" passes the value through unchanged.
        inputs:
          string:
            description: string
            jobInputType: literal
            value: string
        jobType: Pipeline
        # uri_file output with Upload mode: delivered after the job completes.
        outputs:
          string:
            description: string
            jobOutputType: uri_file
            mode: Upload
            uri: string
        properties:
          string: string
        # Endpoints (keyed by name) the job exposes while running.
        services:
          string:
            endpoint: string
            jobServiceType: string
            port: 1
            properties:
              string: string
        # Pipeline-level settings; empty in this minimal example.
        settings: {}
        tags:
          string: string
      resourceGroupName: test-rg
      workspaceName: my-aml-workspace

The inputs and outputs properties define data contracts between pipeline steps. The settings property holds pipeline-level settings such as default compute behavior (shown as empty in this example); the individual step and component definitions belong in the jobs property, which is omitted here. Pipeline jobs enable reusable workflow definitions where each step can reference different compute targets and environments.

Beyond these examples

These snippets focus on specific job-level features: command jobs with distributed training, hyperparameter sweep with early termination, and pipeline orchestration. They’re intentionally minimal rather than full ML workflows.

The examples reference pre-existing infrastructure such as Azure Machine Learning workspaces, compute clusters (referenced by computeId), and registered environments and code assets. They focus on job configuration rather than provisioning the workspace and compute infrastructure.

To keep things focused, common job patterns are omitted, including:

  • AutoML jobs for automated model selection
  • Queue settings and priority configuration
  • Notification and monitoring hooks
  • Component-based pipeline definitions (jobs property)
  • Resource limits and timeout controls
  • Identity and credential management beyond AMLToken

These omissions are intentional: the goal is to illustrate how each job type is wired, not provide drop-in ML training modules. See the Azure Machine Learning Job resource reference for all available configuration options.

Let's configure Azure Machine Learning Jobs

Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.

Try Pulumi Cloud for FREE

Frequently Asked Questions

Job Configuration & Immutability
What properties can't I change after creating a job?
The id, resourceGroupName, and workspaceName properties are immutable. Plan these values carefully before creating the job, as modifications require recreating the resource.
What job types are available in Azure Machine Learning?
Four job types are supported: AutoML (automated machine learning), Command (single command execution), Pipeline (workflow orchestration), and Sweep (hyperparameter tuning). Specify the type using the jobType property in jobBaseProperties.
How do I specify timeouts for jobs?
Use ISO 8601 duration format for timeout values. For example, PT5M for 5 minutes or PT1S for 1 second. Command jobs use timeout in the limits property, while Sweep jobs use trialTimeout.
Distributed Training & Outputs
How do I configure distributed training for a job?
Set the distribution property with a distribution type. For TensorFlow, specify parameterServerCount and workerCount. For MPI, specify processCountPerInstance. The distribution type determines which configuration options are available.
What's the difference between ReadWriteMount and Upload output modes?
Output modes control how job outputs are delivered. ReadWriteMount mounts the output location for read/write access during job execution, while Upload uploads outputs after job completion. Specify the mode in the output configuration.
API Versions & Import
Which API version should I use for Azure ML jobs?
The default API version is 2025-09-01. Version 2.x of the Azure Native provider used 2023-04-01. To access other versions (2021-03-01-preview through 2025-07-01-preview), use the CLI command: pulumi package add azure-native machinelearningservices [ApiVersion]
How do I import an existing Azure ML job into Pulumi?
Use the import command with the job’s resource identifier: pulumi import azure-native:machinelearningservices:Job <name> /subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.MachineLearningServices/workspaces/{workspaceName}/jobs/{id}

Using a different cloud?

Explore analytics guides for other cloud providers: