Configure Azure Kubernetes Service Agent Pools

The azure-native:containerservice:AgentPool resource, part of the Pulumi Azure Native provider, defines an AKS agent pool: the VM configuration, scaling behavior, and runtime settings for worker nodes. This guide focuses on five capabilities: spot instances and autoscaling, kernel and kubelet tuning, GPU partitioning, ephemeral storage, and configuration snapshots.

Agent pools belong to an existing AKS managed cluster and may reference VNets, snapshots, or specialized Azure compute resources like capacity reservation groups or dedicated host groups. The examples are intentionally small. Combine them with your own cluster configuration, networking, and security policies.

Create a basic agent pool with spot instances

Most deployments start with a user-mode pool running application workloads on spot instances to reduce costs.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// User-mode pool running on spot capacity: Spot priority with a Delete
// eviction policy, and a taint so only pods with a matching toleration land here.
const agentPool = new azure_native.containerservice.AgentPool("agentPool", {
    agentPoolName: "agentpool1",
    count: 3,
    mode: azure_native.containerservice.AgentPoolMode.User,
    nodeLabels: {
        key1: "val1",
    },
    nodeTaints: ["Key1=Value1:NoSchedule"],
    orchestratorVersion: "", // "" presumably selects the cluster's default Kubernetes version — confirm
    osType: azure_native.containerservice.OSType.Linux,
    resourceGroupName: "rg1",
    resourceName: "clustername1", // name of the existing AKS managed cluster
    scaleSetEvictionPolicy: azure_native.containerservice.ScaleSetEvictionPolicy.Delete,
    scaleSetPriority: azure_native.containerservice.ScaleSetPriority.Spot,
    tags: {
        name1: "val1",
    },
    vmSize: "Standard_DS1_v2",
});
import pulumi
import pulumi_azure_native as azure_native

# User-mode pool running on spot capacity: SPOT priority with a DELETE
# eviction policy, and a taint so only tolerating pods are scheduled here.
agent_pool = azure_native.containerservice.AgentPool("agentPool",
    agent_pool_name="agentpool1",
    count=3,
    mode=azure_native.containerservice.AgentPoolMode.USER,
    node_labels={
        "key1": "val1",
    },
    node_taints=["Key1=Value1:NoSchedule"],
    orchestrator_version="",  # "" presumably selects the cluster's default Kubernetes version — confirm
    os_type=azure_native.containerservice.OSType.LINUX,
    resource_group_name="rg1",
    # trailing underscore avoids clashing with Pulumi's own resource_name parameter
    resource_name_="clustername1",
    scale_set_eviction_policy=azure_native.containerservice.ScaleSetEvictionPolicy.DELETE,
    scale_set_priority=azure_native.containerservice.ScaleSetPriority.SPOT,
    tags={
        "name1": "val1",
    },
    vm_size="Standard_DS1_v2")
package main

import (
	containerservice "github.com/pulumi/pulumi-azure-native-sdk/containerservice/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// User-mode pool running on spot capacity: Spot priority with a Delete
		// eviction policy, and a taint so only tolerating pods land on it.
		_, err := containerservice.NewAgentPool(ctx, "agentPool", &containerservice.AgentPoolArgs{
			AgentPoolName: pulumi.String("agentpool1"),
			Count:         pulumi.Int(3),
			Mode:          pulumi.String(containerservice.AgentPoolModeUser),
			NodeLabels: pulumi.StringMap{
				"key1": pulumi.String("val1"),
			},
			NodeTaints: pulumi.StringArray{
				pulumi.String("Key1=Value1:NoSchedule"),
			},
			// "" presumably selects the cluster's default Kubernetes version — confirm
			OrchestratorVersion:    pulumi.String(""),
			OsType:                 pulumi.String(containerservice.OSTypeLinux),
			ResourceGroupName:      pulumi.String("rg1"),
			ResourceName:           pulumi.String("clustername1"),
			ScaleSetEvictionPolicy: pulumi.String(containerservice.ScaleSetEvictionPolicyDelete),
			ScaleSetPriority:       pulumi.String(containerservice.ScaleSetPrioritySpot),
			Tags: pulumi.StringMap{
				"name1": pulumi.String("val1"),
			},
			VmSize: pulumi.String("Standard_DS1_v2"),
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

return await Deployment.RunAsync(() => 
{
    // User-mode pool running on spot capacity: Spot priority with a Delete
    // eviction policy, and a taint so only tolerating pods land on it.
    var agentPool = new AzureNative.ContainerService.AgentPool("agentPool", new()
    {
        AgentPoolName = "agentpool1",
        Count = 3,
        Mode = AzureNative.ContainerService.AgentPoolMode.User,
        NodeLabels = 
        {
            { "key1", "val1" },
        },
        NodeTaints = new[]
        {
            "Key1=Value1:NoSchedule",
        },
        // "" presumably selects the cluster's default Kubernetes version — confirm
        OrchestratorVersion = "",
        OsType = AzureNative.ContainerService.OSType.Linux,
        ResourceGroupName = "rg1",
        ResourceName = "clustername1",
        ScaleSetEvictionPolicy = AzureNative.ContainerService.ScaleSetEvictionPolicy.Delete,
        ScaleSetPriority = AzureNative.ContainerService.ScaleSetPriority.Spot,
        Tags = 
        {
            { "name1", "val1" },
        },
        VmSize = "Standard_DS1_v2",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.containerservice.AgentPool;
import com.pulumi.azurenative.containerservice.AgentPoolArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // User-mode pool running on spot capacity: "Spot" priority with a
        // "Delete" eviction policy, and a taint so only tolerating pods land here.
        var agentPool = new AgentPool("agentPool", AgentPoolArgs.builder()
            .agentPoolName("agentpool1")
            .count(3)
            .mode("User")
            .nodeLabels(Map.of("key1", "val1"))
            .nodeTaints("Key1=Value1:NoSchedule")
            .orchestratorVersion("") // "" presumably selects the cluster's default version — confirm
            .osType("Linux")
            .resourceGroupName("rg1")
            .resourceName("clustername1")
            .scaleSetEvictionPolicy("Delete")
            .scaleSetPriority("Spot")
            .tags(Map.of("name1", "val1"))
            .vmSize("Standard_DS1_v2")
            .build());

    }
}
resources:
  agentPool:
    type: azure-native:containerservice:AgentPool
    properties:
      agentPoolName: agentpool1
      count: 3
      # User mode: this pool hosts application workloads rather than system pods
      mode: User
      nodeLabels:
        key1: val1
      nodeTaints:
        - Key1=Value1:NoSchedule
      # "" presumably selects the cluster's default Kubernetes version — confirm
      orchestratorVersion: ""
      osType: Linux
      resourceGroupName: rg1
      resourceName: clustername1
      # Delete evicted spot VMs instead of deallocating (and keeping) them
      scaleSetEvictionPolicy: Delete
      # Run this pool on spot capacity
      scaleSetPriority: Spot
      tags:
        name1: val1
      vmSize: Standard_DS1_v2

The scaleSetPriority property sets the pool to use spot VMs, which run on Azure’s spare capacity at reduced rates. The scaleSetEvictionPolicy determines what happens when Azure reclaims capacity (Delete removes VMs, Deallocate preserves them). The mode property distinguishes user pools (application workloads) from system pools (cluster services). Node labels and taints help Kubernetes schedule pods appropriately, ensuring spot-tolerant workloads land on these cost-optimized nodes.

Enable autoscaling on an existing pool

After creating a pool, teams enable autoscaling to handle variable demand without manual node management.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// Autoscaled spot pool. When enableAutoScaling is true, count must lie within
// [minCount, maxCount]; here 2 <= 3 <= 5, so the initial size is valid.
const agentPool = new azure_native.containerservice.AgentPool("agentPool", {
    agentPoolName: "agentpool1",
    count: 3,
    enableAutoScaling: true,
    maxCount: 5,
    minCount: 2,
    nodeTaints: ["Key1=Value1:NoSchedule"],
    orchestratorVersion: "",
    osType: azure_native.containerservice.OSType.Linux,
    resourceGroupName: "rg1",
    resourceName: "clustername1",
    scaleSetEvictionPolicy: azure_native.containerservice.ScaleSetEvictionPolicy.Delete,
    scaleSetPriority: azure_native.containerservice.ScaleSetPriority.Spot,
    vmSize: "Standard_DS1_v2",
});
import pulumi
import pulumi_azure_native as azure_native

# Autoscaled spot pool. When enable_auto_scaling is True, count must lie within
# [min_count, max_count]; here 2 <= 3 <= 5, so the initial size is valid.
agent_pool = azure_native.containerservice.AgentPool("agentPool",
    agent_pool_name="agentpool1",
    count=3,
    enable_auto_scaling=True,
    max_count=5,
    min_count=2,
    node_taints=["Key1=Value1:NoSchedule"],
    orchestrator_version="",
    os_type=azure_native.containerservice.OSType.LINUX,
    resource_group_name="rg1",
    resource_name_="clustername1",
    scale_set_eviction_policy=azure_native.containerservice.ScaleSetEvictionPolicy.DELETE,
    scale_set_priority=azure_native.containerservice.ScaleSetPriority.SPOT,
    vm_size="Standard_DS1_v2")
package main

import (
	containerservice "github.com/pulumi/pulumi-azure-native-sdk/containerservice/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := containerservice.NewAgentPool(ctx, "agentPool", &containerservice.AgentPoolArgs{
			AgentPoolName:     pulumi.String("agentpool1"),
			Count:             pulumi.Int(3),
			EnableAutoScaling: pulumi.Bool(true),
			MaxCount:          pulumi.Int(2),
			MinCount:          pulumi.Int(2),
			NodeTaints: pulumi.StringArray{
				pulumi.String("Key1=Value1:NoSchedule"),
			},
			OrchestratorVersion:    pulumi.String(""),
			OsType:                 pulumi.String(containerservice.OSTypeLinux),
			ResourceGroupName:      pulumi.String("rg1"),
			ResourceName:           pulumi.String("clustername1"),
			ScaleSetEvictionPolicy: pulumi.String(containerservice.ScaleSetEvictionPolicyDelete),
			ScaleSetPriority:       pulumi.String(containerservice.ScaleSetPrioritySpot),
			VmSize:                 pulumi.String("Standard_DS1_v2"),
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

return await Deployment.RunAsync(() => 
{
    // Autoscaled spot pool. With EnableAutoScaling, Count must lie within
    // [MinCount, MaxCount]; here 2 <= 3 <= 5, so the initial size is valid.
    var agentPool = new AzureNative.ContainerService.AgentPool("agentPool", new()
    {
        AgentPoolName = "agentpool1",
        Count = 3,
        EnableAutoScaling = true,
        MaxCount = 5,
        MinCount = 2,
        NodeTaints = new[]
        {
            "Key1=Value1:NoSchedule",
        },
        OrchestratorVersion = "",
        OsType = AzureNative.ContainerService.OSType.Linux,
        ResourceGroupName = "rg1",
        ResourceName = "clustername1",
        ScaleSetEvictionPolicy = AzureNative.ContainerService.ScaleSetEvictionPolicy.Delete,
        ScaleSetPriority = AzureNative.ContainerService.ScaleSetPriority.Spot,
        VmSize = "Standard_DS1_v2",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.containerservice.AgentPool;
import com.pulumi.azurenative.containerservice.AgentPoolArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // Autoscaled spot pool. With enableAutoScaling, count must lie within
        // [minCount, maxCount]; here 2 <= 3 <= 5, so the initial size is valid.
        var agentPool = new AgentPool("agentPool", AgentPoolArgs.builder()
            .agentPoolName("agentpool1")
            .count(3)
            .enableAutoScaling(true)
            .maxCount(5)
            .minCount(2)
            .nodeTaints("Key1=Value1:NoSchedule")
            .orchestratorVersion("")
            .osType("Linux")
            .resourceGroupName("rg1")
            .resourceName("clustername1")
            .scaleSetEvictionPolicy("Delete")
            .scaleSetPriority("Spot")
            .vmSize("Standard_DS1_v2")
            .build());

    }
}
resources:
  agentPool:
    type: azure-native:containerservice:AgentPool
    properties:
      agentPoolName: agentpool1
      # count must lie within [minCount, maxCount] when enableAutoScaling is true
      count: 3
      enableAutoScaling: true
      maxCount: 5
      minCount: 2
      nodeTaints:
        - Key1=Value1:NoSchedule
      orchestratorVersion: ""
      osType: Linux
      resourceGroupName: rg1
      resourceName: clustername1
      scaleSetEvictionPolicy: Delete
      scaleSetPriority: Spot
      vmSize: Standard_DS1_v2

When enableAutoScaling is true, the cluster autoscaler monitors pod resource requests and scales the node count between minCount and maxCount. Nodes are added when pods can’t be scheduled due to insufficient resources, and removed when utilization drops below thresholds. This configuration updates an existing pool rather than creating a new one.

Tune kernel and kubelet settings for performance

High-performance workloads often need custom kernel parameters and kubelet configuration beyond AKS defaults.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

const agentPool = new azure_native.containerservice.AgentPool("agentPool", {
    agentPoolName: "agentpool1",
    count: 3,
    kubeletConfig: {
        allowedUnsafeSysctls: [
            "kernel.msg*",
            "net.core.somaxconn",
        ],
        cpuCfsQuota: true,
        cpuCfsQuotaPeriod: "200ms",
        cpuManagerPolicy: "static",
        failSwapOn: false,
        imageGcHighThreshold: 90,
        imageGcLowThreshold: 70,
        topologyManagerPolicy: "best-effort",
    },
    linuxOSConfig: {
        swapFileSizeMB: 1500,
        sysctls: {
            kernelThreadsMax: 99999,
            netCoreWmemDefault: 12345,
            netIpv4IpLocalPortRange: "20000 60000",
            netIpv4TcpTwReuse: true,
        },
        transparentHugePageDefrag: "madvise",
        transparentHugePageEnabled: "always",
    },
    orchestratorVersion: "",
    osType: azure_native.containerservice.OSType.Linux,
    resourceGroupName: "rg1",
    resourceName: "clustername1",
    vmSize: "Standard_DS2_v2",
});
import pulumi
import pulumi_azure_native as azure_native

agent_pool = azure_native.containerservice.AgentPool("agentPool",
    agent_pool_name="agentpool1",
    count=3,
    kubelet_config={
        "allowed_unsafe_sysctls": [
            "kernel.msg*",
            "net.core.somaxconn",
        ],
        "cpu_cfs_quota": True,
        "cpu_cfs_quota_period": "200ms",
        "cpu_manager_policy": "static",
        "fail_swap_on": False,
        "image_gc_high_threshold": 90,
        "image_gc_low_threshold": 70,
        "topology_manager_policy": "best-effort",
    },
    linux_os_config={
        "swap_file_size_mb": 1500,
        "sysctls": {
            "kernel_threads_max": 99999,
            "net_core_wmem_default": 12345,
            "net_ipv4_ip_local_port_range": "20000 60000",
            "net_ipv4_tcp_tw_reuse": True,
        },
        "transparent_huge_page_defrag": "madvise",
        "transparent_huge_page_enabled": "always",
    },
    orchestrator_version="",
    os_type=azure_native.containerservice.OSType.LINUX,
    resource_group_name="rg1",
    resource_name_="clustername1",
    vm_size="Standard_DS2_v2")
package main

import (
	containerservice "github.com/pulumi/pulumi-azure-native-sdk/containerservice/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := containerservice.NewAgentPool(ctx, "agentPool", &containerservice.AgentPoolArgs{
			AgentPoolName: pulumi.String("agentpool1"),
			Count:         pulumi.Int(3),
			KubeletConfig: &containerservice.KubeletConfigArgs{
				AllowedUnsafeSysctls: pulumi.StringArray{
					pulumi.String("kernel.msg*"),
					pulumi.String("net.core.somaxconn"),
				},
				CpuCfsQuota:           pulumi.Bool(true),
				CpuCfsQuotaPeriod:     pulumi.String("200ms"),
				CpuManagerPolicy:      pulumi.String("static"),
				FailSwapOn:            pulumi.Bool(false),
				ImageGcHighThreshold:  pulumi.Int(90),
				ImageGcLowThreshold:   pulumi.Int(70),
				TopologyManagerPolicy: pulumi.String("best-effort"),
			},
			LinuxOSConfig: &containerservice.LinuxOSConfigArgs{
				SwapFileSizeMB: pulumi.Int(1500),
				Sysctls: &containerservice.SysctlConfigArgs{
					KernelThreadsMax:        pulumi.Int(99999),
					NetCoreWmemDefault:      pulumi.Int(12345),
					NetIpv4IpLocalPortRange: pulumi.String("20000 60000"),
					NetIpv4TcpTwReuse:       pulumi.Bool(true),
				},
				TransparentHugePageDefrag:  pulumi.String("madvise"),
				TransparentHugePageEnabled: pulumi.String("always"),
			},
			OrchestratorVersion: pulumi.String(""),
			OsType:              pulumi.String(containerservice.OSTypeLinux),
			ResourceGroupName:   pulumi.String("rg1"),
			ResourceName:        pulumi.String("clustername1"),
			VmSize:              pulumi.String("Standard_DS2_v2"),
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

return await Deployment.RunAsync(() => 
{
    var agentPool = new AzureNative.ContainerService.AgentPool("agentPool", new()
    {
        AgentPoolName = "agentpool1",
        Count = 3,
        KubeletConfig = new AzureNative.ContainerService.Inputs.KubeletConfigArgs
        {
            AllowedUnsafeSysctls = new[]
            {
                "kernel.msg*",
                "net.core.somaxconn",
            },
            CpuCfsQuota = true,
            CpuCfsQuotaPeriod = "200ms",
            CpuManagerPolicy = "static",
            FailSwapOn = false,
            ImageGcHighThreshold = 90,
            ImageGcLowThreshold = 70,
            TopologyManagerPolicy = "best-effort",
        },
        LinuxOSConfig = new AzureNative.ContainerService.Inputs.LinuxOSConfigArgs
        {
            SwapFileSizeMB = 1500,
            Sysctls = new AzureNative.ContainerService.Inputs.SysctlConfigArgs
            {
                KernelThreadsMax = 99999,
                NetCoreWmemDefault = 12345,
                NetIpv4IpLocalPortRange = "20000 60000",
                NetIpv4TcpTwReuse = true,
            },
            TransparentHugePageDefrag = "madvise",
            TransparentHugePageEnabled = "always",
        },
        OrchestratorVersion = "",
        OsType = AzureNative.ContainerService.OSType.Linux,
        ResourceGroupName = "rg1",
        ResourceName = "clustername1",
        VmSize = "Standard_DS2_v2",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.containerservice.AgentPool;
import com.pulumi.azurenative.containerservice.AgentPoolArgs;
import com.pulumi.azurenative.containerservice.inputs.KubeletConfigArgs;
import com.pulumi.azurenative.containerservice.inputs.LinuxOSConfigArgs;
import com.pulumi.azurenative.containerservice.inputs.SysctlConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var agentPool = new AgentPool("agentPool", AgentPoolArgs.builder()
            .agentPoolName("agentpool1")
            .count(3)
            .kubeletConfig(KubeletConfigArgs.builder()
                .allowedUnsafeSysctls(                
                    "kernel.msg*",
                    "net.core.somaxconn")
                .cpuCfsQuota(true)
                .cpuCfsQuotaPeriod("200ms")
                .cpuManagerPolicy("static")
                .failSwapOn(false)
                .imageGcHighThreshold(90)
                .imageGcLowThreshold(70)
                .topologyManagerPolicy("best-effort")
                .build())
            .linuxOSConfig(LinuxOSConfigArgs.builder()
                .swapFileSizeMB(1500)
                .sysctls(SysctlConfigArgs.builder()
                    .kernelThreadsMax(99999)
                    .netCoreWmemDefault(12345)
                    .netIpv4IpLocalPortRange("20000 60000")
                    .netIpv4TcpTwReuse(true)
                    .build())
                .transparentHugePageDefrag("madvise")
                .transparentHugePageEnabled("always")
                .build())
            .orchestratorVersion("")
            .osType("Linux")
            .resourceGroupName("rg1")
            .resourceName("clustername1")
            .vmSize("Standard_DS2_v2")
            .build());

    }
}
resources:
  agentPool:
    type: azure-native:containerservice:AgentPool
    properties:
      agentPoolName: agentpool1
      count: 3
      # Kubelet tuning applied to every node in this pool
      kubeletConfig:
        # sysctls pods may set despite being "unsafe" (wildcards allowed)
        allowedUnsafeSysctls:
          - kernel.msg*
          - net.core.somaxconn
        cpuCfsQuota: true
        cpuCfsQuotaPeriod: 200ms
        # static policy pins guaranteed pods to dedicated CPU cores
        cpuManagerPolicy: static
        # allow the kubelet to start even when swap is enabled
        failSwapOn: false
        # image GC starts above 90% disk usage, reclaims down to 70%
        imageGcHighThreshold: 90
        imageGcLowThreshold: 70
        topologyManagerPolicy: best-effort
      # Kernel-level settings for each Linux node
      linuxOSConfig:
        swapFileSizeMB: 1500
        sysctls:
          kernelThreadsMax: 99999
          netCoreWmemDefault: 12345
          netIpv4IpLocalPortRange: 20000 60000
          netIpv4TcpTwReuse: true
        transparentHugePageDefrag: madvise
        transparentHugePageEnabled: always
      orchestratorVersion: ""
      osType: Linux
      resourceGroupName: rg1
      resourceName: clustername1
      vmSize: Standard_DS2_v2

The kubeletConfig block controls container runtime behavior: cpuManagerPolicy pins pods to specific CPU cores, imageGcHighThreshold triggers garbage collection when disk usage crosses a threshold, and topologyManagerPolicy aligns CPU and device resources. The linuxOSConfig block sets kernel parameters through sysctls (like network buffer sizes or thread limits) and configures swap and transparent huge pages. These settings apply at node creation and persist across reboots.

Configure GPU partitioning for ML workloads

Machine learning teams running multiple small models can partition GPU resources using Multi-Instance GPU (MIG).

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

const agentPool = new azure_native.containerservice.AgentPool("agentPool", {
    agentPoolName: "agentpool1",
    count: 3,
    gpuInstanceProfile: azure_native.containerservice.GPUInstanceProfile.MIG2g,
    kubeletConfig: {
        allowedUnsafeSysctls: [
            "kernel.msg*",
            "net.core.somaxconn",
        ],
        cpuCfsQuota: true,
        cpuCfsQuotaPeriod: "200ms",
        cpuManagerPolicy: "static",
        failSwapOn: false,
        imageGcHighThreshold: 90,
        imageGcLowThreshold: 70,
        topologyManagerPolicy: "best-effort",
    },
    linuxOSConfig: {
        swapFileSizeMB: 1500,
        sysctls: {
            kernelThreadsMax: 99999,
            netCoreWmemDefault: 12345,
            netIpv4IpLocalPortRange: "20000 60000",
            netIpv4TcpTwReuse: true,
        },
        transparentHugePageDefrag: "madvise",
        transparentHugePageEnabled: "always",
    },
    orchestratorVersion: "",
    osType: azure_native.containerservice.OSType.Linux,
    resourceGroupName: "rg1",
    resourceName: "clustername1",
    vmSize: "Standard_ND96asr_v4",
});
import pulumi
import pulumi_azure_native as azure_native

agent_pool = azure_native.containerservice.AgentPool("agentPool",
    agent_pool_name="agentpool1",
    count=3,
    gpu_instance_profile=azure_native.containerservice.GPUInstanceProfile.MIG2G,
    kubelet_config={
        "allowed_unsafe_sysctls": [
            "kernel.msg*",
            "net.core.somaxconn",
        ],
        "cpu_cfs_quota": True,
        "cpu_cfs_quota_period": "200ms",
        "cpu_manager_policy": "static",
        "fail_swap_on": False,
        "image_gc_high_threshold": 90,
        "image_gc_low_threshold": 70,
        "topology_manager_policy": "best-effort",
    },
    linux_os_config={
        "swap_file_size_mb": 1500,
        "sysctls": {
            "kernel_threads_max": 99999,
            "net_core_wmem_default": 12345,
            "net_ipv4_ip_local_port_range": "20000 60000",
            "net_ipv4_tcp_tw_reuse": True,
        },
        "transparent_huge_page_defrag": "madvise",
        "transparent_huge_page_enabled": "always",
    },
    orchestrator_version="",
    os_type=azure_native.containerservice.OSType.LINUX,
    resource_group_name="rg1",
    resource_name_="clustername1",
    vm_size="Standard_ND96asr_v4")
package main

import (
	containerservice "github.com/pulumi/pulumi-azure-native-sdk/containerservice/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := containerservice.NewAgentPool(ctx, "agentPool", &containerservice.AgentPoolArgs{
			AgentPoolName:      pulumi.String("agentpool1"),
			Count:              pulumi.Int(3),
			GpuInstanceProfile: pulumi.String(containerservice.GPUInstanceProfileMIG2g),
			KubeletConfig: &containerservice.KubeletConfigArgs{
				AllowedUnsafeSysctls: pulumi.StringArray{
					pulumi.String("kernel.msg*"),
					pulumi.String("net.core.somaxconn"),
				},
				CpuCfsQuota:           pulumi.Bool(true),
				CpuCfsQuotaPeriod:     pulumi.String("200ms"),
				CpuManagerPolicy:      pulumi.String("static"),
				FailSwapOn:            pulumi.Bool(false),
				ImageGcHighThreshold:  pulumi.Int(90),
				ImageGcLowThreshold:   pulumi.Int(70),
				TopologyManagerPolicy: pulumi.String("best-effort"),
			},
			LinuxOSConfig: &containerservice.LinuxOSConfigArgs{
				SwapFileSizeMB: pulumi.Int(1500),
				Sysctls: &containerservice.SysctlConfigArgs{
					KernelThreadsMax:        pulumi.Int(99999),
					NetCoreWmemDefault:      pulumi.Int(12345),
					NetIpv4IpLocalPortRange: pulumi.String("20000 60000"),
					NetIpv4TcpTwReuse:       pulumi.Bool(true),
				},
				TransparentHugePageDefrag:  pulumi.String("madvise"),
				TransparentHugePageEnabled: pulumi.String("always"),
			},
			OrchestratorVersion: pulumi.String(""),
			OsType:              pulumi.String(containerservice.OSTypeLinux),
			ResourceGroupName:   pulumi.String("rg1"),
			ResourceName:        pulumi.String("clustername1"),
			VmSize:              pulumi.String("Standard_ND96asr_v4"),
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

return await Deployment.RunAsync(() => 
{
    var agentPool = new AzureNative.ContainerService.AgentPool("agentPool", new()
    {
        AgentPoolName = "agentpool1",
        Count = 3,
        GpuInstanceProfile = AzureNative.ContainerService.GPUInstanceProfile.MIG2g,
        KubeletConfig = new AzureNative.ContainerService.Inputs.KubeletConfigArgs
        {
            AllowedUnsafeSysctls = new[]
            {
                "kernel.msg*",
                "net.core.somaxconn",
            },
            CpuCfsQuota = true,
            CpuCfsQuotaPeriod = "200ms",
            CpuManagerPolicy = "static",
            FailSwapOn = false,
            ImageGcHighThreshold = 90,
            ImageGcLowThreshold = 70,
            TopologyManagerPolicy = "best-effort",
        },
        LinuxOSConfig = new AzureNative.ContainerService.Inputs.LinuxOSConfigArgs
        {
            SwapFileSizeMB = 1500,
            Sysctls = new AzureNative.ContainerService.Inputs.SysctlConfigArgs
            {
                KernelThreadsMax = 99999,
                NetCoreWmemDefault = 12345,
                NetIpv4IpLocalPortRange = "20000 60000",
                NetIpv4TcpTwReuse = true,
            },
            TransparentHugePageDefrag = "madvise",
            TransparentHugePageEnabled = "always",
        },
        OrchestratorVersion = "",
        OsType = AzureNative.ContainerService.OSType.Linux,
        ResourceGroupName = "rg1",
        ResourceName = "clustername1",
        VmSize = "Standard_ND96asr_v4",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.containerservice.AgentPool;
import com.pulumi.azurenative.containerservice.AgentPoolArgs;
import com.pulumi.azurenative.containerservice.inputs.KubeletConfigArgs;
import com.pulumi.azurenative.containerservice.inputs.LinuxOSConfigArgs;
import com.pulumi.azurenative.containerservice.inputs.SysctlConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var agentPool = new AgentPool("agentPool", AgentPoolArgs.builder()
            .agentPoolName("agentpool1")
            .count(3)
            .gpuInstanceProfile("MIG2g")
            .kubeletConfig(KubeletConfigArgs.builder()
                .allowedUnsafeSysctls(                
                    "kernel.msg*",
                    "net.core.somaxconn")
                .cpuCfsQuota(true)
                .cpuCfsQuotaPeriod("200ms")
                .cpuManagerPolicy("static")
                .failSwapOn(false)
                .imageGcHighThreshold(90)
                .imageGcLowThreshold(70)
                .topologyManagerPolicy("best-effort")
                .build())
            .linuxOSConfig(LinuxOSConfigArgs.builder()
                .swapFileSizeMB(1500)
                .sysctls(SysctlConfigArgs.builder()
                    .kernelThreadsMax(99999)
                    .netCoreWmemDefault(12345)
                    .netIpv4IpLocalPortRange("20000 60000")
                    .netIpv4TcpTwReuse(true)
                    .build())
                .transparentHugePageDefrag("madvise")
                .transparentHugePageEnabled("always")
                .build())
            .orchestratorVersion("")
            .osType("Linux")
            .resourceGroupName("rg1")
            .resourceName("clustername1")
            .vmSize("Standard_ND96asr_v4")
            .build());

    }
}
resources:
  agentPool:
    type: azure-native:containerservice:AgentPool
    properties:
      agentPoolName: agentpool1
      count: 3
      # Partition each physical GPU into isolated Multi-Instance GPU slices
      # using the MIG2g profile; requires a MIG-capable vmSize (see below)
      gpuInstanceProfile: MIG2g
      kubeletConfig:
        allowedUnsafeSysctls:
          - kernel.msg*
          - net.core.somaxconn
        cpuCfsQuota: true
        cpuCfsQuotaPeriod: 200ms
        cpuManagerPolicy: static
        failSwapOn: false
        imageGcHighThreshold: 90
        imageGcLowThreshold: 70
        topologyManagerPolicy: best-effort
      linuxOSConfig:
        swapFileSizeMB: 1500
        sysctls:
          kernelThreadsMax: 99999
          netCoreWmemDefault: 12345
          netIpv4IpLocalPortRange: 20000 60000
          netIpv4TcpTwReuse: true
        transparentHugePageDefrag: madvise
        transparentHugePageEnabled: always
      orchestratorVersion: ""
      osType: Linux
      resourceGroupName: rg1
      resourceName: clustername1
      # A100-based size that supports MIG partitioning
      vmSize: Standard_ND96asr_v4

The gpuInstanceProfile property divides a single GPU into smaller, isolated instances (MIG2g, for example, assigns two of an A100 GPU’s seven compute slices, with a proportional share of memory, to each instance). This lets multiple containers share one physical GPU while maintaining memory and compute isolation. The vmSize must support MIG (like Standard_ND96asr_v4), and kubelet/OS tuning often accompanies GPU configuration to optimize scheduling and resource limits.

Use ephemeral disks for faster node provisioning

Ephemeral OS disks store the operating system on the VM’s local cache or temporary storage, reducing boot times and eliminating remote-disk storage costs; the configured disk size must fit within the capacity the chosen VM size provides.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// Pool with an ephemeral OS disk for faster provisioning.
// NOTE(review): the 64 GiB size must fit the VM size's local cache/temp
// storage — confirm for Standard_DS2_v2.
const agentPool = new azure_native.containerservice.AgentPool("agentPool", {
    agentPoolName: "agentpool1",
    count: 3,
    orchestratorVersion: "",
    osDiskSizeGB: 64,
    osDiskType: azure_native.containerservice.OSDiskType.Ephemeral,
    osType: azure_native.containerservice.OSType.Linux,
    resourceGroupName: "rg1",
    resourceName: "clustername1",
    vmSize: "Standard_DS2_v2",
});
import pulumi
import pulumi_azure_native as azure_native

# Pool with an ephemeral OS disk for faster provisioning.
# NOTE(review): the 64 GiB size must fit the VM size's local cache/temp
# storage — confirm for Standard_DS2_v2.
agent_pool = azure_native.containerservice.AgentPool("agentPool",
    agent_pool_name="agentpool1",
    count=3,
    orchestrator_version="",
    os_disk_size_gb=64,
    os_disk_type=azure_native.containerservice.OSDiskType.EPHEMERAL,
    os_type=azure_native.containerservice.OSType.LINUX,
    resource_group_name="rg1",
    resource_name_="clustername1",
    vm_size="Standard_DS2_v2")
package main

import (
	containerservice "github.com/pulumi/pulumi-azure-native-sdk/containerservice/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Pool with an ephemeral OS disk for faster provisioning.
		// NOTE(review): the 64 GiB size must fit the VM size's local
		// cache/temp storage — confirm for Standard_DS2_v2.
		_, err := containerservice.NewAgentPool(ctx, "agentPool", &containerservice.AgentPoolArgs{
			AgentPoolName:       pulumi.String("agentpool1"),
			Count:               pulumi.Int(3),
			OrchestratorVersion: pulumi.String(""),
			OsDiskSizeGB:        pulumi.Int(64),
			OsDiskType:          pulumi.String(containerservice.OSDiskTypeEphemeral),
			OsType:              pulumi.String(containerservice.OSTypeLinux),
			ResourceGroupName:   pulumi.String("rg1"),
			ResourceName:        pulumi.String("clustername1"),
			VmSize:              pulumi.String("Standard_DS2_v2"),
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

return await Deployment.RunAsync(() => 
{
    // Agent pool backed by an Ephemeral OS disk: the OS runs from the VM's
    // local cache rather than a managed disk, speeding up node creation and
    // removing persistent-disk costs.
    var agentPool = new AzureNative.ContainerService.AgentPool("agentPool", new()
    {
        AgentPoolName = "agentpool1",
        Count = 3,
        // An empty string inherits the cluster's Kubernetes version.
        OrchestratorVersion = "",
        // Must fit within the VM's cache size when OsDiskType is Ephemeral.
        OsDiskSizeGB = 64,
        OsDiskType = AzureNative.ContainerService.OSDiskType.Ephemeral,
        OsType = AzureNative.ContainerService.OSType.Linux,
        // Pre-existing resource group and AKS managed cluster that own this pool.
        ResourceGroupName = "rg1",
        ResourceName = "clustername1",
        VmSize = "Standard_DS2_v2",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.containerservice.AgentPool;
import com.pulumi.azurenative.containerservice.AgentPoolArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // Agent pool backed by an Ephemeral OS disk: the OS runs from the
        // VM's local cache rather than a managed disk, speeding up node
        // creation and removing persistent-disk costs.
        var agentPool = new AgentPool("agentPool", AgentPoolArgs.builder()
            .agentPoolName("agentpool1")
            .count(3)
            // An empty string inherits the cluster's Kubernetes version.
            .orchestratorVersion("")
            // Must fit within the VM's cache size when osDiskType is Ephemeral.
            .osDiskSizeGB(64)
            .osDiskType("Ephemeral")
            .osType("Linux")
            // Pre-existing resource group and AKS managed cluster that own this pool.
            .resourceGroupName("rg1")
            .resourceName("clustername1")
            .vmSize("Standard_DS2_v2")
            .build());

    }
}
resources:
  # Agent pool backed by an Ephemeral OS disk: the OS runs from the VM's
  # local cache rather than a managed disk, speeding up node creation and
  # removing persistent-disk costs.
  agentPool:
    type: azure-native:containerservice:AgentPool
    properties:
      agentPoolName: agentpool1
      count: 3
      # An empty string inherits the cluster's Kubernetes version.
      orchestratorVersion: ""
      # Must fit within the VM's cache size when osDiskType is Ephemeral.
      osDiskSizeGB: 64
      osDiskType: Ephemeral
      osType: Linux
      # Pre-existing resource group and AKS managed cluster that own this pool.
      resourceGroupName: rg1
      resourceName: clustername1
      vmSize: Standard_DS2_v2

When osDiskType is Ephemeral, the OS runs from the VM’s temporary storage rather than a managed disk. This speeds up node creation (no disk provisioning delay) and eliminates persistent disk costs. The osDiskSizeGB must fit within the VM’s cache size. Ephemeral disks are ideal for stateless workloads where node replacement is frequent and data persistence isn’t required.

Clone pool configuration from a snapshot

Teams standardizing configurations across clusters can capture an agent pool’s settings as a snapshot.

import * as pulumi from "@pulumi/pulumi";
import * as azure_native from "@pulumi/azure-native";

// Agent pool cloned from a snapshot: creationData copies the snapshot's
// configuration (VM size, OS settings, FIPS enablement) to the new pool,
// keeping configuration consistent across environments.
const agentPool = new azure_native.containerservice.AgentPool("agentPool", {
    agentPoolName: "agentpool1",
    count: 3,
    creationData: {
        // Resource ID of a pre-existing agent pool snapshot.
        sourceResourceId: "/subscriptions/00000000-0000-0000-0000-000000000000/resourcegroups/rg1/providers/Microsoft.ContainerService/snapshots/snapshot1",
    },
    // Run a FIPS-compliant OS on the nodes.
    enableFIPS: true,
    // An empty string inherits the cluster's Kubernetes version.
    orchestratorVersion: "",
    osType: azure_native.containerservice.OSType.Linux,
    // Pre-existing resource group and AKS managed cluster that own this pool.
    resourceGroupName: "rg1",
    resourceName: "clustername1",
    vmSize: "Standard_DS2_v2",
});
import pulumi
import pulumi_azure_native as azure_native

# Agent pool cloned from a snapshot: creation_data copies the snapshot's
# configuration (VM size, OS settings, FIPS enablement) to the new pool,
# keeping configuration consistent across environments.
agent_pool = azure_native.containerservice.AgentPool("agentPool",
    agent_pool_name="agentpool1",
    count=3,
    creation_data={
        # Resource ID of a pre-existing agent pool snapshot.
        "source_resource_id": "/subscriptions/00000000-0000-0000-0000-000000000000/resourcegroups/rg1/providers/Microsoft.ContainerService/snapshots/snapshot1",
    },
    # Run a FIPS-compliant OS on the nodes.
    enable_fips=True,
    # An empty string inherits the cluster's Kubernetes version.
    orchestrator_version="",
    os_type=azure_native.containerservice.OSType.LINUX,
    # Pre-existing resource group and AKS managed cluster that own this pool.
    resource_group_name="rg1",
    # Trailing underscore avoids clashing with Pulumi's own resource-name argument.
    resource_name_="clustername1",
    vm_size="Standard_DS2_v2")
package main

import (
	containerservice "github.com/pulumi/pulumi-azure-native-sdk/containerservice/v3"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Agent pool cloned from a snapshot: CreationData copies the
		// snapshot's configuration (VM size, OS settings, FIPS enablement)
		// to the new pool, keeping configuration consistent across
		// environments.
		_, err := containerservice.NewAgentPool(ctx, "agentPool", &containerservice.AgentPoolArgs{
			AgentPoolName: pulumi.String("agentpool1"),
			Count:         pulumi.Int(3),
			CreationData: &containerservice.CreationDataArgs{
				// Resource ID of a pre-existing agent pool snapshot.
				SourceResourceId: pulumi.String("/subscriptions/00000000-0000-0000-0000-000000000000/resourcegroups/rg1/providers/Microsoft.ContainerService/snapshots/snapshot1"),
			},
			// Run a FIPS-compliant OS on the nodes.
			EnableFIPS: pulumi.Bool(true),
			// An empty string inherits the cluster's Kubernetes version.
			OrchestratorVersion: pulumi.String(""),
			OsType:              pulumi.String(containerservice.OSTypeLinux),
			// Pre-existing resource group and AKS managed cluster that own this pool.
			ResourceGroupName: pulumi.String("rg1"),
			ResourceName:      pulumi.String("clustername1"),
			VmSize:            pulumi.String("Standard_DS2_v2"),
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using AzureNative = Pulumi.AzureNative;

return await Deployment.RunAsync(() => 
{
    // Agent pool cloned from a snapshot: CreationData copies the snapshot's
    // configuration (VM size, OS settings, FIPS enablement) to the new pool,
    // keeping configuration consistent across environments.
    var agentPool = new AzureNative.ContainerService.AgentPool("agentPool", new()
    {
        AgentPoolName = "agentpool1",
        Count = 3,
        CreationData = new AzureNative.ContainerService.Inputs.CreationDataArgs
        {
            // Resource ID of a pre-existing agent pool snapshot.
            SourceResourceId = "/subscriptions/00000000-0000-0000-0000-000000000000/resourcegroups/rg1/providers/Microsoft.ContainerService/snapshots/snapshot1",
        },
        // Run a FIPS-compliant OS on the nodes.
        EnableFIPS = true,
        // An empty string inherits the cluster's Kubernetes version.
        OrchestratorVersion = "",
        OsType = AzureNative.ContainerService.OSType.Linux,
        // Pre-existing resource group and AKS managed cluster that own this pool.
        ResourceGroupName = "rg1",
        ResourceName = "clustername1",
        VmSize = "Standard_DS2_v2",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.azurenative.containerservice.AgentPool;
import com.pulumi.azurenative.containerservice.AgentPoolArgs;
import com.pulumi.azurenative.containerservice.inputs.CreationDataArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // Agent pool cloned from a snapshot: creationData copies the
        // snapshot's configuration (VM size, OS settings, FIPS enablement)
        // to the new pool, keeping configuration consistent across
        // environments.
        var agentPool = new AgentPool("agentPool", AgentPoolArgs.builder()
            .agentPoolName("agentpool1")
            .count(3)
            .creationData(CreationDataArgs.builder()
                // Resource ID of a pre-existing agent pool snapshot.
                .sourceResourceId("/subscriptions/00000000-0000-0000-0000-000000000000/resourcegroups/rg1/providers/Microsoft.ContainerService/snapshots/snapshot1")
                .build())
            // Run a FIPS-compliant OS on the nodes.
            .enableFIPS(true)
            // An empty string inherits the cluster's Kubernetes version.
            .orchestratorVersion("")
            .osType("Linux")
            // Pre-existing resource group and AKS managed cluster that own this pool.
            .resourceGroupName("rg1")
            .resourceName("clustername1")
            .vmSize("Standard_DS2_v2")
            .build());

    }
}
resources:
  # Agent pool cloned from a snapshot: creationData copies the snapshot's
  # configuration (VM size, OS settings, FIPS enablement) to the new pool,
  # keeping configuration consistent across environments.
  agentPool:
    type: azure-native:containerservice:AgentPool
    properties:
      agentPoolName: agentpool1
      count: 3
      creationData:
        # Resource ID of a pre-existing agent pool snapshot.
        sourceResourceId: /subscriptions/00000000-0000-0000-0000-000000000000/resourcegroups/rg1/providers/Microsoft.ContainerService/snapshots/snapshot1
      # Run a FIPS-compliant OS on the nodes.
      enableFIPS: true
      # An empty string inherits the cluster's Kubernetes version.
      orchestratorVersion: ""
      osType: Linux
      # Pre-existing resource group and AKS managed cluster that own this pool.
      resourceGroupName: rg1
      resourceName: clustername1
      vmSize: Standard_DS2_v2

The creationData property references a snapshot resource ID, copying its configuration (VM size, OS settings, FIPS enablement) to the new pool. This ensures consistency across environments without manually replicating dozens of properties. The orchestratorVersion can be empty to inherit the cluster’s Kubernetes version, or specified to pin the pool to a particular version.

Beyond these examples

These snippets focus on specific agent pool features: node scaling and spot instances, OS and kubelet tuning, and GPU partitioning and specialized hardware. They’re intentionally minimal rather than full cluster deployments.

The examples may reference pre-existing infrastructure such as AKS managed clusters, resource groups, snapshots, capacity reservation groups, VNet subnets, proximity placement groups, and dedicated host groups. They focus on configuring the agent pool rather than provisioning the surrounding cluster infrastructure.

To keep things focused, common agent pool patterns are omitted, including:

  • Availability zones and multi-zone placement
  • Network configuration (vnetSubnetID, podSubnetID, networkProfile)
  • Upgrade settings and maintenance windows
  • Security profiles and encryption settings
  • Windows-specific configuration (windowsProfile)
  • Virtual machine pool types and heterogeneous sizing

These omissions are intentional: the goal is to illustrate how each agent pool feature is wired, not provide drop-in cluster modules. See the AgentPool resource reference for all available configuration options.

Let's configure Azure Kubernetes Service Agent Pools

Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.

Try Pulumi Cloud for FREE

Frequently Asked Questions

Pool Configuration & Requirements
What's the difference between System and User agent pools?
System pools run critical system pods, while User pools run application workloads. A cluster must have at least one System agent pool at all times.
What properties can't I change after creating an agent pool?
You cannot change agentPoolName, vmSize, gpuInstanceProfile, resourceGroupName, or resourceName after creation.
What are the node count limits for agent pools?
User pools support 0-1000 nodes, system pools support 1-1000 nodes. The default is 1 node.
Auto-Scaling & Sizing
How do I enable auto-scaling for my agent pool?
Set enableAutoScaling to true and specify minCount and maxCount to define the scaling range.
Can I specify Kubernetes version as just major.minor?
Yes, when you specify orchestratorVersion as <major.minor> (e.g., 1.20), AKS automatically selects the latest supported GA patch version.
Spot Instances & Cost Optimization
How do I use Spot VMs to reduce costs?
Set scaleSetPriority to Spot and configure scaleSetEvictionPolicy (Delete or Deallocate). Note that scaleSetEvictionPolicy can only be set when using Spot priority.
How do I stop an agent pool to avoid billing charges?
Set powerState.code to Stopped. Stopped agent pools don’t accrue billing charges and can only be stopped if they’re Running with provisioning state Succeeded.
OS & Disk Configuration
What are the default OS SKUs for Linux and Windows?
Linux defaults to Ubuntu. Windows defaults to Windows2019 for Kubernetes <= 1.24 or Windows2022 for Kubernetes >= 1.25.
What's the difference between Ephemeral and Managed OS disks?
Ephemeral disks use local VM storage for better performance. AKS defaults to Ephemeral if the VM supports it and has a cache disk larger than the requested osDiskSizeGB, otherwise it uses Managed disks.
How do I enable FIPS-compliant OS for my nodes?
Set enableFIPS to true when creating the agent pool.
Networking & Subnets
What's the difference between vnetSubnetID and podSubnetID?
vnetSubnetID specifies the subnet for nodes (and pods if podSubnetID isn’t set). podSubnetID specifies a separate subnet for pod IPs. When using podIPAllocationMode, you must specify podSubnetID.
Can nodes have dedicated public IPs?
Yes, set enableNodePublicIP to true. This is useful for gaming workloads requiring direct connections. The default is false.
Advanced Features
How do I create an agent pool from a snapshot?
Use the creationData property with sourceResourceId pointing to your snapshot resource ID.
Can I use the Message of the Day feature with Windows nodes?
No, messageOfTheDay must not be specified for Windows nodes. It’s only for Linux nodes and must be a base64-encoded static string.
What advanced compute features are available?
You can enable host-based encryption (enableEncryptionAtHost), UltraSSD (enableUltraSSD), GPU MIG profiles (gpuInstanceProfile), Dedicated Host Groups (hostGroupID), and Capacity Reservation Groups (capacityReservationGroupID).

Using a different cloud?

Explore containers guides for other cloud providers: