Configure GCP Backend Services for Load Balancing

The gcp:compute/backendService:BackendService resource, part of the Pulumi GCP provider, defines a global backend service that groups compute resources for load balancing and configures traffic routing, health checking, and optional CDN caching. This guide focuses on four capabilities: health check integration, Cloud CDN configuration and cache policies, Traffic Director service mesh routing, and custom metrics-based load balancing.

Backend services reference health checks, network endpoint groups or instance groups, and optionally security policies or TLS configurations. The examples are intentionally small. Combine them with your own compute resources, networking, and security infrastructure.

Create a backend service with health checks

Most load balancing deployments start by defining a backend service that groups compute resources and monitors their health.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// HTTP health check that requests "/" with a 1-second interval and a 1-second timeout.
const defaultHttpHealthCheck = new gcp.compute.HttpHealthCheck("default", {
    name: "health-check",
    requestPath: "/",
    checkIntervalSec: 1,
    timeoutSec: 1,
});
// Backend service linked to the health check by ID; no backends are attached in this example.
const _default = new gcp.compute.BackendService("default", {
    name: "backend-service",
    healthChecks: defaultHttpHealthCheck.id,
});
import pulumi
import pulumi_gcp as gcp

# HTTP health check that requests "/" with a 1-second interval and a 1-second timeout.
default_http_health_check = gcp.compute.HttpHealthCheck("default",
    name="health-check",
    request_path="/",
    check_interval_sec=1,
    timeout_sec=1)
# Backend service linked to the health check by ID; no backends are attached in this example.
default = gcp.compute.BackendService("default",
    name="backend-service",
    health_checks=default_http_health_check.id)
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/compute"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers an HTTP health check and a backend service that references
// it. Any resource-creation error is returned to the Pulumi runtime.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Health check requesting "/" with a 1-second interval and timeout.
		probeArgs := &compute.HttpHealthCheckArgs{
			Name:             pulumi.String("health-check"),
			RequestPath:      pulumi.String("/"),
			CheckIntervalSec: pulumi.Int(1),
			TimeoutSec:       pulumi.Int(1),
		}
		probe, err := compute.NewHttpHealthCheck(ctx, "default", probeArgs)
		if err != nil {
			return err
		}

		// The backend service handle is not used afterwards, so only the
		// error (nil on success) is propagated.
		_, err = compute.NewBackendService(ctx, "default", &compute.BackendServiceArgs{
			Name:         pulumi.String("backend-service"),
			HealthChecks: probe.ID(),
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Program entry point: declares an HTTP health check and a backend service that references it.
return await Deployment.RunAsync(() => 
{
    // HTTP health check that requests "/" with a 1-second interval and a 1-second timeout.
    var defaultHttpHealthCheck = new Gcp.Compute.HttpHealthCheck("default", new()
    {
        Name = "health-check",
        RequestPath = "/",
        CheckIntervalSec = 1,
        TimeoutSec = 1,
    });

    // Backend service linked to the health check by ID; no backends are attached in this example.
    var @default = new Gcp.Compute.BackendService("default", new()
    {
        Name = "backend-service",
        HealthChecks = defaultHttpHealthCheck.Id,
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.compute.HttpHealthCheck;
import com.pulumi.gcp.compute.HttpHealthCheckArgs;
import com.pulumi.gcp.compute.BackendService;
import com.pulumi.gcp.compute.BackendServiceArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that declares an HTTP health check and a backend service
 * that references it.
 */
public class App {
    /** Hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the stack's resources.
     *
     * @param ctx the Pulumi context supplied by the runtime (not used directly here)
     */
    public static void stack(Context ctx) {
        // HTTP health check that requests "/" with a 1-second interval and a 1-second timeout.
        var defaultHttpHealthCheck = new HttpHealthCheck("defaultHttpHealthCheck", HttpHealthCheckArgs.builder()
            .name("health-check")
            .requestPath("/")
            .checkIntervalSec(1)
            .timeoutSec(1)
            .build());

        // Backend service linked to the health check by ID; no backends are attached in this example.
        var default_ = new BackendService("default", BackendServiceArgs.builder()
            .name("backend-service")
            .healthChecks(defaultHttpHealthCheck.id())
            .build());

    }
}
# Declares an HTTP health check and a backend service that references it.
resources:
  # Backend service linked to the health check by ID; no backends are attached in this example.
  default:
    type: gcp:compute:BackendService
    properties:
      name: backend-service
      healthChecks: ${defaultHttpHealthCheck.id}
  # HTTP health check that requests "/" with a 1-second interval and a 1-second timeout.
  defaultHttpHealthCheck:
    type: gcp:compute:HttpHealthCheck
    name: default
    properties:
      name: health-check
      requestPath: /
      checkIntervalSec: 1
      timeoutSec: 1

The healthChecks property links to an HttpHealthCheck resource that periodically probes backends. The backend service uses these health signals to route traffic only to healthy instances. Without backends configured, this service defines the health monitoring foundation but doesn’t yet route traffic.

Enable Cloud CDN with signed URL caching

Applications serving static or cacheable content often enable Cloud CDN to reduce latency and backend load.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// HTTP health check that requests "/" with a 1-second interval and a 1-second timeout.
const defaultHttpHealthCheck = new gcp.compute.HttpHealthCheck("default", {
    name: "health-check",
    requestPath: "/",
    checkIntervalSec: 1,
    timeoutSec: 1,
});
// Backend service with Cloud CDN switched on.
const _default = new gcp.compute.BackendService("default", {
    name: "backend-service",
    healthChecks: defaultHttpHealthCheck.id,
    enableCdn: true,
    cdnPolicy: {
        // 7200 seconds = 2 hours.
        signedUrlCacheMaxAgeSec: 7200,
    },
});
import pulumi
import pulumi_gcp as gcp

# HTTP health check that requests "/" with a 1-second interval and a 1-second timeout.
default_http_health_check = gcp.compute.HttpHealthCheck("default",
    name="health-check",
    request_path="/",
    check_interval_sec=1,
    timeout_sec=1)
# Backend service with Cloud CDN switched on.
default = gcp.compute.BackendService("default",
    name="backend-service",
    health_checks=default_http_health_check.id,
    enable_cdn=True,
    cdn_policy={
        # 7200 seconds = 2 hours.
        "signed_url_cache_max_age_sec": 7200,
    })
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/compute"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers an HTTP health check and a CDN-enabled backend service whose
// CDN policy sets the signed-URL cache max age. Any resource-creation error is
// returned to the Pulumi runtime.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		probe, err := compute.NewHttpHealthCheck(ctx, "default", &compute.HttpHealthCheckArgs{
			Name:             pulumi.String("health-check"),
			RequestPath:      pulumi.String("/"),
			CheckIntervalSec: pulumi.Int(1),
			TimeoutSec:       pulumi.Int(1),
		})
		if err != nil {
			return err
		}

		// 7200 seconds = 2 hours.
		cdnPolicy := &compute.BackendServiceCdnPolicyArgs{
			SignedUrlCacheMaxAgeSec: pulumi.Int(7200),
		}

		// Only the error (nil on success) is needed from the final call.
		_, err = compute.NewBackendService(ctx, "default", &compute.BackendServiceArgs{
			Name:         pulumi.String("backend-service"),
			HealthChecks: probe.ID(),
			EnableCdn:    pulumi.Bool(true),
			CdnPolicy:    cdnPolicy,
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Program entry point: declares an HTTP health check and a CDN-enabled backend service.
return await Deployment.RunAsync(() => 
{
    // HTTP health check that requests "/" with a 1-second interval and a 1-second timeout.
    var defaultHttpHealthCheck = new Gcp.Compute.HttpHealthCheck("default", new()
    {
        Name = "health-check",
        RequestPath = "/",
        CheckIntervalSec = 1,
        TimeoutSec = 1,
    });

    // Backend service with Cloud CDN switched on.
    var @default = new Gcp.Compute.BackendService("default", new()
    {
        Name = "backend-service",
        HealthChecks = defaultHttpHealthCheck.Id,
        EnableCdn = true,
        CdnPolicy = new Gcp.Compute.Inputs.BackendServiceCdnPolicyArgs
        {
            // 7200 seconds = 2 hours.
            SignedUrlCacheMaxAgeSec = 7200,
        },
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.compute.HttpHealthCheck;
import com.pulumi.gcp.compute.HttpHealthCheckArgs;
import com.pulumi.gcp.compute.BackendService;
import com.pulumi.gcp.compute.BackendServiceArgs;
import com.pulumi.gcp.compute.inputs.BackendServiceCdnPolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that declares an HTTP health check and a CDN-enabled
 * backend service with a signed-URL cache max age.
 */
public class App {
    /** Hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the stack's resources.
     *
     * @param ctx the Pulumi context supplied by the runtime (not used directly here)
     */
    public static void stack(Context ctx) {
        // HTTP health check that requests "/" with a 1-second interval and a 1-second timeout.
        var defaultHttpHealthCheck = new HttpHealthCheck("defaultHttpHealthCheck", HttpHealthCheckArgs.builder()
            .name("health-check")
            .requestPath("/")
            .checkIntervalSec(1)
            .timeoutSec(1)
            .build());

        // Backend service with Cloud CDN switched on; 7200 seconds = 2 hours.
        var default_ = new BackendService("default", BackendServiceArgs.builder()
            .name("backend-service")
            .healthChecks(defaultHttpHealthCheck.id())
            .enableCdn(true)
            .cdnPolicy(BackendServiceCdnPolicyArgs.builder()
                .signedUrlCacheMaxAgeSec(7200)
                .build())
            .build());

    }
}
# Declares an HTTP health check and a CDN-enabled backend service.
resources:
  # Backend service with Cloud CDN switched on.
  default:
    type: gcp:compute:BackendService
    properties:
      name: backend-service
      healthChecks: ${defaultHttpHealthCheck.id}
      enableCdn: true
      cdnPolicy:
        # 7200 seconds = 2 hours.
        signedUrlCacheMaxAgeSec: 7200
  # HTTP health check that requests "/" with a 1-second interval and a 1-second timeout.
  defaultHttpHealthCheck:
    type: gcp:compute:HttpHealthCheck
    name: default
    properties:
      name: health-check
      requestPath: /
      checkIntervalSec: 1
      timeoutSec: 1

Setting enableCdn to true activates Cloud CDN for this backend service. The cdnPolicy block configures caching behavior; signedUrlCacheMaxAgeSec sets how long a cached response to a signed URL request is considered fresh (7200 seconds = 2 hours) before Cloud CDN revalidates it with the backend. Signed URLs provide time-limited access control for cached content.

Configure CDN cache behavior and TTLs

Production CDN deployments need fine-grained control over what gets cached and for how long.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// HTTP health check that requests "/" with a 1-second interval and a 1-second timeout.
const defaultHttpHealthCheck = new gcp.compute.HttpHealthCheck("default", {
    name: "health-check",
    requestPath: "/",
    checkIntervalSec: 1,
    timeoutSec: 1,
});
// CDN-enabled backend service with an explicit cache mode and TTLs.
const _default = new gcp.compute.BackendService("default", {
    name: "backend-service",
    healthChecks: defaultHttpHealthCheck.id,
    enableCdn: true,
    cdnPolicy: {
        cacheMode: "CACHE_ALL_STATIC",
        // TTL values: 3600 / 7200 / 10800 (1 h / 2 h / 3 h if in seconds).
        defaultTtl: 3600,
        clientTtl: 7200,
        maxTtl: 10800,
        negativeCaching: true,
        signedUrlCacheMaxAgeSec: 7200,
    },
});
import pulumi
import pulumi_gcp as gcp

# HTTP health check that requests "/" with a 1-second interval and a 1-second timeout.
default_http_health_check = gcp.compute.HttpHealthCheck("default",
    name="health-check",
    request_path="/",
    check_interval_sec=1,
    timeout_sec=1)
# CDN-enabled backend service with an explicit cache mode and TTLs.
default = gcp.compute.BackendService("default",
    name="backend-service",
    health_checks=default_http_health_check.id,
    enable_cdn=True,
    cdn_policy={
        "cache_mode": "CACHE_ALL_STATIC",
        # TTL values: 3600 / 7200 / 10800 (1 h / 2 h / 3 h if in seconds).
        "default_ttl": 3600,
        "client_ttl": 7200,
        "max_ttl": 10800,
        "negative_caching": True,
        "signed_url_cache_max_age_sec": 7200,
    })
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/compute"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers an HTTP health check and a CDN-enabled backend service with
// an explicit cache mode, TTLs, and negative caching. Any resource-creation
// error is returned to the Pulumi runtime.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		probe, err := compute.NewHttpHealthCheck(ctx, "default", &compute.HttpHealthCheckArgs{
			Name:             pulumi.String("health-check"),
			RequestPath:      pulumi.String("/"),
			CheckIntervalSec: pulumi.Int(1),
			TimeoutSec:       pulumi.Int(1),
		})
		if err != nil {
			return err
		}

		// Cache policy kept separate from the service arguments for readability.
		cdnPolicy := &compute.BackendServiceCdnPolicyArgs{
			CacheMode:               pulumi.String("CACHE_ALL_STATIC"),
			DefaultTtl:              pulumi.Int(3600),
			ClientTtl:               pulumi.Int(7200),
			MaxTtl:                  pulumi.Int(10800),
			NegativeCaching:         pulumi.Bool(true),
			SignedUrlCacheMaxAgeSec: pulumi.Int(7200),
		}

		// Only the error (nil on success) is needed from the final call.
		_, err = compute.NewBackendService(ctx, "default", &compute.BackendServiceArgs{
			Name:         pulumi.String("backend-service"),
			HealthChecks: probe.ID(),
			EnableCdn:    pulumi.Bool(true),
			CdnPolicy:    cdnPolicy,
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Program entry point: declares an HTTP health check and a CDN-enabled backend service
// with an explicit cache mode and TTLs.
return await Deployment.RunAsync(() => 
{
    // HTTP health check that requests "/" with a 1-second interval and a 1-second timeout.
    var defaultHttpHealthCheck = new Gcp.Compute.HttpHealthCheck("default", new()
    {
        Name = "health-check",
        RequestPath = "/",
        CheckIntervalSec = 1,
        TimeoutSec = 1,
    });

    // CDN-enabled backend service.
    var @default = new Gcp.Compute.BackendService("default", new()
    {
        Name = "backend-service",
        HealthChecks = defaultHttpHealthCheck.Id,
        EnableCdn = true,
        CdnPolicy = new Gcp.Compute.Inputs.BackendServiceCdnPolicyArgs
        {
            CacheMode = "CACHE_ALL_STATIC",
            // TTL values: 3600 / 7200 / 10800 (1 h / 2 h / 3 h if in seconds).
            DefaultTtl = 3600,
            ClientTtl = 7200,
            MaxTtl = 10800,
            NegativeCaching = true,
            SignedUrlCacheMaxAgeSec = 7200,
        },
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.compute.HttpHealthCheck;
import com.pulumi.gcp.compute.HttpHealthCheckArgs;
import com.pulumi.gcp.compute.BackendService;
import com.pulumi.gcp.compute.BackendServiceArgs;
import com.pulumi.gcp.compute.inputs.BackendServiceCdnPolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that declares an HTTP health check and a CDN-enabled
 * backend service with an explicit cache mode and TTLs.
 */
public class App {
    /** Hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the stack's resources.
     *
     * @param ctx the Pulumi context supplied by the runtime (not used directly here)
     */
    public static void stack(Context ctx) {
        // HTTP health check that requests "/" with a 1-second interval and a 1-second timeout.
        var defaultHttpHealthCheck = new HttpHealthCheck("defaultHttpHealthCheck", HttpHealthCheckArgs.builder()
            .name("health-check")
            .requestPath("/")
            .checkIntervalSec(1)
            .timeoutSec(1)
            .build());

        // CDN-enabled backend service; TTL values are 3600 / 7200 / 10800.
        var default_ = new BackendService("default", BackendServiceArgs.builder()
            .name("backend-service")
            .healthChecks(defaultHttpHealthCheck.id())
            .enableCdn(true)
            .cdnPolicy(BackendServiceCdnPolicyArgs.builder()
                .cacheMode("CACHE_ALL_STATIC")
                .defaultTtl(3600)
                .clientTtl(7200)
                .maxTtl(10800)
                .negativeCaching(true)
                .signedUrlCacheMaxAgeSec(7200)
                .build())
            .build());

    }
}
# Declares an HTTP health check and a CDN-enabled backend service with an
# explicit cache mode and TTLs.
resources:
  default:
    type: gcp:compute:BackendService
    properties:
      name: backend-service
      healthChecks: ${defaultHttpHealthCheck.id}
      enableCdn: true
      cdnPolicy:
        cacheMode: CACHE_ALL_STATIC
        # TTL values: 3600 / 7200 / 10800 (1 h / 2 h / 3 h if in seconds).
        defaultTtl: 3600
        clientTtl: 7200
        maxTtl: 10800
        negativeCaching: true
        signedUrlCacheMaxAgeSec: 7200
  # HTTP health check that requests "/" with a 1-second interval and a 1-second timeout.
  defaultHttpHealthCheck:
    type: gcp:compute:HttpHealthCheck
    name: default
    properties:
      name: health-check
      requestPath: /
      checkIntervalSec: 1
      timeoutSec: 1

The cacheMode property determines what content gets cached (CACHE_ALL_STATIC caches static content automatically). The TTL properties control cache lifetime: defaultTtl sets the cache duration for responses that don't specify their own TTL, clientTtl caps the TTL advertised to clients in Cache-Control headers, and maxTtl caps how long Cloud CDN itself keeps any response. Setting negativeCaching to true caches error responses to reduce backend load during failures.

Customize cache keys with HTTP headers

Some applications need cache keys that include custom headers to serve different content based on request metadata.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// CDN-enabled backend service whose cache key includes host, protocol,
// query string, and the X-My-Header-Field request header.
const _default = new gcp.compute.BackendService("default", {
    name: "backend-service",
    enableCdn: true,
    cdnPolicy: {
        cacheMode: "USE_ORIGIN_HEADERS",
        cacheKeyPolicy: {
            includeHost: true,
            includeProtocol: true,
            includeQueryString: true,
            // Requests with different values for this header get separate cache entries.
            includeHttpHeaders: ["X-My-Header-Field"],
        },
    },
});
import pulumi
import pulumi_gcp as gcp

# CDN-enabled backend service whose cache key includes host, protocol,
# query string, and the X-My-Header-Field request header.
default = gcp.compute.BackendService("default",
    name="backend-service",
    enable_cdn=True,
    cdn_policy={
        "cache_mode": "USE_ORIGIN_HEADERS",
        "cache_key_policy": {
            "include_host": True,
            "include_protocol": True,
            "include_query_string": True,
            # Requests with different values for this header get separate cache entries.
            "include_http_headers": ["X-My-Header-Field"],
        },
    })
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/compute"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers a CDN-enabled backend service whose cache key includes the
// host, protocol, query string, and the X-My-Header-Field request header. Any
// resource-creation error is returned to the Pulumi runtime.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Request attributes that take part in cache lookups.
		cacheKey := &compute.BackendServiceCdnPolicyCacheKeyPolicyArgs{
			IncludeHost:        pulumi.Bool(true),
			IncludeProtocol:    pulumi.Bool(true),
			IncludeQueryString: pulumi.Bool(true),
			IncludeHttpHeaders: pulumi.StringArray{
				pulumi.String("X-My-Header-Field"),
			},
		}

		// Only the error (nil on success) is needed from the call.
		_, err := compute.NewBackendService(ctx, "default", &compute.BackendServiceArgs{
			Name:      pulumi.String("backend-service"),
			EnableCdn: pulumi.Bool(true),
			CdnPolicy: &compute.BackendServiceCdnPolicyArgs{
				CacheMode:      pulumi.String("USE_ORIGIN_HEADERS"),
				CacheKeyPolicy: cacheKey,
			},
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Program entry point: declares a CDN-enabled backend service with a custom cache key policy.
return await Deployment.RunAsync(() => 
{
    // Cache key includes host, protocol, query string, and the X-My-Header-Field request header.
    var @default = new Gcp.Compute.BackendService("default", new()
    {
        Name = "backend-service",
        EnableCdn = true,
        CdnPolicy = new Gcp.Compute.Inputs.BackendServiceCdnPolicyArgs
        {
            CacheMode = "USE_ORIGIN_HEADERS",
            CacheKeyPolicy = new Gcp.Compute.Inputs.BackendServiceCdnPolicyCacheKeyPolicyArgs
            {
                IncludeHost = true,
                IncludeProtocol = true,
                IncludeQueryString = true,
                // Requests with different values for this header get separate cache entries.
                IncludeHttpHeaders = new[]
                {
                    "X-My-Header-Field",
                },
            },
        },
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.compute.BackendService;
import com.pulumi.gcp.compute.BackendServiceArgs;
import com.pulumi.gcp.compute.inputs.BackendServiceCdnPolicyArgs;
import com.pulumi.gcp.compute.inputs.BackendServiceCdnPolicyCacheKeyPolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that declares a CDN-enabled backend service with a custom
 * cache key policy.
 */
public class App {
    /** Hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the stack's resources.
     *
     * @param ctx the Pulumi context supplied by the runtime (not used directly here)
     */
    public static void stack(Context ctx) {
        // Cache key includes host, protocol, query string, and the X-My-Header-Field request header.
        var default_ = new BackendService("default", BackendServiceArgs.builder()
            .name("backend-service")
            .enableCdn(true)
            .cdnPolicy(BackendServiceCdnPolicyArgs.builder()
                .cacheMode("USE_ORIGIN_HEADERS")
                .cacheKeyPolicy(BackendServiceCdnPolicyCacheKeyPolicyArgs.builder()
                    .includeHost(true)
                    .includeProtocol(true)
                    .includeQueryString(true)
                    .includeHttpHeaders("X-My-Header-Field")
                    .build())
                .build())
            .build());

    }
}
# Declares a CDN-enabled backend service with a custom cache key policy.
resources:
  default:
    type: gcp:compute:BackendService
    properties:
      name: backend-service
      enableCdn: true
      cdnPolicy:
        cacheMode: USE_ORIGIN_HEADERS
        # Request attributes that take part in cache lookups.
        cacheKeyPolicy:
          includeHost: true
          includeProtocol: true
          includeQueryString: true
          # Requests with different values for this header get separate cache entries.
          includeHttpHeaders:
            - X-My-Header-Field

The cacheKeyPolicy block determines which request attributes affect cache lookups. Setting includeHttpHeaders to ["X-My-Header-Field"] means requests with different values for that header get separate cache entries. This lets you vary cached responses by custom headers while still benefiting from CDN caching.

Configure Traffic Director with round-robin routing

Service mesh deployments using Traffic Director require backend services with INTERNAL_SELF_MANAGED load balancing.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Health check using an HTTP probe on port 80.
const healthCheck = new gcp.compute.HealthCheck("health_check", {
    name: "health-check",
    httpHealthCheck: {
        port: 80,
    },
});
// Backend service for Traffic Director (INTERNAL_SELF_MANAGED) with round-robin locality policy.
const _default = new gcp.compute.BackendService("default", {
    name: "backend-service",
    healthChecks: healthCheck.id,
    loadBalancingScheme: "INTERNAL_SELF_MANAGED",
    localityLbPolicy: "ROUND_ROBIN",
});
import pulumi
import pulumi_gcp as gcp

# Health check using an HTTP probe on port 80.
health_check = gcp.compute.HealthCheck("health_check",
    name="health-check",
    http_health_check={
        "port": 80,
    })
# Backend service for Traffic Director (INTERNAL_SELF_MANAGED) with round-robin locality policy.
default = gcp.compute.BackendService("default",
    name="backend-service",
    health_checks=health_check.id,
    load_balancing_scheme="INTERNAL_SELF_MANAGED",
    locality_lb_policy="ROUND_ROBIN")
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/compute"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers a health check with an HTTP probe on port 80 and an
// INTERNAL_SELF_MANAGED backend service that uses the ROUND_ROBIN locality
// policy. Any resource-creation error is returned to the Pulumi runtime.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		probeArgs := &compute.HealthCheckArgs{
			Name: pulumi.String("health-check"),
			HttpHealthCheck: &compute.HealthCheckHttpHealthCheckArgs{
				Port: pulumi.Int(80),
			},
		}
		probe, err := compute.NewHealthCheck(ctx, "health_check", probeArgs)
		if err != nil {
			return err
		}

		// Only the error (nil on success) is needed from the final call.
		_, err = compute.NewBackendService(ctx, "default", &compute.BackendServiceArgs{
			Name:                pulumi.String("backend-service"),
			HealthChecks:        probe.ID(),
			LoadBalancingScheme: pulumi.String("INTERNAL_SELF_MANAGED"),
			LocalityLbPolicy:    pulumi.String("ROUND_ROBIN"),
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Program entry point: declares a health check and a Traffic Director backend service.
return await Deployment.RunAsync(() => 
{
    // Health check using an HTTP probe on port 80.
    var healthCheck = new Gcp.Compute.HealthCheck("health_check", new()
    {
        Name = "health-check",
        HttpHealthCheck = new Gcp.Compute.Inputs.HealthCheckHttpHealthCheckArgs
        {
            Port = 80,
        },
    });

    // Backend service for Traffic Director (INTERNAL_SELF_MANAGED) with round-robin locality policy.
    var @default = new Gcp.Compute.BackendService("default", new()
    {
        Name = "backend-service",
        HealthChecks = healthCheck.Id,
        LoadBalancingScheme = "INTERNAL_SELF_MANAGED",
        LocalityLbPolicy = "ROUND_ROBIN",
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.compute.HealthCheck;
import com.pulumi.gcp.compute.HealthCheckArgs;
import com.pulumi.gcp.compute.inputs.HealthCheckHttpHealthCheckArgs;
import com.pulumi.gcp.compute.BackendService;
import com.pulumi.gcp.compute.BackendServiceArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that declares a health check and a Traffic Director
 * (INTERNAL_SELF_MANAGED) backend service with round-robin locality policy.
 */
public class App {
    /** Hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the stack's resources.
     *
     * @param ctx the Pulumi context supplied by the runtime (not used directly here)
     */
    public static void stack(Context ctx) {
        // Health check using an HTTP probe on port 80.
        var healthCheck = new HealthCheck("healthCheck", HealthCheckArgs.builder()
            .name("health-check")
            .httpHealthCheck(HealthCheckHttpHealthCheckArgs.builder()
                .port(80)
                .build())
            .build());

        // Backend service linked to the health check by ID.
        var default_ = new BackendService("default", BackendServiceArgs.builder()
            .name("backend-service")
            .healthChecks(healthCheck.id())
            .loadBalancingScheme("INTERNAL_SELF_MANAGED")
            .localityLbPolicy("ROUND_ROBIN")
            .build());

    }
}
# Declares a health check and a Traffic Director backend service.
resources:
  # Backend service for Traffic Director (INTERNAL_SELF_MANAGED) with round-robin locality policy.
  default:
    type: gcp:compute:BackendService
    properties:
      name: backend-service
      healthChecks: ${healthCheck.id}
      loadBalancingScheme: INTERNAL_SELF_MANAGED
      localityLbPolicy: ROUND_ROBIN
  # Health check using an HTTP probe on port 80.
  healthCheck:
    type: gcp:compute:HealthCheck
    name: health_check
    properties:
      name: health-check
      httpHealthCheck:
        port: 80

Setting loadBalancingScheme to INTERNAL_SELF_MANAGED enables Traffic Director integration for service mesh routing. The localityLbPolicy of ROUND_ROBIN distributes traffic evenly across healthy backends within each locality. This configuration provides basic load distribution without session affinity.

Enable consistent hashing with circuit breakers

Applications requiring session affinity or consistent routing to the same backend use ring hash load balancing.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Health check using an HTTP probe on port 80.
const healthCheck = new gcp.compute.HealthCheck("health_check", {
    name: "health-check",
    httpHealthCheck: {
        port: 80,
    },
});
// INTERNAL_SELF_MANAGED backend service using ring-hash load balancing with
// HTTP-cookie session affinity, plus circuit breaker and outlier detection settings.
const _default = new gcp.compute.BackendService("default", {
    name: "backend-service",
    healthChecks: healthCheck.id,
    loadBalancingScheme: "INTERNAL_SELF_MANAGED",
    localityLbPolicy: "RING_HASH",
    sessionAffinity: "HTTP_COOKIE",
    circuitBreakers: {
        maxConnections: 10,
    },
    // Hash on the cookie named "mycookie"; its TTL is 11 seconds plus 1111 nanoseconds.
    consistentHash: {
        httpCookie: {
            ttl: {
                seconds: 11,
                nanos: 1111,
            },
            name: "mycookie",
        },
    },
    // NOTE(review): units and semantics of these thresholds are not shown here;
    // confirm against the provider reference before changing them.
    outlierDetection: {
        consecutiveErrors: 2,
        consecutiveGatewayFailure: 5,
        enforcingConsecutiveErrors: 100,
        enforcingConsecutiveGatewayFailure: 0,
        enforcingSuccessRate: 100,
        maxEjectionPercent: 10,
        successRateMinimumHosts: 5,
        successRateRequestVolume: 100,
        successRateStdevFactor: 1900,
    },
});
import pulumi
import pulumi_gcp as gcp

# Health check using an HTTP probe on port 80.
health_check = gcp.compute.HealthCheck("health_check",
    name="health-check",
    http_health_check={
        "port": 80,
    })
# INTERNAL_SELF_MANAGED backend service using ring-hash load balancing with
# HTTP-cookie session affinity, plus circuit breaker and outlier detection settings.
default = gcp.compute.BackendService("default",
    name="backend-service",
    health_checks=health_check.id,
    load_balancing_scheme="INTERNAL_SELF_MANAGED",
    locality_lb_policy="RING_HASH",
    session_affinity="HTTP_COOKIE",
    circuit_breakers={
        "max_connections": 10,
    },
    # Hash on the cookie named "mycookie"; its TTL is 11 seconds plus 1111 nanoseconds.
    consistent_hash={
        "http_cookie": {
            "ttl": {
                "seconds": 11,
                "nanos": 1111,
            },
            "name": "mycookie",
        },
    },
    # NOTE(review): units and semantics of these thresholds are not shown here;
    # confirm against the provider reference before changing them.
    outlier_detection={
        "consecutive_errors": 2,
        "consecutive_gateway_failure": 5,
        "enforcing_consecutive_errors": 100,
        "enforcing_consecutive_gateway_failure": 0,
        "enforcing_success_rate": 100,
        "max_ejection_percent": 10,
        "success_rate_minimum_hosts": 5,
        "success_rate_request_volume": 100,
        "success_rate_stdev_factor": 1900,
    })
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/compute"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers a health check with an HTTP probe on port 80 and an
// INTERNAL_SELF_MANAGED backend service that uses RING_HASH load balancing
// with HTTP_COOKIE session affinity, together with circuit breaker and outlier
// detection settings. Any resource-creation error is returned to the Pulumi
// runtime.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		probe, err := compute.NewHealthCheck(ctx, "health_check", &compute.HealthCheckArgs{
			Name: pulumi.String("health-check"),
			HttpHealthCheck: &compute.HealthCheckHttpHealthCheckArgs{
				Port: pulumi.Int(80),
			},
		})
		if err != nil {
			return err
		}

		breakers := &compute.BackendServiceCircuitBreakersArgs{
			MaxConnections: pulumi.Int(10),
		}

		// Hash on the cookie named "mycookie"; its TTL is 11 seconds plus
		// 1111 nanoseconds.
		cookieHash := &compute.BackendServiceConsistentHashArgs{
			HttpCookie: &compute.BackendServiceConsistentHashHttpCookieArgs{
				Ttl: &compute.BackendServiceConsistentHashHttpCookieTtlArgs{
					Seconds: pulumi.Int(11),
					Nanos:   pulumi.Int(1111),
				},
				Name: pulumi.String("mycookie"),
			},
		}

		// NOTE(review): units and semantics of these thresholds are not
		// visible here; confirm against the provider reference.
		outliers := &compute.BackendServiceOutlierDetectionArgs{
			ConsecutiveErrors:                  pulumi.Int(2),
			ConsecutiveGatewayFailure:          pulumi.Int(5),
			EnforcingConsecutiveErrors:         pulumi.Int(100),
			EnforcingConsecutiveGatewayFailure: pulumi.Int(0),
			EnforcingSuccessRate:               pulumi.Int(100),
			MaxEjectionPercent:                 pulumi.Int(10),
			SuccessRateMinimumHosts:            pulumi.Int(5),
			SuccessRateRequestVolume:           pulumi.Int(100),
			SuccessRateStdevFactor:             pulumi.Int(1900),
		}

		// Only the error (nil on success) is needed from the final call.
		_, err = compute.NewBackendService(ctx, "default", &compute.BackendServiceArgs{
			Name:                pulumi.String("backend-service"),
			HealthChecks:        probe.ID(),
			LoadBalancingScheme: pulumi.String("INTERNAL_SELF_MANAGED"),
			LocalityLbPolicy:    pulumi.String("RING_HASH"),
			SessionAffinity:     pulumi.String("HTTP_COOKIE"),
			CircuitBreakers:     breakers,
			ConsistentHash:      cookieHash,
			OutlierDetection:    outliers,
		})
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Program entry point: declares a health check and a ring-hash backend service
// with HTTP-cookie session affinity.
return await Deployment.RunAsync(() => 
{
    // Health check using an HTTP probe on port 80.
    var healthCheck = new Gcp.Compute.HealthCheck("health_check", new()
    {
        Name = "health-check",
        HttpHealthCheck = new Gcp.Compute.Inputs.HealthCheckHttpHealthCheckArgs
        {
            Port = 80,
        },
    });

    // INTERNAL_SELF_MANAGED backend service with circuit breaker and outlier detection settings.
    var @default = new Gcp.Compute.BackendService("default", new()
    {
        Name = "backend-service",
        HealthChecks = healthCheck.Id,
        LoadBalancingScheme = "INTERNAL_SELF_MANAGED",
        LocalityLbPolicy = "RING_HASH",
        SessionAffinity = "HTTP_COOKIE",
        CircuitBreakers = new Gcp.Compute.Inputs.BackendServiceCircuitBreakersArgs
        {
            MaxConnections = 10,
        },
        // Hash on the cookie named "mycookie"; its TTL is 11 seconds plus 1111 nanoseconds.
        ConsistentHash = new Gcp.Compute.Inputs.BackendServiceConsistentHashArgs
        {
            HttpCookie = new Gcp.Compute.Inputs.BackendServiceConsistentHashHttpCookieArgs
            {
                Ttl = new Gcp.Compute.Inputs.BackendServiceConsistentHashHttpCookieTtlArgs
                {
                    Seconds = 11,
                    Nanos = 1111,
                },
                Name = "mycookie",
            },
        },
        // NOTE(review): units and semantics of these thresholds are not shown here;
        // confirm against the provider reference before changing them.
        OutlierDetection = new Gcp.Compute.Inputs.BackendServiceOutlierDetectionArgs
        {
            ConsecutiveErrors = 2,
            ConsecutiveGatewayFailure = 5,
            EnforcingConsecutiveErrors = 100,
            EnforcingConsecutiveGatewayFailure = 0,
            EnforcingSuccessRate = 100,
            MaxEjectionPercent = 10,
            SuccessRateMinimumHosts = 5,
            SuccessRateRequestVolume = 100,
            SuccessRateStdevFactor = 1900,
        },
    });

});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.compute.HealthCheck;
import com.pulumi.gcp.compute.HealthCheckArgs;
import com.pulumi.gcp.compute.inputs.HealthCheckHttpHealthCheckArgs;
import com.pulumi.gcp.compute.BackendService;
import com.pulumi.gcp.compute.BackendServiceArgs;
import com.pulumi.gcp.compute.inputs.BackendServiceCircuitBreakersArgs;
import com.pulumi.gcp.compute.inputs.BackendServiceConsistentHashArgs;
import com.pulumi.gcp.compute.inputs.BackendServiceConsistentHashHttpCookieArgs;
import com.pulumi.gcp.compute.inputs.BackendServiceConsistentHashHttpCookieTtlArgs;
import com.pulumi.gcp.compute.inputs.BackendServiceOutlierDetectionArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that declares an HTTP health check and an INTERNAL_SELF_MANAGED
 * backend service using RING_HASH balancing with HTTP_COOKIE session affinity.
 */
public class App {
    /** Entry point: hands {@link #stack} to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Registers the health check first, then the backend service that references it.
     *
     * @param ctx Pulumi context passed in by the runtime; not read by this program
     */
    public static void stack(Context ctx) {
        // Health check that probes over HTTP on port 80.
        final var httpProbe = HealthCheckHttpHealthCheckArgs.builder()
            .port(80)
            .build();
        final var probe = new HealthCheck("healthCheck", HealthCheckArgs.builder()
            .name("health-check")
            .httpHealthCheck(httpProbe)
            .build());

        // Circuit breaker: at most 10 connections.
        final var breakers = BackendServiceCircuitBreakersArgs.builder()
            .maxConnections(10)
            .build();

        // Consistent-hash cookie "mycookie" with a TTL of 11 seconds + 1111 nanoseconds.
        final var cookieTtl = BackendServiceConsistentHashHttpCookieTtlArgs.builder()
            .seconds(11)
            .nanos(1111)
            .build();
        final var hashCookie = BackendServiceConsistentHashHttpCookieArgs.builder()
            .ttl(cookieTtl)
            .name("mycookie")
            .build();
        final var hashing = BackendServiceConsistentHashArgs.builder()
            .httpCookie(hashCookie)
            .build();

        // Outlier-detection thresholds, passed through unchanged.
        final var outliers = BackendServiceOutlierDetectionArgs.builder()
            .consecutiveErrors(2)
            .consecutiveGatewayFailure(5)
            .enforcingConsecutiveErrors(100)
            .enforcingConsecutiveGatewayFailure(0)
            .enforcingSuccessRate(100)
            .maxEjectionPercent(10)
            .successRateMinimumHosts(5)
            .successRateRequestVolume(100)
            .successRateStdevFactor(1900)
            .build();

        final var backendService = new BackendService("default", BackendServiceArgs.builder()
            .name("backend-service")
            .healthChecks(probe.id())
            .loadBalancingScheme("INTERNAL_SELF_MANAGED")
            .localityLbPolicy("RING_HASH")
            .sessionAffinity("HTTP_COOKIE")
            .circuitBreakers(breakers)
            .consistentHash(hashing)
            .outlierDetection(outliers)
            .build());
    }
}
# Ring-hash backend service with cookie-based session affinity, plus the health
# check it references.
resources:
  default:
    type: gcp:compute:BackendService
    properties:
      name: backend-service
      # Refers to the healthCheck resource declared further down in this file.
      healthChecks: ${healthCheck.id}
      loadBalancingScheme: INTERNAL_SELF_MANAGED
      localityLbPolicy: RING_HASH
      sessionAffinity: HTTP_COOKIE
      circuitBreakers:
        maxConnections: 10
      # Cookie used for consistent hashing; TTL is 11 seconds + 1111 nanoseconds.
      consistentHash:
        httpCookie:
          ttl:
            seconds: 11
            nanos: 1111
          name: mycookie
      outlierDetection:
        consecutiveErrors: 2
        consecutiveGatewayFailure: 5
        enforcingConsecutiveErrors: 100
        enforcingConsecutiveGatewayFailure: 0
        enforcingSuccessRate: 100
        maxEjectionPercent: 10
        successRateMinimumHosts: 5
        successRateRequestVolume: 100
        successRateStdevFactor: 1900
  # HTTP health check on port 80, consumed by the backend service above.
  healthCheck:
    type: gcp:compute:HealthCheck
    name: health_check
    properties:
      name: health-check
      httpHealthCheck:
        port: 80

The RING_HASH locality policy routes requests consistently to the same backend based on a hash of request attributes. The sessionAffinity property (HTTP_COOKIE) enables cookie-based affinity, while consistentHash configures the hashing behavior, here the name and TTL of the cookie that is hashed. The circuitBreakers block limits concurrent connections to prevent overload, and outlierDetection automatically ejects misbehaving backends from the pool based on consecutive errors, consecutive gateway failures, and success-rate statistics.

Route to external endpoints via network endpoint groups

Backend services can route to external destinations like third-party APIs or services outside GCP.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// Internet NEG (INTERNET_FQDN_PORT) whose default port is 443.
const neg = new gcp.compute.GlobalNetworkEndpointGroup("external_proxy", {
    name: "network-endpoint",
    networkEndpointType: "INTERNET_FQDN_PORT",
    defaultPort: 443,
});

// Single FQDN endpoint registered in the group, reusing the group's default port.
const endpoint = new gcp.compute.GlobalNetworkEndpoint("proxy", {
    globalNetworkEndpointGroup: neg.id,
    fqdn: "test.example.com",
    port: neg.defaultPort,
});

// "host: <fqdn>" request header, built from the endpoint's FQDN output.
const hostHeader = pulumi.interpolate`host: ${endpoint.fqdn}`;

// CDN-enabled backend service whose only backend is the internet NEG.
const backendService = new gcp.compute.BackendService("default", {
    name: "backend-service",
    enableCdn: true,
    timeoutSec: 10,
    connectionDrainingTimeoutSec: 10,
    customRequestHeaders: [hostHeader],
    customResponseHeaders: ["X-Cache-Hit: {cdn_cache_status}"],
    backends: [{ group: neg.id }],
});
import pulumi
import pulumi_gcp as gcp

# Internet NEG (INTERNET_FQDN_PORT) whose default port is 443.
neg = gcp.compute.GlobalNetworkEndpointGroup(
    "external_proxy",
    name="network-endpoint",
    network_endpoint_type="INTERNET_FQDN_PORT",
    default_port=443,
)

# Single FQDN endpoint registered in the group, reusing the group's default port.
endpoint = gcp.compute.GlobalNetworkEndpoint(
    "proxy",
    global_network_endpoint_group=neg.id,
    fqdn="test.example.com",
    port=neg.default_port,
)

# "host: <fqdn>" request header, built from the endpoint's FQDN output.
host_header = endpoint.fqdn.apply(lambda fqdn: f"host: {fqdn}")

# CDN-enabled backend service whose only backend is the internet NEG.
backend_service = gcp.compute.BackendService(
    "default",
    name="backend-service",
    enable_cdn=True,
    timeout_sec=10,
    connection_draining_timeout_sec=10,
    custom_request_headers=[host_header],
    custom_response_headers=["X-Cache-Hit: {cdn_cache_status}"],
    backends=[{"group": neg.id}],
)
package main

import (
	"fmt"

	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/compute"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main declares an internet NEG (INTERNET_FQDN_PORT) holding a single FQDN
// endpoint, and a CDN-enabled backend service that forwards to that NEG with a
// "host: <fqdn>" request header.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		externalProxy, err := compute.NewGlobalNetworkEndpointGroup(ctx, "external_proxy", &compute.GlobalNetworkEndpointGroupArgs{
			Name:                pulumi.String("network-endpoint"),
			NetworkEndpointType: pulumi.String("INTERNET_FQDN_PORT"),
			DefaultPort:         pulumi.Int(443),
		})
		if err != nil {
			return err
		}
		proxy, err := compute.NewGlobalNetworkEndpoint(ctx, "proxy", &compute.GlobalNetworkEndpointArgs{
			GlobalNetworkEndpointGroup: externalProxy.ID(),
			Fqdn:                       pulumi.String("test.example.com"),
			Port:                       externalProxy.DefaultPort,
		})
		if err != nil {
			return err
		}
		_, err = compute.NewBackendService(ctx, "default", &compute.BackendServiceArgs{
			Name:                         pulumi.String("backend-service"),
			EnableCdn:                    pulumi.Bool(true),
			TimeoutSec:                   pulumi.Int(10),
			ConnectionDrainingTimeoutSec: pulumi.Int(10),
			CustomRequestHeaders: pulumi.StringArray{
				// The callback receives *string. Dereference it: formatting the
				// pointer itself with %v would emit a memory address, not the FQDN.
				proxy.Fqdn.ApplyT(func(fqdn *string) (string, error) {
					if fqdn == nil {
						return "", fmt.Errorf("global network endpoint %q has no fqdn", "proxy")
					}
					return fmt.Sprintf("host: %v", *fqdn), nil
				}).(pulumi.StringOutput),
			},
			CustomResponseHeaders: pulumi.StringArray{
				pulumi.String("X-Cache-Hit: {cdn_cache_status}"),
			},
			Backends: compute.BackendServiceBackendArray{
				&compute.BackendServiceBackendArgs{
					Group: externalProxy.ID(),
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// Internet NEG with a single FQDN endpoint, fronted by a CDN-enabled backend service.
return await Deployment.RunAsync(() =>
{
    // Internet NEG (INTERNET_FQDN_PORT) whose default port is 443.
    var neg = new Gcp.Compute.GlobalNetworkEndpointGroup("external_proxy", new Gcp.Compute.GlobalNetworkEndpointGroupArgs
    {
        Name = "network-endpoint",
        NetworkEndpointType = "INTERNET_FQDN_PORT",
        DefaultPort = 443,
    });

    // Single FQDN endpoint registered in the group, reusing the group's default port.
    var endpoint = new Gcp.Compute.GlobalNetworkEndpoint("proxy", new Gcp.Compute.GlobalNetworkEndpointArgs
    {
        GlobalNetworkEndpointGroup = neg.Id,
        Fqdn = "test.example.com",
        Port = neg.DefaultPort,
    });

    // "host: <fqdn>" request header, built from the endpoint's FQDN output.
    var hostHeader = endpoint.Fqdn.Apply(fqdn => $"host: {fqdn}");

    // The NEG is the only backend of the service.
    var negBackend = new Gcp.Compute.Inputs.BackendServiceBackendArgs
    {
        Group = neg.Id,
    };

    var backendService = new Gcp.Compute.BackendService("default", new Gcp.Compute.BackendServiceArgs
    {
        Name = "backend-service",
        EnableCdn = true,
        TimeoutSec = 10,
        ConnectionDrainingTimeoutSec = 10,
        CustomRequestHeaders = new[] { hostHeader },
        CustomResponseHeaders = new[] { "X-Cache-Hit: {cdn_cache_status}" },
        Backends = new[] { negBackend },
    });
});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.compute.GlobalNetworkEndpointGroup;
import com.pulumi.gcp.compute.GlobalNetworkEndpointGroupArgs;
import com.pulumi.gcp.compute.GlobalNetworkEndpoint;
import com.pulumi.gcp.compute.GlobalNetworkEndpointArgs;
import com.pulumi.gcp.compute.BackendService;
import com.pulumi.gcp.compute.BackendServiceArgs;
import com.pulumi.gcp.compute.inputs.BackendServiceBackendArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that declares an internet NEG (INTERNET_FQDN_PORT) with a single
 * FQDN endpoint and a CDN-enabled backend service that forwards to it with a
 * {@code host: <fqdn>} request header.
 */
public class App {
    /** Entry point: hands {@link #stack} to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Registers the endpoint group, its endpoint, and the backend service.
     *
     * @param ctx Pulumi context passed in by the runtime; not read by this program
     */
    public static void stack(Context ctx) {
        var externalProxy = new GlobalNetworkEndpointGroup("externalProxy", GlobalNetworkEndpointGroupArgs.builder()
            .name("network-endpoint")
            .networkEndpointType("INTERNET_FQDN_PORT")
            .defaultPort(443)
            .build());

        var proxy = new GlobalNetworkEndpoint("proxy", GlobalNetworkEndpointArgs.builder()
            .globalNetworkEndpointGroup(externalProxy.id())
            .fqdn("test.example.com")
            .port(externalProxy.defaultPort())
            .build());

        var default_ = new BackendService("default", BackendServiceArgs.builder()
            .name("backend-service")
            .enableCdn(true)
            .timeoutSec(10)
            .connectionDrainingTimeoutSec(10)
            // The FQDN output carries an Optional: unwrap it so the header reads
            // "host: test.example.com" rather than "host: Optional[...]", and wrap
            // the result in a list because customRequestHeaders expects a list.
            .customRequestHeaders(proxy.fqdn().applyValue(fqdn ->
                List.of(String.format("host: %s", fqdn.orElseThrow()))))
            .customResponseHeaders("X-Cache-Hit: {cdn_cache_status}")
            .backends(BackendServiceBackendArgs.builder()
                .group(externalProxy.id())
                .build())
            .build());

    }
}
# Internet NEG with a single FQDN endpoint, fronted by a CDN-enabled backend service.
resources:
  # Internet NEG (INTERNET_FQDN_PORT) with a default port of 443.
  externalProxy:
    type: gcp:compute:GlobalNetworkEndpointGroup
    name: external_proxy
    properties:
      name: network-endpoint
      networkEndpointType: INTERNET_FQDN_PORT
      defaultPort: '443'
  # FQDN endpoint registered in the group above, reusing the group's default port.
  proxy:
    type: gcp:compute:GlobalNetworkEndpoint
    properties:
      globalNetworkEndpointGroup: ${externalProxy.id}
      fqdn: test.example.com
      port: ${externalProxy.defaultPort}
  default:
    type: gcp:compute:BackendService
    properties:
      name: backend-service
      enableCdn: true
      timeoutSec: 10
      connectionDrainingTimeoutSec: 10
      # Sends the endpoint's FQDN as the host header.
      customRequestHeaders:
        - 'host: ${proxy.fqdn}'
      customResponseHeaders:
        - 'X-Cache-Hit: {cdn_cache_status}'
      # The internet NEG is the only backend.
      backends:
        - group: ${externalProxy.id}

Network endpoint groups with networkEndpointType set to INTERNET_FQDN_PORT enable routing to external FQDNs. The backends array references the network endpoint group, and customRequestHeaders adds headers to each request before it is forwarded; here it sets the host header to the endpoint's FQDN. This configuration lets you proxy traffic to external services through GCP’s load balancing infrastructure.

Balance load using custom backend metrics

Advanced load balancing can use custom metrics reported by backends to make routing decisions.

import * as pulumi from "@pulumi/pulumi";
import * as gcp from "@pulumi/gcp";

// VPC network that hosts the endpoint group.
const network = new gcp.compute.Network("default", { name: "network" });

// Zonal NEG with GCE_VM_IP_PORT
const neg = new gcp.compute.NetworkEndpointGroup("default", {
    name: "network-endpoint",
    network: network.id,
    defaultPort: 90,
    zone: "us-central1-a",
    networkEndpointType: "GCE_VM_IP_PORT",
});

// TCP health check on port 80 with 1-second interval and timeout.
const healthCheck = new gcp.compute.HealthCheck("default", {
    name: "health-check",
    timeoutSec: 1,
    checkIntervalSec: 1,
    tcpHealthCheck: { port: 80 },
});

// Metric declared at the backend-service level.
const serviceMetrics = [
    { name: "orca.application_utilization", dryRun: false },
];

// Metrics declared on the individual backend.
const backendMetrics = [
    { name: "orca.cpu_utilization", maxUtilization: 0.9, dryRun: true },
    { name: "orca.named_metrics.foo", dryRun: false },
];

// Extra fields added to the log entries when optionalMode is CUSTOM.
const loggedFields = ["orca_load_report", "tls.protocol"];

const backendService = new gcp.compute.BackendService("default", {
    name: "backend-service",
    healthChecks: healthCheck.id,
    loadBalancingScheme: "EXTERNAL_MANAGED",
    localityLbPolicy: "WEIGHTED_ROUND_ROBIN",
    customMetrics: serviceMetrics,
    backends: [{
        group: neg.id,
        balancingMode: "CUSTOM_METRICS",
        customMetrics: backendMetrics,
    }],
    logConfig: {
        enable: true,
        optionalMode: "CUSTOM",
        optionalFields: loggedFields,
    },
});
import pulumi
import pulumi_gcp as gcp

# VPC network that hosts the endpoint group.
network = gcp.compute.Network("default", name="network")

# Zonal NEG with GCE_VM_IP_PORT
neg = gcp.compute.NetworkEndpointGroup(
    "default",
    name="network-endpoint",
    network=network.id,
    default_port=90,
    zone="us-central1-a",
    network_endpoint_type="GCE_VM_IP_PORT",
)

# TCP health check on port 80 with 1-second interval and timeout.
health_check = gcp.compute.HealthCheck(
    "default",
    name="health-check",
    timeout_sec=1,
    check_interval_sec=1,
    tcp_health_check={"port": 80},
)

# Metric declared at the backend-service level.
service_metrics = [
    {"name": "orca.application_utilization", "dry_run": False},
]

# Metrics declared on the individual backend.
backend_metrics = [
    {"name": "orca.cpu_utilization", "max_utilization": 0.9, "dry_run": True},
    {"name": "orca.named_metrics.foo", "dry_run": False},
]

# Extra fields added to the log entries when optional_mode is CUSTOM.
logged_fields = ["orca_load_report", "tls.protocol"]

backend_service = gcp.compute.BackendService(
    "default",
    name="backend-service",
    health_checks=health_check.id,
    load_balancing_scheme="EXTERNAL_MANAGED",
    locality_lb_policy="WEIGHTED_ROUND_ROBIN",
    custom_metrics=service_metrics,
    backends=[{
        "group": neg.id,
        "balancing_mode": "CUSTOM_METRICS",
        "custom_metrics": backend_metrics,
    }],
    log_config={
        "enable": True,
        "optional_mode": "CUSTOM",
        "optional_fields": logged_fields,
    },
)
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v9/go/gcp/compute"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main declares a VPC network, a zonal GCE_VM_IP_PORT endpoint group, a TCP
// health check, and an EXTERNAL_MANAGED backend service that balances on
// custom metrics.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		network, err := compute.NewNetwork(ctx, "default", &compute.NetworkArgs{
			Name: pulumi.String("network"),
		})
		if err != nil {
			return err
		}

		// Zonal NEG with GCE_VM_IP_PORT
		negArgs := &compute.NetworkEndpointGroupArgs{
			Name:                pulumi.String("network-endpoint"),
			Network:             network.ID(),
			DefaultPort:         pulumi.Int(90),
			Zone:                pulumi.String("us-central1-a"),
			NetworkEndpointType: pulumi.String("GCE_VM_IP_PORT"),
		}
		neg, err := compute.NewNetworkEndpointGroup(ctx, "default", negArgs)
		if err != nil {
			return err
		}

		// TCP health check on port 80 with 1-second interval and timeout.
		healthCheckArgs := &compute.HealthCheckArgs{
			Name:             pulumi.String("health-check"),
			TimeoutSec:       pulumi.Int(1),
			CheckIntervalSec: pulumi.Int(1),
			TcpHealthCheck: &compute.HealthCheckTcpHealthCheckArgs{
				Port: pulumi.Int(80),
			},
		}
		healthCheck, err := compute.NewHealthCheck(ctx, "default", healthCheckArgs)
		if err != nil {
			return err
		}

		// Metric declared at the backend-service level.
		serviceMetrics := compute.BackendServiceCustomMetricArray{
			&compute.BackendServiceCustomMetricArgs{
				Name:   pulumi.String("orca.application_utilization"),
				DryRun: pulumi.Bool(false),
			},
		}

		// Metrics declared on the individual backend.
		backendMetrics := compute.BackendServiceBackendCustomMetricArray{
			&compute.BackendServiceBackendCustomMetricArgs{
				Name:           pulumi.String("orca.cpu_utilization"),
				MaxUtilization: pulumi.Float64(0.9),
				DryRun:         pulumi.Bool(true),
			},
			&compute.BackendServiceBackendCustomMetricArgs{
				Name:   pulumi.String("orca.named_metrics.foo"),
				DryRun: pulumi.Bool(false),
			},
		}

		// Logging with a CUSTOM set of optional fields.
		logConfig := &compute.BackendServiceLogConfigArgs{
			Enable:       pulumi.Bool(true),
			OptionalMode: pulumi.String("CUSTOM"),
			OptionalFields: pulumi.StringArray{
				pulumi.String("orca_load_report"),
				pulumi.String("tls.protocol"),
			},
		}

		_, err = compute.NewBackendService(ctx, "default", &compute.BackendServiceArgs{
			Name:                pulumi.String("backend-service"),
			HealthChecks:        healthCheck.ID(),
			LoadBalancingScheme: pulumi.String("EXTERNAL_MANAGED"),
			LocalityLbPolicy:    pulumi.String("WEIGHTED_ROUND_ROBIN"),
			CustomMetrics:       serviceMetrics,
			Backends: compute.BackendServiceBackendArray{
				&compute.BackendServiceBackendArgs{
					Group:         neg.ID(),
					BalancingMode: pulumi.String("CUSTOM_METRICS"),
					CustomMetrics: backendMetrics,
				},
			},
			LogConfig: logConfig,
		})
		// err is nil on success, so returning it covers both outcomes.
		return err
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcp = Pulumi.Gcp;

// VPC network, zonal NEG, TCP health check, and a backend service balanced on custom metrics.
return await Deployment.RunAsync(() =>
{
    var network = new Gcp.Compute.Network("default", new Gcp.Compute.NetworkArgs
    {
        Name = "network",
    });

    // Zonal NEG with GCE_VM_IP_PORT
    var neg = new Gcp.Compute.NetworkEndpointGroup("default", new Gcp.Compute.NetworkEndpointGroupArgs
    {
        Name = "network-endpoint",
        Network = network.Id,
        DefaultPort = 90,
        Zone = "us-central1-a",
        NetworkEndpointType = "GCE_VM_IP_PORT",
    });

    // TCP health check on port 80 with 1-second interval and timeout.
    var healthCheck = new Gcp.Compute.HealthCheck("default", new Gcp.Compute.HealthCheckArgs
    {
        Name = "health-check",
        TimeoutSec = 1,
        CheckIntervalSec = 1,
        TcpHealthCheck = new Gcp.Compute.Inputs.HealthCheckTcpHealthCheckArgs
        {
            Port = 80,
        },
    });

    // Metric declared at the backend-service level.
    var serviceMetrics = new[]
    {
        new Gcp.Compute.Inputs.BackendServiceCustomMetricArgs
        {
            Name = "orca.application_utilization",
            DryRun = false,
        },
    };

    // Metrics declared on the individual backend.
    var backendMetrics = new[]
    {
        new Gcp.Compute.Inputs.BackendServiceBackendCustomMetricArgs
        {
            Name = "orca.cpu_utilization",
            MaxUtilization = 0.9,
            DryRun = true,
        },
        new Gcp.Compute.Inputs.BackendServiceBackendCustomMetricArgs
        {
            Name = "orca.named_metrics.foo",
            DryRun = false,
        },
    };

    var negBackend = new Gcp.Compute.Inputs.BackendServiceBackendArgs
    {
        Group = neg.Id,
        BalancingMode = "CUSTOM_METRICS",
        CustomMetrics = backendMetrics,
    };

    // Logging with a CUSTOM set of optional fields.
    var logging = new Gcp.Compute.Inputs.BackendServiceLogConfigArgs
    {
        Enable = true,
        OptionalMode = "CUSTOM",
        OptionalFields = new[] { "orca_load_report", "tls.protocol" },
    };

    var backendService = new Gcp.Compute.BackendService("default", new Gcp.Compute.BackendServiceArgs
    {
        Name = "backend-service",
        HealthChecks = healthCheck.Id,
        LoadBalancingScheme = "EXTERNAL_MANAGED",
        LocalityLbPolicy = "WEIGHTED_ROUND_ROBIN",
        CustomMetrics = serviceMetrics,
        Backends = new[] { negBackend },
        LogConfig = logging,
    });
});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcp.compute.Network;
import com.pulumi.gcp.compute.NetworkArgs;
import com.pulumi.gcp.compute.NetworkEndpointGroup;
import com.pulumi.gcp.compute.NetworkEndpointGroupArgs;
import com.pulumi.gcp.compute.HealthCheck;
import com.pulumi.gcp.compute.HealthCheckArgs;
import com.pulumi.gcp.compute.inputs.HealthCheckTcpHealthCheckArgs;
import com.pulumi.gcp.compute.BackendService;
import com.pulumi.gcp.compute.BackendServiceArgs;
import com.pulumi.gcp.compute.inputs.BackendServiceCustomMetricArgs;
import com.pulumi.gcp.compute.inputs.BackendServiceBackendArgs;
import com.pulumi.gcp.compute.inputs.BackendServiceBackendCustomMetricArgs;
import com.pulumi.gcp.compute.inputs.BackendServiceLogConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that declares a VPC network, a zonal GCE_VM_IP_PORT endpoint group,
 * a TCP health check, and an EXTERNAL_MANAGED backend service that balances on
 * custom metrics.
 */
public class App {
    /** Entry point: hands {@link #stack} to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Registers the network, endpoint group, health check, and backend service, in that order.
     *
     * @param ctx Pulumi context passed in by the runtime; not read by this program
     */
    public static void stack(Context ctx) {
        final var network = new Network("default", NetworkArgs.builder()
            .name("network")
            .build());

        // Zonal NEG with GCE_VM_IP_PORT
        final var neg = new NetworkEndpointGroup("defaultNetworkEndpointGroup", NetworkEndpointGroupArgs.builder()
            .name("network-endpoint")
            .network(network.id())
            .defaultPort(90)
            .zone("us-central1-a")
            .networkEndpointType("GCE_VM_IP_PORT")
            .build());

        // TCP health check on port 80 with 1-second interval and timeout.
        final var tcpProbe = HealthCheckTcpHealthCheckArgs.builder()
            .port(80)
            .build();
        final var healthCheck = new HealthCheck("defaultHealthCheck", HealthCheckArgs.builder()
            .name("health-check")
            .timeoutSec(1)
            .checkIntervalSec(1)
            .tcpHealthCheck(tcpProbe)
            .build());

        // Metric declared at the backend-service level.
        final var serviceMetric = BackendServiceCustomMetricArgs.builder()
            .name("orca.application_utilization")
            .dryRun(false)
            .build();

        // Metrics declared on the individual backend.
        final var cpuMetric = BackendServiceBackendCustomMetricArgs.builder()
            .name("orca.cpu_utilization")
            .maxUtilization(0.9)
            .dryRun(true)
            .build();
        final var namedMetric = BackendServiceBackendCustomMetricArgs.builder()
            .name("orca.named_metrics.foo")
            .dryRun(false)
            .build();

        final var negBackend = BackendServiceBackendArgs.builder()
            .group(neg.id())
            .balancingMode("CUSTOM_METRICS")
            .customMetrics(cpuMetric, namedMetric)
            .build();

        // Logging with a CUSTOM set of optional fields.
        final var logging = BackendServiceLogConfigArgs.builder()
            .enable(true)
            .optionalMode("CUSTOM")
            .optionalFields("orca_load_report", "tls.protocol")
            .build();

        final var backendService = new BackendService("defaultBackendService", BackendServiceArgs.builder()
            .name("backend-service")
            .healthChecks(healthCheck.id())
            .loadBalancingScheme("EXTERNAL_MANAGED")
            .localityLbPolicy("WEIGHTED_ROUND_ROBIN")
            .customMetrics(serviceMetric)
            .backends(negBackend)
            .logConfig(logging)
            .build());
    }
}
# VPC network, zonal NEG, TCP health check, and a backend service that balances
# on custom metrics.
resources:
  default:
    type: gcp:compute:Network
    properties:
      name: network
  # Zonal NEG with GCE_VM_IP_PORT
  defaultNetworkEndpointGroup:
    type: gcp:compute:NetworkEndpointGroup
    name: default
    properties:
      name: network-endpoint
      network: ${default.id}
      defaultPort: '90'
      zone: us-central1-a
      networkEndpointType: GCE_VM_IP_PORT
  defaultBackendService:
    type: gcp:compute:BackendService
    name: default
    properties:
      name: backend-service
      # Refers to the defaultHealthCheck resource declared further down in this file.
      healthChecks: ${defaultHealthCheck.id}
      loadBalancingScheme: EXTERNAL_MANAGED
      localityLbPolicy: WEIGHTED_ROUND_ROBIN
      # Metric declared at the backend-service level.
      customMetrics:
        - name: orca.application_utilization
          dryRun: false
      backends:
        - group: ${defaultNetworkEndpointGroup.id}
          balancingMode: CUSTOM_METRICS
          # Metrics declared on this individual backend.
          customMetrics:
            - name: orca.cpu_utilization
              maxUtilization: 0.9
              dryRun: true
            - name: orca.named_metrics.foo
              dryRun: false
      # Logging with a CUSTOM set of optional fields.
      logConfig:
        enable: true
        optionalMode: CUSTOM
        optionalFields:
          - orca_load_report
          - tls.protocol
  # TCP health check on port 80 with 1-second interval and timeout.
  defaultHealthCheck:
    type: gcp:compute:HealthCheck
    name: default
    properties:
      name: health-check
      timeoutSec: 1
      checkIntervalSec: 1
      tcpHealthCheck:
        port: '80'

The WEIGHTED_ROUND_ROBIN locality policy distributes traffic based on backend-reported metrics. The customMetrics array at the backend service level defines which metrics to collect, while the customMetrics array in the backends block specifies per-backend metric thresholds. Setting balancingMode to CUSTOM_METRICS tells the load balancer to use these metrics for routing decisions. Backends report metrics via the X-Endpoint-Load-Metrics header.

Beyond these examples

These snippets focus on specific backend service features: health checking and backend grouping, Cloud CDN caching and cache key policies, Traffic Director service mesh routing, and custom metrics and advanced load balancing. They’re intentionally minimal rather than full load balancing deployments.

Some examples reference pre-existing infrastructure such as health check resources, network endpoint groups or instance groups, and VPC networks and subnets. They focus on configuring the backend service rather than provisioning all supporting resources.

To keep things focused, common backend service patterns are omitted, including:

  • IAP (Identity-Aware Proxy) configuration
  • Security policies and edge security
  • Connection draining and timeout tuning
  • TLS settings and backend authentication
  • Dynamic forwarding with Service Extensions
  • Migration between load balancing schemes

These omissions are intentional: the goal is to illustrate how each backend service feature is wired, not provide drop-in load balancing modules. See the Backend Service resource reference for all available configuration options.

Let's configure GCP Backend Services for Load Balancing

Get started with Pulumi Cloud, then follow our quick setup guide to deploy this infrastructure.

Try Pulumi Cloud for FREE

Frequently Asked Questions

Common Errors & Gotchas
Why am I getting a resourceInUseByAnotherResource error when recreating my backend service?
Recreating a BackendService that references dependent resources like gcp.compute.URLMap causes this error when modifying the number of dependent resources. Use lifecycle.create_before_destroy on the dependent resources to avoid this error.
Can I use session affinity with UDP protocol?
No, session affinity is not applicable if the protocol is UDP.
When do session affinity settings not take effect?
Session affinity settings will not take effect if sessionAffinity is not NONE and localityLbPolicy is not set to MAGLEV, WEIGHTED_MAGLEV, or RING_HASH.
Required Configuration
When is a health check required for my backend service?
A health check must be specified unless the backend service uses an internet or serverless NEG as a backend. For internal load balancing, you must use a HealthCheck resource (not HttpHealthCheck or HttpsHealthCheck).
When is portName required?
portName is required when the loadBalancingScheme is EXTERNAL.
When must I set protocol to GRPC?
Protocol must be set to GRPC when the backend service is referenced by a URL map that is bound to a target gRPC proxy.
Load Balancing Configuration
What load balancing schemes are available?
Four schemes are available: EXTERNAL (default), INTERNAL_SELF_MANAGED, INTERNAL_MANAGED, and EXTERNAL_MANAGED. The scheme determines which features and configurations are available.
What features are available for each load balancing scheme?

Feature availability depends on loadBalancingScheme:

  • circuitBreakers: Only INTERNAL_SELF_MANAGED
  • outlierDetection: INTERNAL_SELF_MANAGED or EXTERNAL_MANAGED
  • maxStreamDuration: Only INTERNAL_SELF_MANAGED
  • consistentHash: Only INTERNAL_SELF_MANAGED (and requires localityLbPolicy of MAGLEV or RING_HASH)
What locality load balancing policies are supported?
Available policies include ROUND_ROBIN, LEAST_REQUEST, RING_HASH, RANDOM, ORIGINAL_DESTINATION, MAGLEV, WEIGHTED_MAGLEV, and WEIGHTED_ROUND_ROBIN. Applicability varies by loadBalancingScheme. For External Network Load Balancing, only MAGLEV and WEIGHTED_MAGLEV are supported (default is MAGLEV).
Advanced Features
When can I configure TLS settings for my backend service?
tlsSettings may only be specified when the backend protocol is SSL, HTTPS, or HTTP2.
What's required to use STRONG_COOKIE_AFFINITY?
You must configure strongSessionAffinityCookie if sessionAffinity is set to STRONG_COOKIE_AFFINITY.
How do I enable Cloud CDN for my backend service?
Set enableCdn to true and optionally configure cdnPolicy to customize caching behavior, including cache modes, TTLs, and cache key policies.
Timeouts & Limits
What's the default timeout for a backend service?
The default timeout is 30 seconds. The full range of allowed values is 1 through 2,147,483,647 seconds. Note that timeout meaning varies depending on the type of load balancer.

Using a different cloud?

Explore networking guides for other cloud providers: