AWS Classic

Pulumi Official
Package maintained by Pulumi
v5.10.0 published on Monday, Jul 11, 2022 by Pulumi

Crawler

Manages a Glue Crawler. More information can be found in the AWS Glue Developer Guide.

Example Usage

DynamoDB Target Example

using Pulumi;
using Aws = Pulumi.Aws;

// Stack declaring a Glue crawler named "example" with a single DynamoDB target.
class MyStack : Stack
{
    public MyStack()
    {
        var example = new Aws.Glue.Crawler("example", new Aws.Glue.CrawlerArgs
        {
            // aws_glue_catalog_database and aws_iam_role are not declared in this snippet;
            // they must be defined elsewhere in the program.
            DatabaseName = aws_glue_catalog_database.Example.Name,
            Role = aws_iam_role.Example.Arn,
            // One DynamoDB target, identified by the literal path "table-name".
            DynamodbTargets = 
            {
                new Aws.Glue.Inputs.CrawlerDynamodbTargetArgs
                {
                    Path = "table-name",
                },
            },
        });
    }

}
package main

import (
	"github.com/pulumi/pulumi-aws/sdk/v5/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main declares a Glue crawler named "example" whose only source is a
// DynamoDB target with the path "table-name".
//
// aws_glue_catalog_database and aws_iam_role are placeholders that are not
// declared in this snippet; they must be defined elsewhere in the program.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Build the target list first so the crawler arguments stay flat.
		targets := glue.CrawlerDynamodbTargetArray{
			&glue.CrawlerDynamodbTargetArgs{
				Path: pulumi.String("table-name"),
			},
		}
		crawlerArgs := &glue.CrawlerArgs{
			DatabaseName:    pulumi.Any(aws_glue_catalog_database.Example.Name),
			Role:            pulumi.Any(aws_iam_role.Example.Arn),
			DynamodbTargets: targets,
		}
		if _, err := glue.NewCrawler(ctx, "example", crawlerArgs); err != nil {
			return err
		}
		return nil
	})
}
package generated_program;

import java.util.*;
import java.io.*;
import java.nio.*;
import com.pulumi.*;

// Program entry point declaring a Glue crawler named "example" with a single DynamoDB target.
// NOTE(review): Crawler, CrawlerArgs and CrawlerDynamodbTargetArgs are not covered by the imports
// shown with this snippet (com.pulumi.* does not include sub-packages) — confirm the provider imports.
public class App {
    public static void main(String[] args) {
        // Hand the stack definition to the Pulumi runtime.
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // aws_glue_catalog_database and aws_iam_role are placeholders not declared in this snippet.
        var example = new Crawler("example", CrawlerArgs.builder()        
            .databaseName(aws_glue_catalog_database.example().name())
            .role(aws_iam_role.example().arn())
            // One DynamoDB target, identified by the literal path "table-name".
            .dynamodbTargets(CrawlerDynamodbTargetArgs.builder()
                .path("table-name")
                .build())
            .build());

    }
}
import pulumi
import pulumi_aws as aws

# Glue crawler named "example" with a single DynamoDB target.
# aws_glue_catalog_database and aws_iam_role are placeholders not defined in this snippet.
example = aws.glue.Crawler("example",
    database_name=aws_glue_catalog_database["example"]["name"],
    role=aws_iam_role["example"]["arn"],
    # One DynamoDB target, identified by the literal path "table-name".
    dynamodb_targets=[aws.glue.CrawlerDynamodbTargetArgs(
        path="table-name",
    )])
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// Glue crawler named "example" with a single DynamoDB target.
// aws_glue_catalog_database and aws_iam_role are placeholders not declared in this snippet.
const example = new aws.glue.Crawler("example", {
    databaseName: aws_glue_catalog_database.example.name,
    role: aws_iam_role.example.arn,
    // One DynamoDB target, identified by the literal path "table-name".
    dynamodbTargets: [{
        path: "table-name",
    }],
});
# Glue crawler named "example" with a single DynamoDB target.
# The ${aws_glue_catalog_database...} and ${aws_iam_role...} references point at
# resources that are not declared in this snippet.
resources:
  example:
    type: aws:glue:Crawler
    properties:
      databaseName: ${aws_glue_catalog_database.example.name}
      role: ${aws_iam_role.example.arn}
      # One DynamoDB target, identified by the literal path "table-name".
      dynamodbTargets:
        - path: table-name

JDBC Target Example

using Pulumi;
using Aws = Pulumi.Aws;

// Stack declaring a Glue crawler named "example" with a single JDBC target.
class MyStack : Stack
{
    public MyStack()
    {
        var example = new Aws.Glue.Crawler("example", new Aws.Glue.CrawlerArgs
        {
            // aws_glue_catalog_database, aws_iam_role and aws_glue_connection are not
            // declared in this snippet; they must be defined elsewhere in the program.
            DatabaseName = aws_glue_catalog_database.Example.Name,
            Role = aws_iam_role.Example.Arn,
            // One JDBC target: a named Glue connection plus the path "database-name/%".
            JdbcTargets = 
            {
                new Aws.Glue.Inputs.CrawlerJdbcTargetArgs
                {
                    ConnectionName = aws_glue_connection.Example.Name,
                    Path = "database-name/%",
                },
            },
        });
    }

}
package main

import (
	"fmt"

	"github.com/pulumi/pulumi-aws/sdk/v5/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main declares a Glue crawler named "example" whose only source is a JDBC
// target reached through a named Glue connection.
//
// aws_glue_catalog_database, aws_iam_role and aws_glue_connection are
// placeholders that are not declared in this snippet.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Path for the JDBC target; the two pieces join to "database-name/%".
		jdbcPath := fmt.Sprintf("%v%v", "database-name/", "%")

		crawlerArgs := &glue.CrawlerArgs{
			DatabaseName: pulumi.Any(aws_glue_catalog_database.Example.Name),
			Role:         pulumi.Any(aws_iam_role.Example.Arn),
			JdbcTargets: glue.CrawlerJdbcTargetArray{
				&glue.CrawlerJdbcTargetArgs{
					ConnectionName: pulumi.Any(aws_glue_connection.Example.Name),
					Path:           pulumi.String(jdbcPath),
				},
			},
		}
		if _, err := glue.NewCrawler(ctx, "example", crawlerArgs); err != nil {
			return err
		}
		return nil
	})
}
package generated_program;

import java.util.*;
import java.io.*;
import java.nio.*;
import com.pulumi.*;

// Program entry point declaring a Glue crawler named "example" with a single JDBC target.
// NOTE(review): Crawler, CrawlerArgs and CrawlerJdbcTargetArgs are not covered by the imports
// shown with this snippet (com.pulumi.* does not include sub-packages) — confirm the provider imports.
public class App {
    public static void main(String[] args) {
        // Hand the stack definition to the Pulumi runtime.
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // aws_glue_catalog_database, aws_iam_role and aws_glue_connection are placeholders
        // not declared in this snippet.
        var example = new Crawler("example", CrawlerArgs.builder()        
            .databaseName(aws_glue_catalog_database.example().name())
            .role(aws_iam_role.example().arn())
            // One JDBC target: a named Glue connection plus the path "database-name/%".
            .jdbcTargets(CrawlerJdbcTargetArgs.builder()
                .connectionName(aws_glue_connection.example().name())
                .path("database-name/%")
                .build())
            .build());

    }
}
import pulumi
import pulumi_aws as aws

# Glue crawler named "example" with a single JDBC target.
# aws_glue_catalog_database, aws_iam_role and aws_glue_connection are placeholders
# not defined in this snippet.
example = aws.glue.Crawler("example",
    database_name=aws_glue_catalog_database["example"]["name"],
    role=aws_iam_role["example"]["arn"],
    # One JDBC target: a named Glue connection plus the path "database-name/%".
    jdbc_targets=[aws.glue.CrawlerJdbcTargetArgs(
        connection_name=aws_glue_connection["example"]["name"],
        path="database-name/%",
    )])
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// Glue crawler named "example" with a single JDBC target.
// aws_glue_catalog_database, aws_iam_role and aws_glue_connection are placeholders
// not declared in this snippet.
const example = new aws.glue.Crawler("example", {
    databaseName: aws_glue_catalog_database.example.name,
    role: aws_iam_role.example.arn,
    // One JDBC target: a named Glue connection plus the path "database-name/%".
    jdbcTargets: [{
        connectionName: aws_glue_connection.example.name,
        path: `database-name/%`,
    }],
});
# Glue crawler named "example" with a single JDBC target.
# The ${...} references point at resources that are not declared in this snippet.
resources:
  example:
    type: aws:glue:Crawler
    properties:
      databaseName: ${aws_glue_catalog_database.example.name}
      role: ${aws_iam_role.example.arn}
      # One JDBC target: a named Glue connection plus the path "database-name/%".
      jdbcTargets:
        - connectionName: ${aws_glue_connection.example.name}
          path: database-name/%

S3 Target Example

using Pulumi;
using Aws = Pulumi.Aws;

// Stack declaring a Glue crawler named "example" with a single S3 target.
class MyStack : Stack
{
    public MyStack()
    {
        var example = new Aws.Glue.Crawler("example", new Aws.Glue.CrawlerArgs
        {
            // aws_glue_catalog_database, aws_iam_role and aws_s3_bucket are not declared
            // in this snippet; they must be defined elsewhere in the program.
            DatabaseName = aws_glue_catalog_database.Example.Name,
            Role = aws_iam_role.Example.Arn,
            // One S3 target whose path is "s3://" followed by the bucket name.
            S3Targets = 
            {
                new Aws.Glue.Inputs.CrawlerS3TargetArgs
                {
                    Path = $"s3://{aws_s3_bucket.Example.Bucket}",
                },
            },
        });
    }

}
package main

import (
	"fmt"

	"github.com/pulumi/pulumi-aws/sdk/v5/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main declares a Glue crawler named "example" whose only source is an S3
// target with a path of the form "s3://<bucket>".
//
// aws_glue_catalog_database, aws_iam_role and aws_s3_bucket are placeholders
// that are not declared in this snippet.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// NOTE(review): if Bucket is a Pulumi output rather than a plain
		// string, fmt.Sprintf will not resolve it — confirm against the
		// real bucket declaration.
		s3Path := fmt.Sprintf("%v%v", "s3://", aws_s3_bucket.Example.Bucket)

		crawlerArgs := &glue.CrawlerArgs{
			DatabaseName: pulumi.Any(aws_glue_catalog_database.Example.Name),
			Role:         pulumi.Any(aws_iam_role.Example.Arn),
			S3Targets: glue.CrawlerS3TargetArray{
				&glue.CrawlerS3TargetArgs{
					Path: pulumi.String(s3Path),
				},
			},
		}
		if _, err := glue.NewCrawler(ctx, "example", crawlerArgs); err != nil {
			return err
		}
		return nil
	})
}
package generated_program;

import java.util.*;
import java.io.*;
import java.nio.*;
import com.pulumi.*;

// Program entry point declaring a Glue crawler named "example" with a single S3 target.
// NOTE(review): Crawler, CrawlerArgs and CrawlerS3TargetArgs are not covered by the imports
// shown with this snippet (com.pulumi.* does not include sub-packages) — confirm the provider imports.
public class App {
    public static void main(String[] args) {
        // Hand the stack definition to the Pulumi runtime.
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // aws_glue_catalog_database, aws_iam_role and aws_s3_bucket are placeholders
        // not declared in this snippet.
        var example = new Crawler("example", CrawlerArgs.builder()        
            .databaseName(aws_glue_catalog_database.example().name())
            .role(aws_iam_role.example().arn())
            // One S3 target whose path is "s3://" followed by the bucket name.
            .s3Targets(CrawlerS3TargetArgs.builder()
                .path(String.format("s3://%s", aws_s3_bucket.example().bucket()))
                .build())
            .build());

    }
}
import pulumi
import pulumi_aws as aws

# Glue crawler named "example" with a single S3 target.
# aws_glue_catalog_database, aws_iam_role and aws_s3_bucket are placeholders
# not defined in this snippet.
example = aws.glue.Crawler("example",
    database_name=aws_glue_catalog_database["example"]["name"],
    role=aws_iam_role["example"]["arn"],
    # One S3 target whose path is "s3://" followed by the bucket name.
    s3_targets=[aws.glue.CrawlerS3TargetArgs(
        path=f"s3://{aws_s3_bucket['example']['bucket']}",
    )])
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// Glue crawler named "example" with a single S3 target.
// aws_glue_catalog_database, aws_iam_role and aws_s3_bucket are placeholders
// not declared in this snippet.
const example = new aws.glue.Crawler("example", {
    databaseName: aws_glue_catalog_database.example.name,
    role: aws_iam_role.example.arn,
    // One S3 target whose path is "s3://" followed by the bucket name.
    s3Targets: [{
        path: `s3://${aws_s3_bucket.example.bucket}`,
    }],
});
# Glue crawler named "example" with a single S3 target.
# The ${...} references point at resources that are not declared in this snippet.
resources:
  example:
    type: aws:glue:Crawler
    properties:
      databaseName: ${aws_glue_catalog_database.example.name}
      role: ${aws_iam_role.example.arn}
      # One S3 target whose path is "s3://" followed by the bucket name.
      s3Targets:
        - path: s3://${aws_s3_bucket.example.bucket}

Catalog Target Example

using Pulumi;
using Aws = Pulumi.Aws;

// Stack declaring a Glue crawler named "example" that targets an existing catalog table,
// with a schema change policy and an inline JSON configuration.
class MyStack : Stack
{
    public MyStack()
    {
        var example = new Aws.Glue.Crawler("example", new Aws.Glue.CrawlerArgs
        {
            // aws_glue_catalog_database, aws_iam_role and aws_glue_catalog_table are not
            // declared in this snippet; they must be defined elsewhere in the program.
            DatabaseName = aws_glue_catalog_database.Example.Name,
            Role = aws_iam_role.Example.Arn,
            // One catalog target: a database name plus the list of table names in it.
            CatalogTargets = 
            {
                new Aws.Glue.Inputs.CrawlerCatalogTargetArgs
                {
                    DatabaseName = aws_glue_catalog_database.Example.Name,
                    Tables = 
                    {
                        aws_glue_catalog_table.Example.Name,
                    },
                },
            },
            SchemaChangePolicy = new Aws.Glue.Inputs.CrawlerSchemaChangePolicyArgs
            {
                DeleteBehavior = "LOG",
            },
            // Verbatim string: the JSON document below is passed as-is, including its
            // trailing newline, so nothing may be inserted between the quotes.
            Configuration = @"{
  ""Version"":1.0,
  ""Grouping"": {
    ""TableGroupingPolicy"": ""CombineCompatibleSchemas""
  }
}
",
        });
    }

}
package main

import (
	"fmt"

	"github.com/pulumi/pulumi-aws/sdk/v5/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main declares a Glue crawler named "example" that targets an existing
// catalog table, sets the schema change policy's delete behavior to "LOG",
// and supplies an inline JSON configuration document.
//
// aws_glue_catalog_database, aws_iam_role and aws_glue_catalog_table are
// placeholders that are not declared in this snippet.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// The configuration JSON is assembled line by line; every piece is
		// a constant, so the result is a fixed multi-line document.
		configJSON := fmt.Sprintf("%v%v%v%v%v%v", "{\n", "  \"Version\":1.0,\n", "  \"Grouping\": {\n", "    \"TableGroupingPolicy\": \"CombineCompatibleSchemas\"\n", "  }\n", "}\n")

		// One catalog target: a database name plus the tables inside it.
		catalogTargets := glue.CrawlerCatalogTargetArray{
			&glue.CrawlerCatalogTargetArgs{
				DatabaseName: pulumi.Any(aws_glue_catalog_database.Example.Name),
				Tables: pulumi.StringArray{
					pulumi.Any(aws_glue_catalog_table.Example.Name),
				},
			},
		}

		crawlerArgs := &glue.CrawlerArgs{
			DatabaseName:   pulumi.Any(aws_glue_catalog_database.Example.Name),
			Role:           pulumi.Any(aws_iam_role.Example.Arn),
			CatalogTargets: catalogTargets,
			SchemaChangePolicy: &glue.CrawlerSchemaChangePolicyArgs{
				DeleteBehavior: pulumi.String("LOG"),
			},
			Configuration: pulumi.String(configJSON),
		}
		if _, err := glue.NewCrawler(ctx, "example", crawlerArgs); err != nil {
			return err
		}
		return nil
	})
}
package generated_program;

import java.util.*;
import java.io.*;
import java.nio.*;
import com.pulumi.*;

// Program entry point declaring a Glue crawler named "example" that targets an existing
// catalog table, with a schema change policy and an inline JSON configuration.
// NOTE(review): Crawler and the *Args builder classes are not covered by the imports shown
// with this snippet (com.pulumi.* does not include sub-packages) — confirm the provider imports.
public class App {
    public static void main(String[] args) {
        // Hand the stack definition to the Pulumi runtime.
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // aws_glue_catalog_database, aws_iam_role and aws_glue_catalog_table are placeholders
        // not declared in this snippet.
        var example = new Crawler("example", CrawlerArgs.builder()        
            .databaseName(aws_glue_catalog_database.example().name())
            .role(aws_iam_role.example().arn())
            // One catalog target: a database name plus the table names in it.
            .catalogTargets(CrawlerCatalogTargetArgs.builder()
                .databaseName(aws_glue_catalog_database.example().name())
                .tables(aws_glue_catalog_table.example().name())
                .build())
            .schemaChangePolicy(CrawlerSchemaChangePolicyArgs.builder()
                .deleteBehavior("LOG")
                .build())
            // Text block: the JSON document is passed as written, so nothing may be
            // inserted between the triple quotes.
            .configuration("""
{
  "Version":1.0,
  "Grouping": {
    "TableGroupingPolicy": "CombineCompatibleSchemas"
  }
}
            """)
            .build());

    }
}
import pulumi
import pulumi_aws as aws

# Glue crawler named "example" that targets an existing catalog table, with a
# schema change policy and an inline JSON configuration.
# aws_glue_catalog_database, aws_iam_role and aws_glue_catalog_table are
# placeholders not defined in this snippet.
example = aws.glue.Crawler("example",
    database_name=aws_glue_catalog_database["example"]["name"],
    role=aws_iam_role["example"]["arn"],
    # One catalog target: a database name plus the table names in it.
    catalog_targets=[aws.glue.CrawlerCatalogTargetArgs(
        database_name=aws_glue_catalog_database["example"]["name"],
        tables=[aws_glue_catalog_table["example"]["name"]],
    )],
    schema_change_policy=aws.glue.CrawlerSchemaChangePolicyArgs(
        delete_behavior="LOG",
    ),
    # Triple-quoted string: the JSON document is passed as written, including
    # its trailing newline.
    configuration="""{
  "Version":1.0,
  "Grouping": {
    "TableGroupingPolicy": "CombineCompatibleSchemas"
  }
}
""")
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// Glue crawler named "example" that targets an existing catalog table, with a
// schema change policy and an inline JSON configuration.
// aws_glue_catalog_database, aws_iam_role and aws_glue_catalog_table are
// placeholders not declared in this snippet.
const example = new aws.glue.Crawler("example", {
    databaseName: aws_glue_catalog_database.example.name,
    role: aws_iam_role.example.arn,
    // One catalog target: a database name plus the table names in it.
    catalogTargets: [{
        databaseName: aws_glue_catalog_database.example.name,
        tables: [aws_glue_catalog_table.example.name],
    }],
    schemaChangePolicy: {
        deleteBehavior: "LOG",
    },
    // Template literal: the JSON document is passed as written, including its
    // trailing newline.
    configuration: `{
  "Version":1.0,
  "Grouping": {
    "TableGroupingPolicy": "CombineCompatibleSchemas"
  }
}
`,
});
# Glue crawler named "example" that targets an existing catalog table, with a
# schema change policy and an inline JSON configuration.
# The ${...} references point at resources that are not declared in this snippet.
resources:
  example:
    type: aws:glue:Crawler
    properties:
      databaseName: ${aws_glue_catalog_database.example.name}
      role: ${aws_iam_role.example.arn}
      # One catalog target: a database name plus the table names in it.
      catalogTargets:
        - databaseName: ${aws_glue_catalog_database.example.name}
          tables:
            - ${aws_glue_catalog_table.example.name}
      schemaChangePolicy:
        deleteBehavior: LOG
      # Literal block scalar: every line below is part of the JSON string value.
      configuration: |
        {
          "Version":1.0,
          "Grouping": {
            "TableGroupingPolicy": "CombineCompatibleSchemas"
          }
        }        

MongoDB Target Example

using Pulumi;
using Aws = Pulumi.Aws;

// Stack declaring a Glue crawler named "example" with a single MongoDB target.
class MyStack : Stack
{
    public MyStack()
    {
        var example = new Aws.Glue.Crawler("example", new Aws.Glue.CrawlerArgs
        {
            // aws_glue_catalog_database, aws_iam_role and aws_glue_connection are not
            // declared in this snippet; they must be defined elsewhere in the program.
            DatabaseName = aws_glue_catalog_database.Example.Name,
            Role = aws_iam_role.Example.Arn,
            // One MongoDB target: a named Glue connection plus the path "database-name/%".
            MongodbTargets = 
            {
                new Aws.Glue.Inputs.CrawlerMongodbTargetArgs
                {
                    ConnectionName = aws_glue_connection.Example.Name,
                    Path = "database-name/%",
                },
            },
        });
    }

}
package main

import (
	"fmt"

	"github.com/pulumi/pulumi-aws/sdk/v5/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main declares a Glue crawler named "example" whose only source is a
// MongoDB target reached through a named Glue connection.
//
// aws_glue_catalog_database, aws_iam_role and aws_glue_connection are
// placeholders that are not declared in this snippet.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Path for the MongoDB target; the two pieces join to
		// "database-name/%".
		mongoPath := fmt.Sprintf("%v%v", "database-name/", "%")

		crawlerArgs := &glue.CrawlerArgs{
			DatabaseName: pulumi.Any(aws_glue_catalog_database.Example.Name),
			Role:         pulumi.Any(aws_iam_role.Example.Arn),
			MongodbTargets: glue.CrawlerMongodbTargetArray{
				&glue.CrawlerMongodbTargetArgs{
					ConnectionName: pulumi.Any(aws_glue_connection.Example.Name),
					Path:           pulumi.String(mongoPath),
				},
			},
		}
		if _, err := glue.NewCrawler(ctx, "example", crawlerArgs); err != nil {
			return err
		}
		return nil
	})
}
package generated_program;

import java.util.*;
import java.io.*;
import java.nio.*;
import com.pulumi.*;

// Program entry point declaring a Glue crawler named "example" with a single MongoDB target.
// NOTE(review): Crawler, CrawlerArgs and CrawlerMongodbTargetArgs are not covered by the imports
// shown with this snippet (com.pulumi.* does not include sub-packages) — confirm the provider imports.
public class App {
    public static void main(String[] args) {
        // Hand the stack definition to the Pulumi runtime.
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // aws_glue_catalog_database, aws_iam_role and aws_glue_connection are placeholders
        // not declared in this snippet.
        var example = new Crawler("example", CrawlerArgs.builder()        
            .databaseName(aws_glue_catalog_database.example().name())
            .role(aws_iam_role.example().arn())
            // One MongoDB target: a named Glue connection plus the path "database-name/%".
            .mongodbTargets(CrawlerMongodbTargetArgs.builder()
                .connectionName(aws_glue_connection.example().name())
                .path("database-name/%")
                .build())
            .build());

    }
}
import pulumi
import pulumi_aws as aws

# Glue crawler named "example" with a single MongoDB target.
# aws_glue_catalog_database, aws_iam_role and aws_glue_connection are placeholders
# not defined in this snippet.
example = aws.glue.Crawler("example",
    database_name=aws_glue_catalog_database["example"]["name"],
    role=aws_iam_role["example"]["arn"],
    # One MongoDB target: a named Glue connection plus the path "database-name/%".
    mongodb_targets=[aws.glue.CrawlerMongodbTargetArgs(
        connection_name=aws_glue_connection["example"]["name"],
        path="database-name/%",
    )])
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// Glue crawler named "example" with a single MongoDB target.
// aws_glue_catalog_database, aws_iam_role and aws_glue_connection are placeholders
// not declared in this snippet.
const example = new aws.glue.Crawler("example", {
    databaseName: aws_glue_catalog_database.example.name,
    role: aws_iam_role.example.arn,
    // One MongoDB target: a named Glue connection plus the path "database-name/%".
    mongodbTargets: [{
        connectionName: aws_glue_connection.example.name,
        path: `database-name/%`,
    }],
});
# Glue crawler named "example" with a single MongoDB target.
# The ${...} references point at resources that are not declared in this snippet.
resources:
  example:
    type: aws:glue:Crawler
    properties:
      databaseName: ${aws_glue_catalog_database.example.name}
      role: ${aws_iam_role.example.arn}
      # One MongoDB target: a named Glue connection plus the path "database-name/%".
      mongodbTargets:
        - connectionName: ${aws_glue_connection.example.name}
          path: database-name/%

Configuration Settings Example

using System.Collections.Generic;
using System.Text.Json;
using Pulumi;
using Aws = Pulumi.Aws;

// Stack declaring a scheduled Glue crawler named "eventsCrawler" with an S3 target and a
// configuration document serialized from a nested dictionary.
class MyStack : Stack
{
    public MyStack()
    {
        var eventsCrawler = new Aws.Glue.Crawler("eventsCrawler", new Aws.Glue.CrawlerArgs
        {
            // aws_glue_catalog_database, aws_iam_role, aws_s3_bucket and @var are not
            // declared in this snippet; they must be defined elsewhere in the program.
            DatabaseName = aws_glue_catalog_database.Glue_database.Name,
            Schedule = "cron(0 1 * * ? *)",
            Role = aws_iam_role.Glue_role.Arn,
            Tags = @var.Tags,
            // The configuration is built as nested dictionaries and serialized to a JSON
            // string with keys "Grouping", "CrawlerOutput" and "Version".
            Configuration = JsonSerializer.Serialize(new Dictionary<string, object?>
            {
                { "Grouping", new Dictionary<string, object?>
                {
                    { "TableGroupingPolicy", "CombineCompatibleSchemas" },
                } },
                { "CrawlerOutput", new Dictionary<string, object?>
                {
                    { "Partitions", new Dictionary<string, object?>
                    {
                        { "AddOrUpdateBehavior", "InheritFromTable" },
                    } },
                } },
                { "Version", 1 },
            }),
            // One S3 target whose path is "s3://" followed by the bucket name.
            S3Targets = 
            {
                new Aws.Glue.Inputs.CrawlerS3TargetArgs
                {
                    Path = $"s3://{aws_s3_bucket.Data_lake_bucket.Bucket}",
                },
            },
        });
    }

}
package main

import (
	"encoding/json"
	"fmt"

	"github.com/pulumi/pulumi-aws/sdk/v5/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main declares a scheduled Glue crawler named "eventsCrawler" that crawls an
// S3 bucket and is configured through a JSON document marshalled at runtime.
//
// aws_glue_catalog_database, aws_iam_role, aws_s3_bucket and _var are
// placeholders that are not declared in this snippet; they must be defined
// elsewhere in the program.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Build the crawler configuration as a nested map and serialize it,
		// since the Configuration argument takes a JSON string.
		tmpJSON0, err := json.Marshal(map[string]interface{}{
			"Grouping": map[string]interface{}{
				"TableGroupingPolicy": "CombineCompatibleSchemas",
			},
			"CrawlerOutput": map[string]interface{}{
				"Partitions": map[string]interface{}{
					"AddOrUpdateBehavior": "InheritFromTable",
				},
			},
			"Version": 1,
		})
		if err != nil {
			return err
		}
		json0 := string(tmpJSON0)
		// err is already declared by the json.Marshal call above, so this
		// must be a plain assignment: `_, err :=` introduces no new variable
		// on the left-hand side and is rejected by the compiler.
		_, err = glue.NewCrawler(ctx, "eventsCrawler", &glue.CrawlerArgs{
			DatabaseName:  pulumi.Any(aws_glue_catalog_database.Glue_database.Name),
			Schedule:      pulumi.String("cron(0 1 * * ? *)"),
			Role:          pulumi.Any(aws_iam_role.Glue_role.Arn),
			Tags:          pulumi.Any(_var.Tags),
			Configuration: pulumi.String(json0),
			S3Targets: glue.CrawlerS3TargetArray{
				&glue.CrawlerS3TargetArgs{
					// NOTE(review): if Bucket is a Pulumi output rather than a
					// plain string, fmt.Sprintf will not resolve it — confirm
					// against the real bucket declaration.
					Path: pulumi.String(fmt.Sprintf("%v%v", "s3://", aws_s3_bucket.Data_lake_bucket.Bucket)),
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
package generated_program;

import java.util.*;
import java.io.*;
import java.nio.*;
import com.pulumi.*;
import static com.pulumi.codegen.internal.Serialization.*;

// Program entry point declaring a scheduled Glue crawler named "eventsCrawler" with an S3
// target and a configuration document serialized from a JSON object builder.
// NOTE(review): Crawler and the *Args builder classes are not covered by the imports shown
// with this snippet (com.pulumi.* does not include sub-packages) — confirm the provider imports.
public class App {
    public static void main(String[] args) {
        // Hand the stack definition to the Pulumi runtime.
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // aws_glue_catalog_database, aws_iam_role, aws_s3_bucket and var_ are placeholders
        // not declared in this snippet.
        var eventsCrawler = new Crawler("eventsCrawler", CrawlerArgs.builder()        
            .databaseName(aws_glue_catalog_database.glue_database().name())
            .schedule("cron(0 1 * * ? *)")
            .role(aws_iam_role.glue_role().arn())
            .tags(var_.tags())
            // The configuration is built as a JSON object with the properties "Grouping",
            // "CrawlerOutput" and "Version", then serialized to a string.
            .configuration(serializeJson(
                jsonObject(
                    jsonProperty("Grouping", jsonObject(
                        jsonProperty("TableGroupingPolicy", "CombineCompatibleSchemas")
                    )),
                    jsonProperty("CrawlerOutput", jsonObject(
                        jsonProperty("Partitions", jsonObject(
                            jsonProperty("AddOrUpdateBehavior", "InheritFromTable")
                        ))
                    )),
                    jsonProperty("Version", 1)
                )))
            // One S3 target whose path is "s3://" followed by the bucket name.
            .s3Targets(CrawlerS3TargetArgs.builder()
                .path(String.format("s3://%s", aws_s3_bucket.data_lake_bucket().bucket()))
                .build())
            .build());

    }
}
import pulumi
import json
import pulumi_aws as aws

# Scheduled Glue crawler named "eventsCrawler" with an S3 target and a
# configuration document serialized from a nested dict.
# aws_glue_catalog_database, aws_iam_role, aws_s3_bucket and var are placeholders
# not defined in this snippet.
events_crawler = aws.glue.Crawler("eventsCrawler",
    database_name=aws_glue_catalog_database["glue_database"]["name"],
    schedule="cron(0 1 * * ? *)",
    role=aws_iam_role["glue_role"]["arn"],
    tags=var["tags"],
    # json.dumps turns the dict into the JSON string passed as the configuration.
    configuration=json.dumps({
        "Grouping": {
            "TableGroupingPolicy": "CombineCompatibleSchemas",
        },
        "CrawlerOutput": {
            "Partitions": {
                "AddOrUpdateBehavior": "InheritFromTable",
            },
        },
        "Version": 1,
    }),
    # One S3 target whose path is "s3://" followed by the bucket name.
    s3_targets=[aws.glue.CrawlerS3TargetArgs(
        path=f"s3://{aws_s3_bucket['data_lake_bucket']['bucket']}",
    )])
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// Scheduled Glue crawler named "eventsCrawler" with an S3 target and a
// configuration document serialized from an object literal.
// aws_glue_catalog_database, aws_iam_role, aws_s3_bucket and _var are placeholders
// not declared in this snippet.
const eventsCrawler = new aws.glue.Crawler("eventsCrawler", {
    databaseName: aws_glue_catalog_database.glue_database.name,
    schedule: "cron(0 1 * * ? *)",
    role: aws_iam_role.glue_role.arn,
    tags: _var.tags,
    // JSON.stringify turns the object into the JSON string passed as the configuration.
    configuration: JSON.stringify({
        Grouping: {
            TableGroupingPolicy: "CombineCompatibleSchemas",
        },
        CrawlerOutput: {
            Partitions: {
                AddOrUpdateBehavior: "InheritFromTable",
            },
        },
        Version: 1,
    }),
    // One S3 target whose path is "s3://" followed by the bucket name.
    s3Targets: [{
        path: `s3://${aws_s3_bucket.data_lake_bucket.bucket}`,
    }],
});
# Scheduled Glue crawler named "eventsCrawler" with an S3 target and a
# configuration document produced from a nested mapping.
# The ${...} references point at resources/variables not declared in this snippet.
resources:
  eventsCrawler:
    type: aws:glue:Crawler
    properties:
      databaseName: ${aws_glue_catalog_database.glue_database.name}
      schedule: cron(0 1 * * ? *)
      role: ${aws_iam_role.glue_role.arn}
      tags: ${var.tags}
      # Fn::ToJSON renders the mapping below as the JSON string used for `configuration`.
      configuration:
        Fn::ToJSON:
          Grouping:
            TableGroupingPolicy: CombineCompatibleSchemas
          CrawlerOutput:
            Partitions:
              AddOrUpdateBehavior: InheritFromTable
          Version: 1
      # One S3 target whose path is "s3://" followed by the bucket name.
      s3Targets:
        - path: s3://${aws_s3_bucket.data_lake_bucket.bucket}

Create a Crawler Resource

new Crawler(name: string, args: CrawlerArgs, opts?: CustomResourceOptions);
@overload
def Crawler(resource_name: str,
            opts: Optional[ResourceOptions] = None,
            catalog_targets: Optional[Sequence[CrawlerCatalogTargetArgs]] = None,
            classifiers: Optional[Sequence[str]] = None,
            configuration: Optional[str] = None,
            database_name: Optional[str] = None,
            delta_targets: Optional[Sequence[CrawlerDeltaTargetArgs]] = None,
            description: Optional[str] = None,
            dynamodb_targets: Optional[Sequence[CrawlerDynamodbTargetArgs]] = None,
            jdbc_targets: Optional[Sequence[CrawlerJdbcTargetArgs]] = None,
            lineage_configuration: Optional[CrawlerLineageConfigurationArgs] = None,
            mongodb_targets: Optional[Sequence[CrawlerMongodbTargetArgs]] = None,
            name: Optional[str] = None,
            recrawl_policy: Optional[CrawlerRecrawlPolicyArgs] = None,
            role: Optional[str] = None,
            s3_targets: Optional[Sequence[CrawlerS3TargetArgs]] = None,
            schedule: Optional[str] = None,
            schema_change_policy: Optional[CrawlerSchemaChangePolicyArgs] = None,
            security_configuration: Optional[str] = None,
            table_prefix: Optional[str] = None,
            tags: Optional[Mapping[str, str]] = None)
@overload
def Crawler(resource_name: str,
            args: CrawlerArgs,
            opts: Optional[ResourceOptions] = None)
func NewCrawler(ctx *Context, name string, args *CrawlerArgs, opts ...ResourceOption) (*Crawler, error)
public Crawler(string name, CrawlerArgs args, CustomResourceOptions? opts = null)
public Crawler(String name, CrawlerArgs args)
public Crawler(String name, CrawlerArgs args, CustomResourceOptions options)
type: aws:glue:Crawler
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.

name string
The unique name of the resource.
args CrawlerArgs
The arguments to resource properties.
opts CustomResourceOptions
Bag of options to control resource's behavior.
resource_name str
The unique name of the resource.
args CrawlerArgs
The arguments to resource properties.
opts ResourceOptions
Bag of options to control resource's behavior.
ctx Context
Context object for the current deployment.
name string
The unique name of the resource.
args CrawlerArgs
The arguments to resource properties.
opts ResourceOption
Bag of options to control resource's behavior.
name string
The unique name of the resource.
args CrawlerArgs
The arguments to resource properties.
opts CustomResourceOptions
Bag of options to control resource's behavior.
name String
The unique name of the resource.
args CrawlerArgs
The arguments to resource properties.
options CustomResourceOptions
Bag of options to control resource's behavior.

Crawler Resource Properties

To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

Inputs

The Crawler resource accepts the following input properties:

DatabaseName string

The name of the Glue database to be synchronized.

Role string

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

CatalogTargets List<CrawlerCatalogTargetArgs>
Classifiers List<string>

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

Configuration string

JSON string of configuration information. For more details see Setting Crawler Configuration Options.

DeltaTargets List<CrawlerDeltaTargetArgs>
Description string

Description of the crawler.

DynamodbTargets List<CrawlerDynamodbTargetArgs>

List of nested DynamoDB target arguments. See Dynamodb Target below.

JdbcTargets List<CrawlerJdbcTargetArgs>

List of nested JDBC target arguments. See JDBC Target below.

LineageConfiguration CrawlerLineageConfigurationArgs

Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.

MongodbTargets List<CrawlerMongodbTargetArgs>

List of nested MongoDB target arguments. See MongoDB Target below.

Name string

Name of the crawler.

RecrawlPolicy CrawlerRecrawlPolicyArgs

A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.

S3Targets List<CrawlerS3TargetArgs>

List of nested Amazon S3 target arguments. See S3 Target below.

Schedule string

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

SchemaChangePolicy CrawlerSchemaChangePolicyArgs

Policy for the crawler's update and deletion behavior. See Schema Change Policy below.

SecurityConfiguration string

The name of the Security Configuration to be used by the crawler.

TablePrefix string

The table prefix used for catalog tables that are created.

Tags Dictionary<string, string>

Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

DatabaseName string

The name of the Glue database to be synchronized.

Role string

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

CatalogTargets []CrawlerCatalogTargetArgs
Classifiers []string

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

Configuration string

JSON string of configuration information. For more details see Setting Crawler Configuration Options.

DeltaTargets []CrawlerDeltaTargetArgs
Description string

Description of the crawler.

DynamodbTargets []CrawlerDynamodbTargetArgs

List of nested DynamoDB target arguments. See Dynamodb Target below.

JdbcTargets []CrawlerJdbcTargetArgs

List of nested JDBC target arguments. See JDBC Target below.

LineageConfiguration CrawlerLineageConfigurationArgs

Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.

MongodbTargets []CrawlerMongodbTargetArgs

List of nested MongoDB target arguments. See MongoDB Target below.

Name string

Name of the crawler.

RecrawlPolicy CrawlerRecrawlPolicyArgs

A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.

S3Targets []CrawlerS3TargetArgs

List of nested Amazon S3 target arguments. See S3 Target below.

Schedule string

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

SchemaChangePolicy CrawlerSchemaChangePolicyArgs

Policy for the crawler's update and deletion behavior. See Schema Change Policy below.

SecurityConfiguration string

The name of the Security Configuration to be used by the crawler.

TablePrefix string

The table prefix used for catalog tables that are created.

Tags map[string]string

Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

databaseName String

The name of the Glue database to be synchronized.

role String

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

catalogTargets List<CrawlerCatalogTargetArgs>
classifiers List<String>

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

configuration String

JSON string of configuration information. For more details see Setting Crawler Configuration Options.

deltaTargets List<CrawlerDeltaTargetArgs>
description String

Description of the crawler.

dynamodbTargets List<CrawlerDynamodbTargetArgs>

List of nested DynamoDB target arguments. See Dynamodb Target below.

jdbcTargets List<CrawlerJdbcTargetArgs>

List of nested JDBC target arguments. See JDBC Target below.

lineageConfiguration CrawlerLineageConfigurationArgs

Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.

mongodbTargets List<CrawlerMongodbTargetArgs>

List of nested MongoDB target arguments. See MongoDB Target below.

name String

Name of the crawler.

recrawlPolicy CrawlerRecrawlPolicyArgs

A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.

s3Targets List<CrawlerS3TargetArgs>

List of nested Amazon S3 target arguments. See S3 Target below.

schedule String

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

schemaChangePolicy CrawlerSchemaChangePolicyArgs

Policy for the crawler's update and deletion behavior. See Schema Change Policy below.

securityConfiguration String

The name of the Security Configuration to be used by the crawler.

tablePrefix String

The table prefix used for catalog tables that are created.

tags Map<String,String>

Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

databaseName string

The name of the Glue database to be synchronized.

role string

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

catalogTargets CrawlerCatalogTargetArgs[]
classifiers string[]

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

configuration string

JSON string of configuration information. For more details see Setting Crawler Configuration Options.

deltaTargets CrawlerDeltaTargetArgs[]
description string

Description of the crawler.

dynamodbTargets CrawlerDynamodbTargetArgs[]

List of nested DynamoDB target arguments. See Dynamodb Target below.

jdbcTargets CrawlerJdbcTargetArgs[]

List of nested JDBC target arguments. See JDBC Target below.

lineageConfiguration CrawlerLineageConfigurationArgs

Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.

mongodbTargets CrawlerMongodbTargetArgs[]

List nested MongoDB target arguments. See MongoDB Target below.

name string

Name of the crawler.

recrawlPolicy CrawlerRecrawlPolicyArgs

A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.

s3Targets CrawlerS3TargetArgs[]

List nested Amazon S3 target arguments. See S3 Target below.

schedule string

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

schemaChangePolicy CrawlerSchemaChangePolicyArgs

Policy for the crawler's update and deletion behavior. See Schema Change Policy below.

securityConfiguration string

The name of Security Configuration to be used by the crawler

tablePrefix string

The table prefix used for catalog tables that are created.

tags {[key: string]: string}

Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

database_name str

The name of the Glue database to be synchronized.

role str

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

catalog_targets Sequence[CrawlerCatalogTargetArgs]
classifiers Sequence[str]

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

configuration str

JSON string of configuration information. For more details see Setting Crawler Configuration Options.

delta_targets Sequence[CrawlerDeltaTargetArgs]
description str

Description of the crawler.

dynamodb_targets Sequence[CrawlerDynamodbTargetArgs]

List of nested DynamoDB target arguments. See Dynamodb Target below.

jdbc_targets Sequence[CrawlerJdbcTargetArgs]

List of nested JDBC target arguments. See JDBC Target below.

lineage_configuration CrawlerLineageConfigurationArgs

Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.

mongodb_targets Sequence[CrawlerMongodbTargetArgs]

List nested MongoDB target arguments. See MongoDB Target below.

name str

Name of the crawler.

recrawl_policy CrawlerRecrawlPolicyArgs

A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.

s3_targets Sequence[CrawlerS3TargetArgs]

List nested Amazon S3 target arguments. See S3 Target below.

schedule str

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

schema_change_policy CrawlerSchemaChangePolicyArgs

Policy for the crawler's update and deletion behavior. See Schema Change Policy below.

security_configuration str

The name of Security Configuration to be used by the crawler

table_prefix str

The table prefix used for catalog tables that are created.

tags Mapping[str, str]

Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

databaseName String

The name of the Glue database to be synchronized.

role String

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

catalogTargets List<Property Map>
classifiers List<String>

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

configuration String

JSON string of configuration information. For more details see Setting Crawler Configuration Options.

deltaTargets List<Property Map>
description String

Description of the crawler.

dynamodbTargets List<Property Map>

List of nested DynamoDB target arguments. See Dynamodb Target below.

jdbcTargets List<Property Map>

List of nested JDBC target arguments. See JDBC Target below.

lineageConfiguration Property Map

Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.

mongodbTargets List<Property Map>

List nested MongoDB target arguments. See MongoDB Target below.

name String

Name of the crawler.

recrawlPolicy Property Map

A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.

s3Targets List<Property Map>

List nested Amazon S3 target arguments. See S3 Target below.

schedule String

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

schemaChangePolicy Property Map

Policy for the crawler's update and deletion behavior. See Schema Change Policy below.

securityConfiguration String

The name of Security Configuration to be used by the crawler

tablePrefix String

The table prefix used for catalog tables that are created.

tags Map<String>

Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

Outputs

All input properties are implicitly available as output properties. Additionally, the Crawler resource produces the following output properties:

Arn string

The ARN of the crawler

Id string

The provider-assigned unique ID for this managed resource.

TagsAll Dictionary<string, string>

A map of tags assigned to the resource, including those inherited from the provider.

Arn string

The ARN of the crawler

Id string

The provider-assigned unique ID for this managed resource.

TagsAll map[string]string

A map of tags assigned to the resource, including those inherited from the provider.

arn String

The ARN of the crawler

id String

The provider-assigned unique ID for this managed resource.

tagsAll Map<String,String>

A map of tags assigned to the resource, including those inherited from the provider.

arn string

The ARN of the crawler

id string

The provider-assigned unique ID for this managed resource.

tagsAll {[key: string]: string}

A map of tags assigned to the resource, including those inherited from the provider.

arn str

The ARN of the crawler

id str

The provider-assigned unique ID for this managed resource.

tags_all Mapping[str, str]

A map of tags assigned to the resource, including those inherited from the provider.

arn String

The ARN of the crawler

id String

The provider-assigned unique ID for this managed resource.

tagsAll Map<String>

A map of tags assigned to the resource, including those inherited from the provider.

Look up an Existing Crawler Resource

Get an existing Crawler resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

public static get(name: string, id: Input<ID>, state?: CrawlerState, opts?: CustomResourceOptions): Crawler
@staticmethod
def get(resource_name: str,
        id: str,
        opts: Optional[ResourceOptions] = None,
        arn: Optional[str] = None,
        catalog_targets: Optional[Sequence[CrawlerCatalogTargetArgs]] = None,
        classifiers: Optional[Sequence[str]] = None,
        configuration: Optional[str] = None,
        database_name: Optional[str] = None,
        delta_targets: Optional[Sequence[CrawlerDeltaTargetArgs]] = None,
        description: Optional[str] = None,
        dynamodb_targets: Optional[Sequence[CrawlerDynamodbTargetArgs]] = None,
        jdbc_targets: Optional[Sequence[CrawlerJdbcTargetArgs]] = None,
        lineage_configuration: Optional[CrawlerLineageConfigurationArgs] = None,
        mongodb_targets: Optional[Sequence[CrawlerMongodbTargetArgs]] = None,
        name: Optional[str] = None,
        recrawl_policy: Optional[CrawlerRecrawlPolicyArgs] = None,
        role: Optional[str] = None,
        s3_targets: Optional[Sequence[CrawlerS3TargetArgs]] = None,
        schedule: Optional[str] = None,
        schema_change_policy: Optional[CrawlerSchemaChangePolicyArgs] = None,
        security_configuration: Optional[str] = None,
        table_prefix: Optional[str] = None,
        tags: Optional[Mapping[str, str]] = None,
        tags_all: Optional[Mapping[str, str]] = None) -> Crawler
func GetCrawler(ctx *Context, name string, id IDInput, state *CrawlerState, opts ...ResourceOption) (*Crawler, error)
public static Crawler Get(string name, Input<string> id, CrawlerState? state, CustomResourceOptions? opts = null)
public static Crawler get(String name, Output<String> id, CrawlerState state, CustomResourceOptions options)
Resource lookup is not supported in YAML
name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
resource_name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
name
The unique name of the resulting resource.
id
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
The following state arguments are supported:
Arn string

The ARN of the crawler

CatalogTargets List<CrawlerCatalogTargetArgs>
Classifiers List<string>

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

Configuration string

JSON string of configuration information. For more details see Setting Crawler Configuration Options.

DatabaseName string

The name of the Glue database to be synchronized.

DeltaTargets List<CrawlerDeltaTargetArgs>
Description string

Description of the crawler.

DynamodbTargets List<CrawlerDynamodbTargetArgs>

List of nested DynamoDB target arguments. See Dynamodb Target below.

JdbcTargets List<CrawlerJdbcTargetArgs>

List of nested JDBC target arguments. See JDBC Target below.

LineageConfiguration CrawlerLineageConfigurationArgs

Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.

MongodbTargets List<CrawlerMongodbTargetArgs>

List nested MongoDB target arguments. See MongoDB Target below.

Name string

Name of the crawler.

RecrawlPolicy CrawlerRecrawlPolicyArgs

A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.

Role string

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

S3Targets List<CrawlerS3TargetArgs>

List nested Amazon S3 target arguments. See S3 Target below.

Schedule string

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

SchemaChangePolicy CrawlerSchemaChangePolicyArgs

Policy for the crawler's update and deletion behavior. See Schema Change Policy below.

SecurityConfiguration string

The name of Security Configuration to be used by the crawler

TablePrefix string

The table prefix used for catalog tables that are created.

Tags Dictionary<string, string>

Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

TagsAll Dictionary<string, string>

A map of tags assigned to the resource, including those inherited from the provider.

Arn string

The ARN of the crawler

CatalogTargets []CrawlerCatalogTargetArgs
Classifiers []string

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

Configuration string

JSON string of configuration information. For more details see Setting Crawler Configuration Options.

DatabaseName string

The name of the Glue database to be synchronized.

DeltaTargets []CrawlerDeltaTargetArgs
Description string

Description of the crawler.

DynamodbTargets []CrawlerDynamodbTargetArgs

List of nested DynamoDB target arguments. See Dynamodb Target below.

JdbcTargets []CrawlerJdbcTargetArgs

List of nested JDBC target arguments. See JDBC Target below.

LineageConfiguration CrawlerLineageConfigurationArgs

Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.

MongodbTargets []CrawlerMongodbTargetArgs

List nested MongoDB target arguments. See MongoDB Target below.

Name string

Name of the crawler.

RecrawlPolicy CrawlerRecrawlPolicyArgs

A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.

Role string

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

S3Targets []CrawlerS3TargetArgs

List nested Amazon S3 target arguments. See S3 Target below.

Schedule string

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

SchemaChangePolicy CrawlerSchemaChangePolicyArgs

Policy for the crawler's update and deletion behavior. See Schema Change Policy below.

SecurityConfiguration string

The name of Security Configuration to be used by the crawler

TablePrefix string

The table prefix used for catalog tables that are created.

Tags map[string]string

Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

TagsAll map[string]string

A map of tags assigned to the resource, including those inherited from the provider.

arn String

The ARN of the crawler

catalogTargets List<CrawlerCatalogTargetArgs>
classifiers List<String>

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

configuration String

JSON string of configuration information. For more details see Setting Crawler Configuration Options.

databaseName String

The name of the Glue database to be synchronized.

deltaTargets List<CrawlerDeltaTargetArgs>
description String

Description of the crawler.

dynamodbTargets List<CrawlerDynamodbTargetArgs>

List of nested DynamoDB target arguments. See Dynamodb Target below.

jdbcTargets List<CrawlerJdbcTargetArgs>

List of nested JDBC target arguments. See JDBC Target below.

lineageConfiguration CrawlerLineageConfigurationArgs

Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.

mongodbTargets List<CrawlerMongodbTargetArgs>

List nested MongoDB target arguments. See MongoDB Target below.

name String

Name of the crawler.

recrawlPolicy CrawlerRecrawlPolicyArgs

A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.

role String

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

s3Targets List<CrawlerS3TargetArgs>

List nested Amazon S3 target arguments. See S3 Target below.

schedule String

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

schemaChangePolicy CrawlerSchemaChangePolicyArgs

Policy for the crawler's update and deletion behavior. See Schema Change Policy below.

securityConfiguration String

The name of Security Configuration to be used by the crawler

tablePrefix String

The table prefix used for catalog tables that are created.

tags Map<String,String>

Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

tagsAll Map<String,String>

A map of tags assigned to the resource, including those inherited from the provider.

arn string

The ARN of the crawler

catalogTargets CrawlerCatalogTargetArgs[]
classifiers string[]

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

configuration string

JSON string of configuration information. For more details see Setting Crawler Configuration Options.

databaseName string

The name of the Glue database to be synchronized.

deltaTargets CrawlerDeltaTargetArgs[]
description string

Description of the crawler.

dynamodbTargets CrawlerDynamodbTargetArgs[]

List of nested DynamoDB target arguments. See Dynamodb Target below.

jdbcTargets CrawlerJdbcTargetArgs[]

List of nested JDBC target arguments. See JDBC Target below.

lineageConfiguration CrawlerLineageConfigurationArgs

Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.

mongodbTargets CrawlerMongodbTargetArgs[]

List nested MongoDB target arguments. See MongoDB Target below.

name string

Name of the crawler.

recrawlPolicy CrawlerRecrawlPolicyArgs

A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.

role string

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

s3Targets CrawlerS3TargetArgs[]

List nested Amazon S3 target arguments. See S3 Target below.

schedule string

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

schemaChangePolicy CrawlerSchemaChangePolicyArgs

Policy for the crawler's update and deletion behavior. See Schema Change Policy below.

securityConfiguration string

The name of Security Configuration to be used by the crawler

tablePrefix string

The table prefix used for catalog tables that are created.

tags {[key: string]: string}

Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

tagsAll {[key: string]: string}

A map of tags assigned to the resource, including those inherited from the provider.

arn str

The ARN of the crawler

catalog_targets Sequence[CrawlerCatalogTargetArgs]
classifiers Sequence[str]

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

configuration str

JSON string of configuration information. For more details see Setting Crawler Configuration Options.

database_name str

The name of the Glue database to be synchronized.

delta_targets Sequence[CrawlerDeltaTargetArgs]
description str

Description of the crawler.

dynamodb_targets Sequence[CrawlerDynamodbTargetArgs]

List of nested DynamoDB target arguments. See Dynamodb Target below.

jdbc_targets Sequence[CrawlerJdbcTargetArgs]

List of nested JDBC target arguments. See JDBC Target below.

lineage_configuration CrawlerLineageConfigurationArgs

Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.

mongodb_targets Sequence[CrawlerMongodbTargetArgs]

List nested MongoDB target arguments. See MongoDB Target below.

name str

Name of the crawler.

recrawl_policy CrawlerRecrawlPolicyArgs

A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.

role str

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

s3_targets Sequence[CrawlerS3TargetArgs]

List nested Amazon S3 target arguments. See S3 Target below.

schedule str

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

schema_change_policy CrawlerSchemaChangePolicyArgs

Policy for the crawler's update and deletion behavior. See Schema Change Policy below.

security_configuration str

The name of Security Configuration to be used by the crawler

table_prefix str

The table prefix used for catalog tables that are created.

tags Mapping[str, str]

Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

tags_all Mapping[str, str]

A map of tags assigned to the resource, including those inherited from the provider.

arn String

The ARN of the crawler

catalogTargets List<Property Map>
classifiers List<String>

List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.

configuration String

JSON string of configuration information. For more details see Setting Crawler Configuration Options.

databaseName String

The name of the Glue database to be synchronized.

deltaTargets List<Property Map>
description String

Description of the crawler.

dynamodbTargets List<Property Map>

List of nested DynamoDB target arguments. See Dynamodb Target below.

jdbcTargets List<Property Map>

List of nested JDBC target arguments. See JDBC Target below.

lineageConfiguration Property Map

Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.

mongodbTargets List<Property Map>

List nested MongoDB target arguments. See MongoDB Target below.

name String

Name of the crawler.

recrawlPolicy Property Map

A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.

role String

The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.

s3Targets List<Property Map>

List nested Amazon S3 target arguments. See S3 Target below.

schedule String

A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

schemaChangePolicy Property Map

Policy for the crawler's update and deletion behavior. See Schema Change Policy below.

securityConfiguration String

The name of Security Configuration to be used by the crawler

tablePrefix String

The table prefix used for catalog tables that are created.

tags Map<String>

Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

tagsAll Map<String>

A map of tags assigned to the resource, including those inherited from the provider.

Supporting Types

CrawlerCatalogTarget

DatabaseName string

The name of the Glue database to be synchronized.

Tables List<string>

A list of catalog tables to be synchronized.

DatabaseName string

The name of the Glue database to be synchronized.

Tables []string

A list of catalog tables to be synchronized.

databaseName String

The name of the Glue database to be synchronized.

tables List<String>

A list of catalog tables to be synchronized.

databaseName string

The name of the Glue database to be synchronized.

tables string[]

A list of catalog tables to be synchronized.

database_name str

The name of the Glue database to be synchronized.

tables Sequence[str]

A list of catalog tables to be synchronized.

databaseName String

The name of the Glue database to be synchronized.

tables List<String>

A list of catalog tables to be synchronized.

CrawlerDeltaTarget

ConnectionName string

The name of the connection to use to connect to the Delta table target.

DeltaTables List<string>

A list of the Amazon S3 paths to the Delta tables.

WriteManifest bool

Specifies whether to write the manifest files to the Delta table path.

ConnectionName string

The name of the connection to use to connect to the Delta table target.

DeltaTables []string

A list of the Amazon S3 paths to the Delta tables.

WriteManifest bool

Specifies whether to write the manifest files to the Delta table path.

connectionName String

The name of the connection to use to connect to the Delta table target.

deltaTables List<String>

A list of the Amazon S3 paths to the Delta tables.

writeManifest Boolean

Specifies whether to write the manifest files to the Delta table path.

connectionName string

The name of the connection to use to connect to the Delta table target.

deltaTables string[]

A list of the Amazon S3 paths to the Delta tables.

writeManifest boolean

Specifies whether to write the manifest files to the Delta table path.

connection_name str

The name of the connection to use to connect to the Delta table target.

delta_tables Sequence[str]

A list of the Amazon S3 paths to the Delta tables.

write_manifest bool

Specifies whether to write the manifest files to the Delta table path.

connectionName String

The name of the connection to use to connect to the Delta table target.

deltaTables List<String>

A list of the Amazon S3 paths to the Delta tables.

writeManifest Boolean

Specifies whether to write the manifest files to the Delta table path.

CrawlerDynamodbTarget

Path string

The name of the DynamoDB table to crawl.

ScanAll bool

Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.

ScanRate double

The percentage of the configured read capacity units to use by the AWS Glue crawler. The valid values are null or a value between 0.1 to 1.5.

Path string

The name of the DynamoDB table to crawl.

ScanAll bool

Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.

ScanRate float64

The percentage of the configured read capacity units to use by the AWS Glue crawler. The valid values are null or a value between 0.1 to 1.5.

path String

The name of the DynamoDB table to crawl.

scanAll Boolean

Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.

scanRate Double

The percentage of the configured read capacity units to use by the AWS Glue crawler. The valid values are null or a value between 0.1 to 1.5.

path string

The name of the DynamoDB table to crawl.

scanAll boolean

Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.

scanRate number

The percentage of the configured read capacity units to use by the AWS Glue crawler. The valid values are null or a value between 0.1 to 1.5.

path str

The name of the DynamoDB table to crawl.

scan_all bool

Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.

scan_rate float

The percentage of the configured read capacity units to use by the AWS Glue crawler. The valid values are null or a value between 0.1 to 1.5.

path String

The name of the DynamoDB table to crawl.

scanAll Boolean

Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.

scanRate Number

The percentage of the configured read capacity units to use by the AWS Glue crawler. The valid values are null or a value between 0.1 to 1.5.

CrawlerJdbcTarget

ConnectionName string

The name of the connection to use to connect to the JDBC target.

Path string

The path of the JDBC target.

Exclusions List<string>

A list of glob patterns used to exclude from the crawl.

ConnectionName string

The name of the connection to use to connect to the JDBC target.

Path string

The path of the JDBC target.

Exclusions []string

A list of glob patterns used to exclude from the crawl.

connectionName String

The name of the connection to use to connect to the JDBC target.

path String

The path of the JDBC target.

exclusions List<String>

A list of glob patterns used to exclude from the crawl.

connectionName string

The name of the connection to use to connect to the JDBC target.

path string

The path of the JDBC target.

exclusions string[]

A list of glob patterns used to exclude from the crawl.

connection_name str

The name of the connection to use to connect to the JDBC target.

path str

The path of the JDBC target.

exclusions Sequence[str]

A list of glob patterns used to exclude from the crawl.

connectionName String

The name of the connection to use to connect to the JDBC target.

path String

The path of the JDBC target.

exclusions List<String>

A list of glob patterns used to exclude from the crawl.

CrawlerLineageConfiguration

CrawlerLineageSettings string

Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.

CrawlerLineageSettings string

Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.

crawlerLineageSettings String

Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.

crawlerLineageSettings string

Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.

crawler_lineage_settings str

Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.

crawlerLineageSettings String

Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.

CrawlerMongodbTarget

ConnectionName string

The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.

Path string

The path of the Amazon DocumentDB or MongoDB target (database/collection).

ScanAll bool

Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.

ConnectionName string

The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.

Path string

The path of the Amazon DocumentDB or MongoDB target (database/collection).

ScanAll bool

Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.

connectionName String

The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.

path String

The path of the Amazon DocumentDB or MongoDB target (database/collection).

scanAll Boolean

Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.

connectionName string

The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.

path string

The path of the Amazon DocumentDB or MongoDB target (database/collection).

scanAll boolean

Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.

connection_name str

The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.

path str

The path of the Amazon DocumentDB or MongoDB target (database/collection).

scan_all bool

Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.

connectionName String

The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.

path String

The path of the Amazon DocumentDB or MongoDB target (database/collection).

scanAll Boolean

Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.

CrawlerRecrawlPolicy

RecrawlBehavior string

Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid Values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.

RecrawlBehavior string

Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid Values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.

recrawlBehavior String

Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid Values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.

recrawlBehavior string

Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid Values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.

recrawl_behavior str

Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid Values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.

recrawlBehavior String

Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid Values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.

CrawlerS3Target

Path string

The path to the Amazon S3 target.

ConnectionName string

The name of a connection which allows the crawler to access data in S3 within a VPC.

DlqEventQueueArn string

The ARN of the dead-letter SQS queue.

EventQueueArn string

The ARN of the SQS queue to receive S3 notifications from.

Exclusions List<string>

A list of glob patterns used to exclude from the crawl.

SampleSize int

Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.

Path string

The path to the Amazon S3 target.

ConnectionName string

The name of a connection which allows the crawler to access data in S3 within a VPC.

DlqEventQueueArn string

The ARN of the dead-letter SQS queue.

EventQueueArn string

The ARN of the SQS queue to receive S3 notifications from.

Exclusions []string

A list of glob patterns used to exclude from the crawl.

SampleSize int

Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.

path String

The path to the Amazon S3 target.

connectionName String

The name of a connection which allows the crawler to access data in S3 within a VPC.

dlqEventQueueArn String

The ARN of the dead-letter SQS queue.

eventQueueArn String

The ARN of the SQS queue to receive S3 notifications from.

exclusions List<String>

A list of glob patterns used to exclude from the crawl.

sampleSize Integer

Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.

path string

The path to the Amazon S3 target.

connectionName string

The name of a connection which allows the crawler to access data in S3 within a VPC.

dlqEventQueueArn string

The ARN of the dead-letter SQS queue.

eventQueueArn string

The ARN of the SQS queue to receive S3 notifications from.

exclusions string[]

A list of glob patterns used to exclude from the crawl.

sampleSize number

Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.

path str

The path to the Amazon S3 target.

connection_name str

The name of a connection which allows the crawler to access data in S3 within a VPC.

dlq_event_queue_arn str

The ARN of the dead-letter SQS queue.

event_queue_arn str

The ARN of the SQS queue to receive S3 notifications from.

exclusions Sequence[str]

A list of glob patterns used to exclude from the crawl.

sample_size int

Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.

path String

The path to the Amazon S3 target.

connectionName String

The name of a connection which allows the crawler to access data in S3 within a VPC.

dlqEventQueueArn String

The ARN of the dead-letter SQS queue.

eventQueueArn String

The ARN of the SQS queue to receive S3 notifications from.

exclusions List<String>

A list of glob patterns used to exclude from the crawl.

sampleSize Number

Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.

CrawlerSchemaChangePolicy

DeleteBehavior string

The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.

UpdateBehavior string

The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.

DeleteBehavior string

The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.

UpdateBehavior string

The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.

deleteBehavior String

The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.

updateBehavior String

The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.

deleteBehavior string

The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.

updateBehavior string

The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.

delete_behavior str

The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.

update_behavior str

The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.

deleteBehavior String

The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.

updateBehavior String

The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.

Import

Glue Crawlers can be imported using the crawler's name, e.g.,

 $ pulumi import aws:glue/crawler:Crawler MyJob MyJob

Package Details

Repository
https://github.com/pulumi/pulumi-aws
License
Apache-2.0
Notes

This Pulumi package is based on the aws Terraform Provider.