aws.glue.Crawler
Manages a Glue Crawler. More information can be found in the AWS Glue Developer Guide.
Example Usage
DynamoDB Target Example
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
var example = new Aws.Glue.Crawler("example", new()
{
DatabaseName = aws_glue_catalog_database.Example.Name,
Role = aws_iam_role.Example.Arn,
DynamodbTargets = new[]
{
new Aws.Glue.Inputs.CrawlerDynamodbTargetArgs
{
Path = "table-name",
},
},
});
});
package main
import (
"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
DatabaseName: pulumi.Any(aws_glue_catalog_database.Example.Name),
Role: pulumi.Any(aws_iam_role.Example.Arn),
DynamodbTargets: glue.CrawlerDynamodbTargetArray{
&glue.CrawlerDynamodbTargetArgs{
Path: pulumi.String("table-name"),
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerDynamodbTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.role(aws_iam_role.example().arn())
.dynamodbTargets(CrawlerDynamodbTargetArgs.builder()
.path("table-name")
.build())
.build());
}
}
import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
database_name=aws_glue_catalog_database["example"]["name"],
role=aws_iam_role["example"]["arn"],
dynamodb_targets=[aws.glue.CrawlerDynamodbTargetArgs(
path="table-name",
)])
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
databaseName: aws_glue_catalog_database.example.name,
role: aws_iam_role.example.arn,
dynamodbTargets: [{
path: "table-name",
}],
});
resources:
example:
type: aws:glue:Crawler
properties:
databaseName: ${aws_glue_catalog_database.example.name}
role: ${aws_iam_role.example.arn}
dynamodbTargets:
- path: table-name
JDBC Target Example
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
var example = new Aws.Glue.Crawler("example", new()
{
DatabaseName = aws_glue_catalog_database.Example.Name,
Role = aws_iam_role.Example.Arn,
JdbcTargets = new[]
{
new Aws.Glue.Inputs.CrawlerJdbcTargetArgs
{
ConnectionName = aws_glue_connection.Example.Name,
Path = "database-name/%",
},
},
});
});
package main
import (
"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
DatabaseName: pulumi.Any(aws_glue_catalog_database.Example.Name),
Role: pulumi.Any(aws_iam_role.Example.Arn),
JdbcTargets: glue.CrawlerJdbcTargetArray{
&glue.CrawlerJdbcTargetArgs{
ConnectionName: pulumi.Any(aws_glue_connection.Example.Name),
Path: pulumi.String("database-name/%"),
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerJdbcTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.role(aws_iam_role.example().arn())
.jdbcTargets(CrawlerJdbcTargetArgs.builder()
.connectionName(aws_glue_connection.example().name())
.path("database-name/%")
.build())
.build());
}
}
import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
database_name=aws_glue_catalog_database["example"]["name"],
role=aws_iam_role["example"]["arn"],
jdbc_targets=[aws.glue.CrawlerJdbcTargetArgs(
connection_name=aws_glue_connection["example"]["name"],
path="database-name/%",
)])
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
databaseName: aws_glue_catalog_database.example.name,
role: aws_iam_role.example.arn,
jdbcTargets: [{
connectionName: aws_glue_connection.example.name,
path: "database-name/%",
}],
});
resources:
example:
type: aws:glue:Crawler
properties:
databaseName: ${aws_glue_catalog_database.example.name}
role: ${aws_iam_role.example.arn}
jdbcTargets:
- connectionName: ${aws_glue_connection.example.name}
path: database-name/%
S3 Target Example
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
var example = new Aws.Glue.Crawler("example", new()
{
DatabaseName = aws_glue_catalog_database.Example.Name,
Role = aws_iam_role.Example.Arn,
S3Targets = new[]
{
new Aws.Glue.Inputs.CrawlerS3TargetArgs
{
Path = $"s3://{aws_s3_bucket.Example.Bucket}",
},
},
});
});
package main
import (
"fmt"
"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
DatabaseName: pulumi.Any(aws_glue_catalog_database.Example.Name),
Role: pulumi.Any(aws_iam_role.Example.Arn),
S3Targets: glue.CrawlerS3TargetArray{
&glue.CrawlerS3TargetArgs{
Path: pulumi.String(fmt.Sprintf("s3://%v", aws_s3_bucket.Example.Bucket)),
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerS3TargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.role(aws_iam_role.example().arn())
.s3Targets(CrawlerS3TargetArgs.builder()
.path(String.format("s3://%s", aws_s3_bucket.example().bucket()))
.build())
.build());
}
}
import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
database_name=aws_glue_catalog_database["example"]["name"],
role=aws_iam_role["example"]["arn"],
s3_targets=[aws.glue.CrawlerS3TargetArgs(
path=f"s3://{aws_s3_bucket['example']['bucket']}",
)])
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
databaseName: aws_glue_catalog_database.example.name,
role: aws_iam_role.example.arn,
s3Targets: [{
path: `s3://${aws_s3_bucket.example.bucket}`,
}],
});
resources:
example:
type: aws:glue:Crawler
properties:
databaseName: ${aws_glue_catalog_database.example.name}
role: ${aws_iam_role.example.arn}
s3Targets:
- path: s3://${aws_s3_bucket.example.bucket}
Catalog Target Example
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
var example = new Aws.Glue.Crawler("example", new()
{
DatabaseName = aws_glue_catalog_database.Example.Name,
Role = aws_iam_role.Example.Arn,
CatalogTargets = new[]
{
new Aws.Glue.Inputs.CrawlerCatalogTargetArgs
{
DatabaseName = aws_glue_catalog_database.Example.Name,
Tables = new[]
{
aws_glue_catalog_table.Example.Name,
},
},
},
SchemaChangePolicy = new Aws.Glue.Inputs.CrawlerSchemaChangePolicyArgs
{
DeleteBehavior = "LOG",
},
Configuration = @"{
""Version"":1.0,
""Grouping"": {
""TableGroupingPolicy"": ""CombineCompatibleSchemas""
}
}
",
});
});
package main
import (
"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
DatabaseName: pulumi.Any(aws_glue_catalog_database.Example.Name),
Role: pulumi.Any(aws_iam_role.Example.Arn),
CatalogTargets: glue.CrawlerCatalogTargetArray{
&glue.CrawlerCatalogTargetArgs{
DatabaseName: pulumi.Any(aws_glue_catalog_database.Example.Name),
Tables: pulumi.StringArray{
aws_glue_catalog_table.Example.Name,
},
},
},
SchemaChangePolicy: &glue.CrawlerSchemaChangePolicyArgs{
DeleteBehavior: pulumi.String("LOG"),
},
Configuration: pulumi.String(`{
"Version":1.0,
"Grouping": {
"TableGroupingPolicy": "CombineCompatibleSchemas"
}
}
`),
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerCatalogTargetArgs;
import com.pulumi.aws.glue.inputs.CrawlerSchemaChangePolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.role(aws_iam_role.example().arn())
.catalogTargets(CrawlerCatalogTargetArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.tables(aws_glue_catalog_table.example().name())
.build())
.schemaChangePolicy(CrawlerSchemaChangePolicyArgs.builder()
.deleteBehavior("LOG")
.build())
.configuration("""
{
"Version":1.0,
"Grouping": {
"TableGroupingPolicy": "CombineCompatibleSchemas"
}
}
""")
.build());
}
}
import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
database_name=aws_glue_catalog_database["example"]["name"],
role=aws_iam_role["example"]["arn"],
catalog_targets=[aws.glue.CrawlerCatalogTargetArgs(
database_name=aws_glue_catalog_database["example"]["name"],
tables=[aws_glue_catalog_table["example"]["name"]],
)],
schema_change_policy=aws.glue.CrawlerSchemaChangePolicyArgs(
delete_behavior="LOG",
),
configuration="""{
"Version":1.0,
"Grouping": {
"TableGroupingPolicy": "CombineCompatibleSchemas"
}
}
""")
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
databaseName: aws_glue_catalog_database.example.name,
role: aws_iam_role.example.arn,
catalogTargets: [{
databaseName: aws_glue_catalog_database.example.name,
tables: [aws_glue_catalog_table.example.name],
}],
schemaChangePolicy: {
deleteBehavior: "LOG",
},
configuration: `{
"Version":1.0,
"Grouping": {
"TableGroupingPolicy": "CombineCompatibleSchemas"
}
}
`,
});
resources:
example:
type: aws:glue:Crawler
properties:
databaseName: ${aws_glue_catalog_database.example.name}
role: ${aws_iam_role.example.arn}
catalogTargets:
- databaseName: ${aws_glue_catalog_database.example.name}
tables:
- ${aws_glue_catalog_table.example.name}
schemaChangePolicy:
deleteBehavior: LOG
configuration: |
{
"Version":1.0,
"Grouping": {
"TableGroupingPolicy": "CombineCompatibleSchemas"
}
}
MongoDB Target Example
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
var example = new Aws.Glue.Crawler("example", new()
{
DatabaseName = aws_glue_catalog_database.Example.Name,
Role = aws_iam_role.Example.Arn,
MongodbTargets = new[]
{
new Aws.Glue.Inputs.CrawlerMongodbTargetArgs
{
ConnectionName = aws_glue_connection.Example.Name,
Path = "database-name/%",
},
},
});
});
package main
import (
"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
DatabaseName: pulumi.Any(aws_glue_catalog_database.Example.Name),
Role: pulumi.Any(aws_iam_role.Example.Arn),
MongodbTargets: glue.CrawlerMongodbTargetArray{
&glue.CrawlerMongodbTargetArgs{
ConnectionName: pulumi.Any(aws_glue_connection.Example.Name),
Path: pulumi.String("database-name/%"),
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerMongodbTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var example = new Crawler("example", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.example().name())
.role(aws_iam_role.example().arn())
.mongodbTargets(CrawlerMongodbTargetArgs.builder()
.connectionName(aws_glue_connection.example().name())
.path("database-name/%")
.build())
.build());
}
}
import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
database_name=aws_glue_catalog_database["example"]["name"],
role=aws_iam_role["example"]["arn"],
mongodb_targets=[aws.glue.CrawlerMongodbTargetArgs(
connection_name=aws_glue_connection["example"]["name"],
path="database-name/%",
)])
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
databaseName: aws_glue_catalog_database.example.name,
role: aws_iam_role.example.arn,
mongodbTargets: [{
connectionName: aws_glue_connection.example.name,
path: "database-name/%",
}],
});
resources:
example:
type: aws:glue:Crawler
properties:
databaseName: ${aws_glue_catalog_database.example.name}
role: ${aws_iam_role.example.arn}
mongodbTargets:
- connectionName: ${aws_glue_connection.example.name}
path: database-name/%
Configuration Settings Example
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
var eventsCrawler = new Aws.Glue.Crawler("eventsCrawler", new()
{
DatabaseName = aws_glue_catalog_database.Glue_database.Name,
Schedule = "cron(0 1 * * ? *)",
Role = aws_iam_role.Glue_role.Arn,
Tags = @var.Tags,
Configuration = JsonSerializer.Serialize(new Dictionary<string, object?>
{
["Grouping"] = new Dictionary<string, object?>
{
["TableGroupingPolicy"] = "CombineCompatibleSchemas",
},
["CrawlerOutput"] = new Dictionary<string, object?>
{
["Partitions"] = new Dictionary<string, object?>
{
["AddOrUpdateBehavior"] = "InheritFromTable",
},
},
["Version"] = 1,
}),
S3Targets = new[]
{
new Aws.Glue.Inputs.CrawlerS3TargetArgs
{
Path = $"s3://{aws_s3_bucket.Data_lake_bucket.Bucket}",
},
},
});
});
package main
import (
"encoding/json"
"fmt"
"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
tmpJSON0, err := json.Marshal(map[string]interface{}{
"Grouping": map[string]interface{}{
"TableGroupingPolicy": "CombineCompatibleSchemas",
},
"CrawlerOutput": map[string]interface{}{
"Partitions": map[string]interface{}{
"AddOrUpdateBehavior": "InheritFromTable",
},
},
"Version": 1,
})
if err != nil {
return err
}
json0 := string(tmpJSON0)
_, err = glue.NewCrawler(ctx, "eventsCrawler", &glue.CrawlerArgs{
DatabaseName: pulumi.Any(aws_glue_catalog_database.Glue_database.Name),
Schedule: pulumi.String("cron(0 1 * * ? *)"),
Role: pulumi.Any(aws_iam_role.Glue_role.Arn),
Tags: pulumi.Any(_var.Tags),
Configuration: pulumi.String(json0),
S3Targets: glue.CrawlerS3TargetArray{
&glue.CrawlerS3TargetArgs{
Path: pulumi.String(fmt.Sprintf("s3://%v", aws_s3_bucket.Data_lake_bucket.Bucket)),
},
},
})
if err != nil {
return err
}
return nil
})
}
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerS3TargetArgs;
import static com.pulumi.codegen.internal.Serialization.*;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var eventsCrawler = new Crawler("eventsCrawler", CrawlerArgs.builder()
.databaseName(aws_glue_catalog_database.glue_database().name())
.schedule("cron(0 1 * * ? *)")
.role(aws_iam_role.glue_role().arn())
.tags(var_.tags())
.configuration(serializeJson(
jsonObject(
jsonProperty("Grouping", jsonObject(
jsonProperty("TableGroupingPolicy", "CombineCompatibleSchemas")
)),
jsonProperty("CrawlerOutput", jsonObject(
jsonProperty("Partitions", jsonObject(
jsonProperty("AddOrUpdateBehavior", "InheritFromTable")
))
)),
jsonProperty("Version", 1)
)))
.s3Targets(CrawlerS3TargetArgs.builder()
.path(String.format("s3://%s", aws_s3_bucket.data_lake_bucket().bucket()))
.build())
.build());
}
}
import pulumi
import json
import pulumi_aws as aws
events_crawler = aws.glue.Crawler("eventsCrawler",
database_name=aws_glue_catalog_database["glue_database"]["name"],
schedule="cron(0 1 * * ? *)",
role=aws_iam_role["glue_role"]["arn"],
tags=var["tags"],
configuration=json.dumps({
"Grouping": {
"TableGroupingPolicy": "CombineCompatibleSchemas",
},
"CrawlerOutput": {
"Partitions": {
"AddOrUpdateBehavior": "InheritFromTable",
},
},
"Version": 1,
}),
s3_targets=[aws.glue.CrawlerS3TargetArgs(
path=f"s3://{aws_s3_bucket['data_lake_bucket']['bucket']}",
)])
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const eventsCrawler = new aws.glue.Crawler("eventsCrawler", {
databaseName: aws_glue_catalog_database.glue_database.name,
schedule: "cron(0 1 * * ? *)",
role: aws_iam_role.glue_role.arn,
tags: _var.tags,
configuration: JSON.stringify({
Grouping: {
TableGroupingPolicy: "CombineCompatibleSchemas",
},
CrawlerOutput: {
Partitions: {
AddOrUpdateBehavior: "InheritFromTable",
},
},
Version: 1,
}),
s3Targets: [{
path: `s3://${aws_s3_bucket.data_lake_bucket.bucket}`,
}],
});
resources:
eventsCrawler:
type: aws:glue:Crawler
properties:
databaseName: ${aws_glue_catalog_database.glue_database.name}
schedule: cron(0 1 * * ? *)
role: ${aws_iam_role.glue_role.arn}
tags: ${var.tags}
configuration:
fn::toJSON:
Grouping:
TableGroupingPolicy: CombineCompatibleSchemas
CrawlerOutput:
Partitions:
AddOrUpdateBehavior: InheritFromTable
Version: 1
s3Targets:
- path: s3://${aws_s3_bucket.data_lake_bucket.bucket}
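Beyond targets and grouping options, the recrawlPolicy and schemaChangePolicy inputs described below control how later crawler runs behave. The following is an editor-added TypeScript sketch, not one of the generated examples above; the database, role, bucket path, and behavior values are placeholders chosen for illustration.
import * as aws from "@pulumi/aws";

// Crawl only folders added since the last run, and log schema changes
// instead of applying them. All names below are placeholders.
const incrementalCrawler = new aws.glue.Crawler("incrementalCrawler", {
    databaseName: "example-database",              // assumed existing Glue database
    role: "example-glue-role",                     // assumed existing IAM role
    s3Targets: [{ path: "s3://example-bucket/events/" }],
    recrawlPolicy: {
        recrawlBehavior: "CRAWL_NEW_FOLDERS_ONLY", // see Recrawl Policy below
    },
    schemaChangePolicy: {
        updateBehavior: "LOG",
        deleteBehavior: "LOG",
    },
});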
Create Crawler Resource
new Crawler(name: string, args: CrawlerArgs, opts?: CustomResourceOptions);
@overload
def Crawler(resource_name: str,
opts: Optional[ResourceOptions] = None,
catalog_targets: Optional[Sequence[CrawlerCatalogTargetArgs]] = None,
classifiers: Optional[Sequence[str]] = None,
configuration: Optional[str] = None,
database_name: Optional[str] = None,
delta_targets: Optional[Sequence[CrawlerDeltaTargetArgs]] = None,
description: Optional[str] = None,
dynamodb_targets: Optional[Sequence[CrawlerDynamodbTargetArgs]] = None,
hudi_targets: Optional[Sequence[CrawlerHudiTargetArgs]] = None,
iceberg_targets: Optional[Sequence[CrawlerIcebergTargetArgs]] = None,
jdbc_targets: Optional[Sequence[CrawlerJdbcTargetArgs]] = None,
lake_formation_configuration: Optional[CrawlerLakeFormationConfigurationArgs] = None,
lineage_configuration: Optional[CrawlerLineageConfigurationArgs] = None,
mongodb_targets: Optional[Sequence[CrawlerMongodbTargetArgs]] = None,
name: Optional[str] = None,
recrawl_policy: Optional[CrawlerRecrawlPolicyArgs] = None,
role: Optional[str] = None,
s3_targets: Optional[Sequence[CrawlerS3TargetArgs]] = None,
schedule: Optional[str] = None,
schema_change_policy: Optional[CrawlerSchemaChangePolicyArgs] = None,
security_configuration: Optional[str] = None,
table_prefix: Optional[str] = None,
tags: Optional[Mapping[str, str]] = None)
@overload
def Crawler(resource_name: str,
args: CrawlerArgs,
opts: Optional[ResourceOptions] = None)
func NewCrawler(ctx *Context, name string, args CrawlerArgs, opts ...ResourceOption) (*Crawler, error)
public Crawler(string name, CrawlerArgs args, CustomResourceOptions? opts = null)
public Crawler(String name, CrawlerArgs args)
public Crawler(String name, CrawlerArgs args, CustomResourceOptions options)
type: aws:glue:Crawler
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args CrawlerArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args CrawlerArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args CrawlerArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args CrawlerArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args CrawlerArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
Crawler Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
The Crawler resource accepts the following input properties:
Property names are shown below in camelCase, as used by the TypeScript and YAML SDKs; the Python SDK uses snake_case equivalents such as database_name, and the .NET and Go SDKs use PascalCase equivalents such as DatabaseName.
- databaseName string (required)
  Glue database where results are written.
- role string (required)
  The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- catalogTargets List<CrawlerCatalogTarget>
- classifiers List<string>
  List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- configuration string
  JSON string of configuration information. For more details see Setting Crawler Configuration Options.
- deltaTargets List<CrawlerDeltaTarget>
  List of nested Delta Lake target arguments. See Delta Target below.
- description string
  Description of the crawler.
- dynamodbTargets List<CrawlerDynamodbTarget>
  List of nested DynamoDB target arguments. See Dynamodb Target below.
- hudiTargets List<CrawlerHudiTarget>
  List of nested Hudi target arguments. See Hudi Target below.
- icebergTargets List<CrawlerIcebergTarget>
  List of nested Iceberg target arguments. See Iceberg Target below.
- jdbcTargets List<CrawlerJdbcTarget>
  List of nested JDBC target arguments. See JDBC Target below.
- lakeFormationConfiguration CrawlerLakeFormationConfiguration
  Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
- lineageConfiguration CrawlerLineageConfiguration
  Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
- mongodbTargets List<CrawlerMongodbTarget>
  List of nested MongoDB target arguments. See MongoDB Target below.
- name string
  Name of the crawler.
- recrawlPolicy CrawlerRecrawlPolicy
  A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
- s3Targets List<CrawlerS3Target>
  List of nested Amazon S3 target arguments. See S3 Target below.
- schedule string
  A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- schemaChangePolicy CrawlerSchemaChangePolicy
  Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
- securityConfiguration string
  The name of Security Configuration to be used by the crawler.
- tablePrefix string
  The table prefix used for catalog tables that are created.
- tags Map<string, string>
  Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
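Because resource-level tags interact with provider-level default_tags, the following editor-added TypeScript sketch shows both together; the provider configuration, resource names, and tag values are placeholders rather than values taken from the examples above.
import * as aws from "@pulumi/aws";

// Provider-level default tags plus a resource-level override. A matching key
// on the resource overwrites the provider-level value, per the note above.
const taggedProvider = new aws.Provider("tagged", {
    defaultTags: { tags: { Team: "data-platform", Environment: "dev" } },
});

const taggedCrawler = new aws.glue.Crawler("taggedCrawler", {
    databaseName: "example-database",            // placeholder Glue database
    role: "example-glue-role",                   // placeholder IAM role
    s3Targets: [{ path: "s3://example-bucket" }],
    tags: { Environment: "prod" },               // overrides the provider-level Environment tag
}, { provider: taggedProvider });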
Outputs
All input properties are implicitly available as output properties. Additionally, the Crawler resource produces the following output properties:
- arn string
  The ARN of the crawler.
- id string
  The provider-assigned unique ID for this managed resource.
- tagsAll Map<string, string>
  A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
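For example (an editor-added TypeScript sketch; the crawler is declared with placeholder inputs in the style of the usage section above), the inputs and the generated arn can be read back off the resource:
import * as aws from "@pulumi/aws";

const example = new aws.glue.Crawler("example", {
    databaseName: "example-database",
    role: "example-glue-role",
    s3Targets: [{ path: "s3://example-bucket" }],
});

// Inputs come back as outputs, alongside generated properties such as arn.
export const crawlerName = example.name;
export const crawlerArn = example.arn;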
Look up Existing Crawler Resource
Get an existing Crawler resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: CrawlerState, opts?: CustomResourceOptions): Crawler
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
arn: Optional[str] = None,
catalog_targets: Optional[Sequence[CrawlerCatalogTargetArgs]] = None,
classifiers: Optional[Sequence[str]] = None,
configuration: Optional[str] = None,
database_name: Optional[str] = None,
delta_targets: Optional[Sequence[CrawlerDeltaTargetArgs]] = None,
description: Optional[str] = None,
dynamodb_targets: Optional[Sequence[CrawlerDynamodbTargetArgs]] = None,
hudi_targets: Optional[Sequence[CrawlerHudiTargetArgs]] = None,
iceberg_targets: Optional[Sequence[CrawlerIcebergTargetArgs]] = None,
jdbc_targets: Optional[Sequence[CrawlerJdbcTargetArgs]] = None,
lake_formation_configuration: Optional[CrawlerLakeFormationConfigurationArgs] = None,
lineage_configuration: Optional[CrawlerLineageConfigurationArgs] = None,
mongodb_targets: Optional[Sequence[CrawlerMongodbTargetArgs]] = None,
name: Optional[str] = None,
recrawl_policy: Optional[CrawlerRecrawlPolicyArgs] = None,
role: Optional[str] = None,
s3_targets: Optional[Sequence[CrawlerS3TargetArgs]] = None,
schedule: Optional[str] = None,
schema_change_policy: Optional[CrawlerSchemaChangePolicyArgs] = None,
security_configuration: Optional[str] = None,
table_prefix: Optional[str] = None,
tags: Optional[Mapping[str, str]] = None,
tags_all: Optional[Mapping[str, str]] = None) -> Crawler
func GetCrawler(ctx *Context, name string, id IDInput, state *CrawlerState, opts ...ResourceOption) (*Crawler, error)
public static Crawler Get(string name, Input<string> id, CrawlerState? state, CustomResourceOptions? opts = null)
public static Crawler get(String name, Output<String> id, CrawlerState state, CustomResourceOptions options)
Resource lookup is not supported in YAML
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
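For instance, the static get shown above can adopt an existing crawler into a TypeScript program (an editor-added sketch; the crawler name passed as the ID is a placeholder, and no new resource is created):
import * as aws from "@pulumi/aws";

// Look up an existing crawler by its ID, which for Glue crawlers is the crawler name.
const legacy = aws.glue.Crawler.get("legacy", "legacy-crawler");

// The looked-up resource exposes the same state properties listed below.
export const legacyRole = legacy.role;
export const legacySchedule = legacy.schedule;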
The following state properties are supported (names shown in camelCase; other SDKs use their snake_case or PascalCase equivalents):
- arn string
  The ARN of the crawler.
- catalogTargets List<CrawlerCatalogTarget>
- classifiers List<string>
  List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- configuration string
  JSON string of configuration information. For more details see Setting Crawler Configuration Options.
- databaseName string
  Glue database where results are written.
- deltaTargets List<CrawlerDeltaTarget>
  List of nested Delta Lake target arguments. See Delta Target below.
- description string
  Description of the crawler.
- dynamodbTargets List<CrawlerDynamodbTarget>
  List of nested DynamoDB target arguments. See Dynamodb Target below.
- hudiTargets List<CrawlerHudiTarget>
  List of nested Hudi target arguments. See Hudi Target below.
- icebergTargets List<CrawlerIcebergTarget>
  List of nested Iceberg target arguments. See Iceberg Target below.
- jdbcTargets List<CrawlerJdbcTarget>
  List of nested JDBC target arguments. See JDBC Target below.
- lakeFormationConfiguration CrawlerLakeFormationConfiguration
  Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
- lineageConfiguration CrawlerLineageConfiguration
  Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
- mongodbTargets List<CrawlerMongodbTarget>
  List of nested MongoDB target arguments. See MongoDB Target below.
- name string
  Name of the crawler.
- recrawlPolicy CrawlerRecrawlPolicy
  A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
- role string
  The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- s3Targets List<CrawlerS3Target>
  List of nested Amazon S3 target arguments. See S3 Target below.
- schedule string
  A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- schemaChangePolicy CrawlerSchemaChangePolicy
  Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
- securityConfiguration string
  The name of Security Configuration to be used by the crawler.
- tablePrefix string
  The table prefix used for catalog tables that are created.
- tags Map<string, string>
  Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- tagsAll Map<string, string>
  A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block. Deprecated: please use tags instead.
Supporting Types
CrawlerCatalogTarget, CrawlerCatalogTargetArgs
- databaseName string
The name of the Glue database to be synchronized.
- tables string[]
A list of catalog tables to be synchronized.
- connectionName string
The name of the connection for an Amazon S3-backed Data Catalog table to be a target of the crawl when using a Catalog connection type paired with a NETWORK connection type.
- dlqEventQueueArn string
A valid Amazon SQS ARN.
- eventQueueArn string
A valid Amazon SQS ARN.
Note: deletion_behavior of catalog target doesn't support DEPRECATE_IN_DATABASE.
Note: configuration for catalog target crawlers will have { ... "Grouping": { "TableGroupingPolicy": "CombineCompatibleSchemas"} } by default.
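For orientation, here is a minimal TypeScript sketch of a crawler with a catalog target. The role ARN and table name are placeholder assumptions, and the catalog database is created inline; deleteBehavior is set to LOG because, per the notes above, catalog targets do not support DEPRECATE_IN_DATABASE.
import * as aws from "@pulumi/aws";

// Placeholder role ARN; substitute an IAM role with Glue and Data Catalog permissions.
const roleArn = "arn:aws:iam::123456789012:role/example-glue-crawler-role";

const database = new aws.glue.CatalogDatabase("example", {name: "example_db"});

const catalogCrawler = new aws.glue.Crawler("catalog-example", {
    databaseName: database.name,
    role: roleArn,
    catalogTargets: [{
        databaseName: database.name,
        tables: ["example_table"], // existing catalog tables to synchronize (placeholder name)
    }],
    schemaChangePolicy: {
        deleteBehavior: "LOG", // catalog targets do not support DEPRECATE_IN_DATABASE (see note above)
    },
});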
CrawlerDeltaTarget, CrawlerDeltaTargetArgs
- deltaTables string[]
A list of the Amazon S3 paths to the Delta tables.
- writeManifest boolean
Specifies whether to write the manifest files to the Delta table path.
- connectionName string
The name of the connection to use to connect to the Delta table target.
- createNativeDeltaTable boolean
Specifies whether the crawler will create native tables, to allow integration with query engines that support querying of the Delta transaction log directly.
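A minimal TypeScript sketch of a Delta Lake target; the database name, role ARN, and S3 path are placeholder assumptions, not values defined elsewhere on this page.
import * as aws from "@pulumi/aws";

const roleArn = "arn:aws:iam::123456789012:role/example-glue-crawler-role"; // placeholder

const deltaCrawler = new aws.glue.Crawler("delta-example", {
    databaseName: "example_db", // existing Glue catalog database (assumption)
    role: roleArn,
    deltaTargets: [{
        deltaTables: ["s3://example-bucket/delta/events/"], // placeholder Delta table path
        writeManifest: false,
        createNativeDeltaTable: true, // create native tables for engines that read the Delta log directly
    }],
});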
CrawlerDynamodbTarget, CrawlerDynamodbTargetArgs
- path string
The name of the DynamoDB table to crawl.
- scanAll boolean
Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Defaults to true.
- scanRate number
The percentage of the configured read capacity units to use by the AWS Glue crawler. The valid values are null or a value between 0.1 and 1.5.
CrawlerHudiTarget, CrawlerHudiTargetArgs
- maximumTraversalDepth number
The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Hudi metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
- paths string[]
One or more Amazon S3 paths that contain Hudi metadata folders as s3://bucket/prefix.
- connectionName string
The name of the connection to use to connect to the Hudi target.
- exclusions string[]
A list of glob patterns used to exclude from the crawl.
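A minimal TypeScript sketch of a Hudi target; the database name, role ARN, S3 prefix, and exclusion pattern are placeholder assumptions.
import * as aws from "@pulumi/aws";

const roleArn = "arn:aws:iam::123456789012:role/example-glue-crawler-role"; // placeholder

const hudiCrawler = new aws.glue.Crawler("hudi-example", {
    databaseName: "example_db", // existing Glue catalog database (assumption)
    role: roleArn,
    hudiTargets: [{
        paths: ["s3://example-bucket/hudi/"], // placeholder prefix containing Hudi metadata folders
        maximumTraversalDepth: 3,             // limit how deep the crawler looks for the metadata folder
        exclusions: ["**/_temporary/**"],     // glob patterns to skip
    }],
});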
CrawlerIcebergTarget, CrawlerIcebergTargetArgs
- maximumTraversalDepth number
The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
- paths string[]
One or more Amazon S3 paths that contain Iceberg metadata folders as s3://bucket/prefix.
- connectionName string
The name of the connection to use to connect to the Iceberg target.
- exclusions string[]
A list of glob patterns used to exclude from the crawl.
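The Iceberg target follows the same shape; a minimal TypeScript sketch with placeholder database name, role ARN, and S3 prefix.
import * as aws from "@pulumi/aws";

const roleArn = "arn:aws:iam::123456789012:role/example-glue-crawler-role"; // placeholder

const icebergCrawler = new aws.glue.Crawler("iceberg-example", {
    databaseName: "example_db", // existing Glue catalog database (assumption)
    role: roleArn,
    icebergTargets: [{
        paths: ["s3://example-bucket/iceberg/"], // placeholder prefix containing Iceberg metadata folders
        maximumTraversalDepth: 3,                // limit how deep the crawler looks for the metadata folder
    }],
});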
CrawlerJdbcTarget, CrawlerJdbcTargetArgs
- connectionName string
The name of the connection to use to connect to the JDBC target.
- path string
The path of the JDBC target.
- enableAdditionalMetadatas string[]
Specify a value of RAWTYPES or COMMENTS to enable additional metadata in table responses. RAWTYPES provides the native-level datatype. COMMENTS provides comments associated with a column or table in the database.
- exclusions string[]
A list of glob patterns used to exclude from the crawl.
CrawlerLakeFormationConfiguration, CrawlerLakeFormationConfigurationArgs
- accountId string
Required for cross-account crawls. For same-account crawls as the target data, this can be omitted.
- useLakeFormationCredentials boolean
Specifies whether to use Lake Formation credentials for the crawler instead of the IAM role credentials.
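A minimal TypeScript sketch of enabling Lake Formation credentials for a crawler; the database name, role ARN, S3 path, and account ID are placeholder assumptions.
import * as aws from "@pulumi/aws";

const roleArn = "arn:aws:iam::123456789012:role/example-glue-crawler-role"; // placeholder

const lfCrawler = new aws.glue.Crawler("lake-formation-example", {
    databaseName: "example_db", // existing Glue catalog database (assumption)
    role: roleArn,
    s3Targets: [{path: "s3://example-bucket/data/"}], // placeholder S3 path
    lakeFormationConfiguration: {
        useLakeFormationCredentials: true,
        accountId: "123456789012", // only needed for cross-account crawls (placeholder account ID)
    },
});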
CrawlerLineageConfiguration, CrawlerLineageConfigurationArgs
- crawlerLineageSettings string
Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.
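A minimal TypeScript sketch that turns on data lineage collection; the database name, role ARN, and S3 path are placeholder assumptions.
import * as aws from "@pulumi/aws";

const roleArn = "arn:aws:iam::123456789012:role/example-glue-crawler-role"; // placeholder

const lineageCrawler = new aws.glue.Crawler("lineage-example", {
    databaseName: "example_db", // existing Glue catalog database (assumption)
    role: roleArn,
    s3Targets: [{path: "s3://example-bucket/data/"}], // placeholder S3 path
    lineageConfiguration: {
        crawlerLineageSettings: "ENABLE", // DISABLE is the default
    },
});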
CrawlerMongodbTarget, CrawlerMongodbTargetArgs
- connectionName string
The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.
- path string
The path of the Amazon DocumentDB or MongoDB target (database/collection).
- scanAll boolean
Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.
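A minimal TypeScript sketch of a MongoDB target; the Glue connection name, database name, role ARN, and collection path are placeholder assumptions.
import * as aws from "@pulumi/aws";

const roleArn = "arn:aws:iam::123456789012:role/example-glue-crawler-role"; // placeholder

const mongoCrawler = new aws.glue.Crawler("mongodb-example", {
    databaseName: "example_db", // existing Glue catalog database (assumption)
    role: roleArn,
    mongodbTargets: [{
        connectionName: "example-mongodb-connection", // existing Glue connection name (assumption)
        path: "example-db/example-collection",        // database/collection
        scanAll: false,                               // sample rows instead of scanning every record
    }],
});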
CrawlerRecrawlPolicy, CrawlerRecrawlPolicyArgs
- recrawlBehavior string
Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.
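A minimal TypeScript sketch of an incremental recrawl policy; the database name, role ARN, and S3 path are placeholder assumptions, and the LOG schema-change behaviors shown are the pairing typically used with incremental crawls rather than a requirement stated on this page.
import * as aws from "@pulumi/aws";

const roleArn = "arn:aws:iam::123456789012:role/example-glue-crawler-role"; // placeholder

const incrementalCrawler = new aws.glue.Crawler("incremental-example", {
    databaseName: "example_db", // existing Glue catalog database (assumption)
    role: roleArn,
    s3Targets: [{path: "s3://example-bucket/data/"}], // placeholder S3 path
    recrawlPolicy: {
        recrawlBehavior: "CRAWL_NEW_FOLDERS_ONLY", // only crawl folders added since the last run
    },
    // Incremental crawls are typically paired with LOG behaviors so existing tables are left untouched.
    schemaChangePolicy: {
        updateBehavior: "LOG",
        deleteBehavior: "LOG",
    },
});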
CrawlerS3Target, CrawlerS3TargetArgs
- path string
The path to the Amazon S3 target.
- connectionName string
The name of the connection to use to connect to the Amazon S3 target.
- dlqEventQueueArn string
The ARN of the dead-letter SQS queue.
- eventQueueArn string
The ARN of the SQS queue to receive S3 notifications from.
- exclusions string[]
A list of glob patterns used to exclude from the crawl.
- sampleSize number
Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.
Note: deletion_behavior of catalog target doesn't support DEPRECATE_IN_DATABASE.
Note: configuration for catalog target crawlers will have { ... "Grouping": { "TableGroupingPolicy": "CombineCompatibleSchemas"} } by default.
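A minimal TypeScript sketch of an S3 target with sampling, exclusions, and a daily schedule; the database name, role ARN, and bucket prefix are placeholder assumptions.
import * as aws from "@pulumi/aws";

const roleArn = "arn:aws:iam::123456789012:role/example-glue-crawler-role"; // placeholder

const s3Crawler = new aws.glue.Crawler("s3-example", {
    databaseName: "example_db", // existing Glue catalog database (assumption)
    role: roleArn,
    s3Targets: [{
        path: "s3://example-bucket/data/", // placeholder S3 prefix
        exclusions: ["**.tmp"],            // glob patterns to skip
        sampleSize: 10,                    // crawl at most 10 files per leaf folder
    }],
    schedule: "cron(15 12 * * ? *)",       // run daily at 12:15 UTC
});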
CrawlerSchemaChangePolicy, CrawlerSchemaChangePolicyArgs
- deleteBehavior string
The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
- updateBehavior string
The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
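A minimal TypeScript sketch of an explicit schema change policy; the database name, role ARN, and S3 path are placeholder assumptions.
import * as aws from "@pulumi/aws";

const roleArn = "arn:aws:iam::123456789012:role/example-glue-crawler-role"; // placeholder

const policyCrawler = new aws.glue.Crawler("schema-policy-example", {
    databaseName: "example_db", // existing Glue catalog database (assumption)
    role: roleArn,
    s3Targets: [{path: "s3://example-bucket/data/"}], // placeholder S3 path
    schemaChangePolicy: {
        updateBehavior: "UPDATE_IN_DATABASE",  // update table definitions when schemas change
        deleteBehavior: "DELETE_FROM_DATABASE", // remove tables whose source objects were deleted
    },
});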
Import
Using pulumi import, import Glue Crawlers using name. For example:
$ pulumi import aws:glue/crawler:Crawler MyJob MyJob
Package Details
- Repository
- AWS Classic pulumi/pulumi-aws
- License
- Apache-2.0
- Notes
This Pulumi package is based on the aws Terraform Provider.