SourceS3 Resource
Example Usage
import * as pulumi from "@pulumi/pulumi";
import * as airbyte from "@pulumi/airbyte";
// Connector configuration for the S3 source, declared on its own so the
// resource declaration below stays short.
const s3Configuration = {
    awsAccessKeyId: "...my_aws_access_key_id...",
    awsSecretAccessKey: "...my_aws_secret_access_key...",
    bucket: "...my_bucket...",
    dataset: "...my_dataset...",
    deliveryMethod: {
        copyRawFiles: {
            preserveDirectoryStructure: false,
        },
    },
    endpoint: "my-s3-endpoint.com",
    format: {
        parquet: {
            batchSize: 6,
            bufferSize: 8,
            columns: ["..."],
        },
    },
    pathPattern: "**",
    provider: {
        awsAccessKeyId: "...my_aws_access_key_id...",
        awsSecretAccessKey: "...my_aws_secret_access_key...",
        bucket: "...my_bucket...",
        endpoint: "...my_endpoint...",
        pathPrefix: "...my_path_prefix...",
        regionName: "...my_region_name...",
        roleArn: "...my_role_arn...",
        startDate: "2021-01-01T00:00:00Z",
    },
    regionName: "...my_region_name...",
    roleArn: "...my_role_arn...",
    // The schema is passed as a JSON document encoded in a string.
    schema: "{\"column_1\": \"number\", \"column_2\": \"string\", \"column_3\": \"array\", \"column_4\": \"object\", \"column_5\": \"boolean\"}",
    startDate: "2021-01-01T00:00:00.000000Z",
    // One entry per stream; this example declares a single stream.
    streams: [{
        daysToSyncIfHistoryIsFull: 5,
        format: {
            excelFormat: {},
            jsonlFormat: {},
        },
        globs: ["..."],
        inputSchema: "...my_input_schema...",
        legacyPrefix: "...my_legacy_prefix...",
        name: "...my_name...",
        primaryKey: "...my_primary_key...",
        recentNFilesToReadForSchemaDiscovery: 10,
        schemaless: true,
        validationPolicy: "Wait for Discover",
    }],
};

// Declares the Airbyte S3 source resource "my_source_s3" in the given workspace.
const mySourceS3 = new airbyte.SourceS3("my_source_s3", {
    configuration: s3Configuration,
    definitionId: "07ef8ae4-b6a4-4fd9-99ea-a368c6fc144c",
    name: "...my_name...",
    secretId: "...my_secret_id...",
    workspaceId: "bba7dce0-5020-4916-bbd7-be8f298d5f78",
});
import pulumi
import pulumi_airbyte as airbyte
# Connector configuration for the S3 source, built as a plain dictionary
# literal so the resource declaration below stays short.
s3_configuration = {
    "aws_access_key_id": "...my_aws_access_key_id...",
    "aws_secret_access_key": "...my_aws_secret_access_key...",
    "bucket": "...my_bucket...",
    "dataset": "...my_dataset...",
    "delivery_method": {
        "copy_raw_files": {
            "preserve_directory_structure": False,
        },
    },
    "endpoint": "my-s3-endpoint.com",
    "format": {
        "parquet": {
            "batch_size": 6,
            "buffer_size": 8,
            "columns": ["..."],
        },
    },
    "path_pattern": "**",
    "provider": {
        "aws_access_key_id": "...my_aws_access_key_id...",
        "aws_secret_access_key": "...my_aws_secret_access_key...",
        "bucket": "...my_bucket...",
        "endpoint": "...my_endpoint...",
        "path_prefix": "...my_path_prefix...",
        "region_name": "...my_region_name...",
        "role_arn": "...my_role_arn...",
        "start_date": "2021-01-01T00:00:00Z",
    },
    "region_name": "...my_region_name...",
    "role_arn": "...my_role_arn...",
    # The schema is passed as a JSON document encoded in a string.
    "schema": "{\"column_1\": \"number\", \"column_2\": \"string\", \"column_3\": \"array\", \"column_4\": \"object\", \"column_5\": \"boolean\"}",
    "start_date": "2021-01-01T00:00:00.000000Z",
    # One entry per stream; this example declares a single stream.
    "streams": [{
        "days_to_sync_if_history_is_full": 5,
        "format": {
            "excel_format": {},
            "jsonl_format": {},
        },
        "globs": ["..."],
        "input_schema": "...my_input_schema...",
        "legacy_prefix": "...my_legacy_prefix...",
        "name": "...my_name...",
        "primary_key": "...my_primary_key...",
        "recent_n_files_to_read_for_schema_discovery": 10,
        "schemaless": True,
        "validation_policy": "Wait for Discover",
    }],
}

# Declares the Airbyte S3 source resource "my_source_s3" in the given workspace.
my_source_s3 = airbyte.SourceS3(
    "my_source_s3",
    configuration=s3_configuration,
    definition_id="07ef8ae4-b6a4-4fd9-99ea-a368c6fc144c",
    name="...my_name...",
    secret_id="...my_secret_id...",
    workspace_id="bba7dce0-5020-4916-bbd7-be8f298d5f78",
)
package main
import (
"github.com/pulumi/pulumi-terraform-provider/sdks/go/airbyte/airbyte"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
// main runs a Pulumi program that declares a single Airbyte S3 source
// resource named "my_source_s3".
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Connector configuration, built on its own so the resource
		// declaration below stays short.
		configuration := &airbyte.SourceS3ConfigurationArgs{
			AwsAccessKeyId: pulumi.String("...my_aws_access_key_id..."),
			AwsSecretAccessKey: pulumi.String("...my_aws_secret_access_key..."),
			Bucket: pulumi.String("...my_bucket..."),
			Dataset: pulumi.String("...my_dataset..."),
			DeliveryMethod: &airbyte.SourceS3ConfigurationDeliveryMethodArgs{
				CopyRawFiles: &airbyte.SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs{
					PreserveDirectoryStructure: pulumi.Bool(false),
				},
			},
			Endpoint: pulumi.String("my-s3-endpoint.com"),
			Format: &airbyte.SourceS3ConfigurationFormatArgs{
				Parquet: &airbyte.SourceS3ConfigurationFormatParquetArgs{
					BatchSize: pulumi.Float64(6),
					BufferSize: pulumi.Float64(8),
					Columns: pulumi.StringArray{
						pulumi.String("..."),
					},
				},
			},
			PathPattern: pulumi.String("**"),
			Provider: &airbyte.SourceS3ConfigurationProviderArgs{
				AwsAccessKeyId: pulumi.String("...my_aws_access_key_id..."),
				AwsSecretAccessKey: pulumi.String("...my_aws_secret_access_key..."),
				Bucket: pulumi.String("...my_bucket..."),
				Endpoint: pulumi.String("...my_endpoint..."),
				PathPrefix: pulumi.String("...my_path_prefix..."),
				RegionName: pulumi.String("...my_region_name..."),
				RoleArn: pulumi.String("...my_role_arn..."),
				StartDate: pulumi.String("2021-01-01T00:00:00Z"),
			},
			RegionName: pulumi.String("...my_region_name..."),
			RoleArn: pulumi.String("...my_role_arn..."),
			// The schema is passed as a JSON document encoded in a string.
			Schema: pulumi.String("{\"column_1\": \"number\", \"column_2\": \"string\", \"column_3\": \"array\", \"column_4\": \"object\", \"column_5\": \"boolean\"}"),
			StartDate: pulumi.String("2021-01-01T00:00:00.000000Z"),
			// One entry per stream; this example declares a single stream.
			Streams: airbyte.SourceS3ConfigurationStreamArray{
				&airbyte.SourceS3ConfigurationStreamArgs{
					DaysToSyncIfHistoryIsFull: pulumi.Float64(5),
					Format: &airbyte.SourceS3ConfigurationStreamFormatArgs{
						ExcelFormat: &airbyte.SourceS3ConfigurationStreamFormatExcelFormatArgs{},
						JsonlFormat: &airbyte.SourceS3ConfigurationStreamFormatJsonlFormatArgs{},
					},
					Globs: pulumi.StringArray{
						pulumi.String("..."),
					},
					InputSchema: pulumi.String("...my_input_schema..."),
					LegacyPrefix: pulumi.String("...my_legacy_prefix..."),
					Name: pulumi.String("...my_name..."),
					PrimaryKey: pulumi.String("...my_primary_key..."),
					RecentNFilesToReadForSchemaDiscovery: pulumi.Float64(10),
					Schemaless: pulumi.Bool(true),
					ValidationPolicy: pulumi.String("Wait for Discover"),
				},
			},
		}

		// The resource handle is not needed afterwards, so only the error
		// is kept and propagated to pulumi.Run.
		if _, err := airbyte.NewSourceS3(ctx, "my_source_s3", &airbyte.SourceS3Args{
			Configuration: configuration,
			DefinitionId: pulumi.String("07ef8ae4-b6a4-4fd9-99ea-a368c6fc144c"),
			Name: pulumi.String("...my_name..."),
			SecretId: pulumi.String("...my_secret_id..."),
			WorkspaceId: pulumi.String("bba7dce0-5020-4916-bbd7-be8f298d5f78"),
		}); err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Airbyte = Pulumi.Airbyte;
// Pulumi program that declares a single Airbyte S3 source resource
// named "my_source_s3".
return await Deployment.RunAsync(() =>
{
    // Connector configuration, built on its own so the resource
    // declaration below stays short.
    var configuration = new Airbyte.Inputs.SourceS3ConfigurationArgs
    {
        AwsAccessKeyId = "...my_aws_access_key_id...",
        AwsSecretAccessKey = "...my_aws_secret_access_key...",
        Bucket = "...my_bucket...",
        Dataset = "...my_dataset...",
        DeliveryMethod = new Airbyte.Inputs.SourceS3ConfigurationDeliveryMethodArgs
        {
            CopyRawFiles = new Airbyte.Inputs.SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs
            {
                PreserveDirectoryStructure = false,
            },
        },
        Endpoint = "my-s3-endpoint.com",
        Format = new Airbyte.Inputs.SourceS3ConfigurationFormatArgs
        {
            Parquet = new Airbyte.Inputs.SourceS3ConfigurationFormatParquetArgs
            {
                BatchSize = 6,
                BufferSize = 8,
                Columns = new[]
                {
                    "...",
                },
            },
        },
        PathPattern = "**",
        Provider = new Airbyte.Inputs.SourceS3ConfigurationProviderArgs
        {
            AwsAccessKeyId = "...my_aws_access_key_id...",
            AwsSecretAccessKey = "...my_aws_secret_access_key...",
            Bucket = "...my_bucket...",
            Endpoint = "...my_endpoint...",
            PathPrefix = "...my_path_prefix...",
            RegionName = "...my_region_name...",
            RoleArn = "...my_role_arn...",
            StartDate = "2021-01-01T00:00:00Z",
        },
        RegionName = "...my_region_name...",
        RoleArn = "...my_role_arn...",
        // The schema is passed as a JSON document encoded in a string.
        Schema = "{\"column_1\": \"number\", \"column_2\": \"string\", \"column_3\": \"array\", \"column_4\": \"object\", \"column_5\": \"boolean\"}",
        StartDate = "2021-01-01T00:00:00.000000Z",
        // One entry per stream; this example declares a single stream.
        Streams = new[]
        {
            new Airbyte.Inputs.SourceS3ConfigurationStreamArgs
            {
                DaysToSyncIfHistoryIsFull = 5,
                Format = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatArgs
                {
                    // NOTE(review): the other language examples pass empty
                    // objects for these two formats; null here may leave them
                    // unset instead - confirm against the C# SDK.
                    ExcelFormat = null,
                    JsonlFormat = null,
                },
                Globs = new[]
                {
                    "...",
                },
                InputSchema = "...my_input_schema...",
                LegacyPrefix = "...my_legacy_prefix...",
                Name = "...my_name...",
                PrimaryKey = "...my_primary_key...",
                RecentNFilesToReadForSchemaDiscovery = 10,
                Schemaless = true,
                ValidationPolicy = "Wait for Discover",
            },
        },
    };

    var mySourceS3 = new Airbyte.SourceS3("my_source_s3", new()
    {
        Configuration = configuration,
        DefinitionId = "07ef8ae4-b6a4-4fd9-99ea-a368c6fc144c",
        Name = "...my_name...",
        SecretId = "...my_secret_id...",
        WorkspaceId = "bba7dce0-5020-4916-bbd7-be8f298d5f78",
    });
});
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.airbyte.SourceS3;
import com.pulumi.airbyte.SourceS3Args;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationDeliveryMethodArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationFormatArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationFormatParquetArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationProviderArgs;
// The stream builders used in stack() live in the same inputs package as the
// other argument classes and must be imported for the example to compile.
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamFormatArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamFormatExcelFormatArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamFormatJsonlFormatArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Pulumi program that declares a single Airbyte S3 source resource.
 */
public class App {
    /** Entry point: hands the stack definition to the Pulumi runtime. */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the "mySourceS3" resource with its full connector configuration.
     *
     * @param ctx the Pulumi context supplied by {@code Pulumi.run}
     */
    public static void stack(Context ctx) {
        var mySourceS3 = new SourceS3("mySourceS3", SourceS3Args.builder()
            .configuration(SourceS3ConfigurationArgs.builder()
                .awsAccessKeyId("...my_aws_access_key_id...")
                .awsSecretAccessKey("...my_aws_secret_access_key...")
                .bucket("...my_bucket...")
                .dataset("...my_dataset...")
                .deliveryMethod(SourceS3ConfigurationDeliveryMethodArgs.builder()
                    .copyRawFiles(SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs.builder()
                        .preserveDirectoryStructure(false)
                        .build())
                    .build())
                .endpoint("my-s3-endpoint.com")
                .format(SourceS3ConfigurationFormatArgs.builder()
                    .parquet(SourceS3ConfigurationFormatParquetArgs.builder()
                        .batchSize(6.0)
                        .bufferSize(8.0)
                        .columns("...")
                        .build())
                    .build())
                .pathPattern("**")
                .provider(SourceS3ConfigurationProviderArgs.builder()
                    .awsAccessKeyId("...my_aws_access_key_id...")
                    .awsSecretAccessKey("...my_aws_secret_access_key...")
                    .bucket("...my_bucket...")
                    .endpoint("...my_endpoint...")
                    .pathPrefix("...my_path_prefix...")
                    .regionName("...my_region_name...")
                    .roleArn("...my_role_arn...")
                    .startDate("2021-01-01T00:00:00Z")
                    .build())
                .regionName("...my_region_name...")
                .roleArn("...my_role_arn...")
                // The schema is passed as a JSON document encoded in a string.
                .schema("{\"column_1\": \"number\", \"column_2\": \"string\", \"column_3\": \"array\", \"column_4\": \"object\", \"column_5\": \"boolean\"}")
                .startDate("2021-01-01T00:00:00.000000Z")
                // This example declares a single stream.
                .streams(SourceS3ConfigurationStreamArgs.builder()
                    .daysToSyncIfHistoryIsFull(5.0)
                    .format(SourceS3ConfigurationStreamFormatArgs.builder()
                        .excelFormat(SourceS3ConfigurationStreamFormatExcelFormatArgs.builder()
                            .build())
                        .jsonlFormat(SourceS3ConfigurationStreamFormatJsonlFormatArgs.builder()
                            .build())
                        .build())
                    .globs("...")
                    .inputSchema("...my_input_schema...")
                    .legacyPrefix("...my_legacy_prefix...")
                    .name("...my_name...")
                    .primaryKey("...my_primary_key...")
                    .recentNFilesToReadForSchemaDiscovery(10.0)
                    .schemaless(true)
                    .validationPolicy("Wait for Discover")
                    .build())
                .build())
            .definitionId("07ef8ae4-b6a4-4fd9-99ea-a368c6fc144c")
            .name("...my_name...")
            .secretId("...my_secret_id...")
            .workspaceId("bba7dce0-5020-4916-bbd7-be8f298d5f78")
            .build());
    }
}
# Pulumi YAML program declaring a single Airbyte S3 source resource.
# Nesting mirrors the equivalent TypeScript/Python/Go/C#/Java examples.
resources:
  mySourceS3:
    type: airbyte:SourceS3
    name: my_source_s3
    properties:
      configuration:
        awsAccessKeyId: '...my_aws_access_key_id...'
        awsSecretAccessKey: '...my_aws_secret_access_key...'
        bucket: '...my_bucket...'
        dataset: '...my_dataset...'
        deliveryMethod:
          copyRawFiles:
            preserveDirectoryStructure: false
        endpoint: my-s3-endpoint.com
        format:
          parquet:
            batchSize: 6
            bufferSize: 8
            columns:
              - '...'
        pathPattern: '**'
        provider:
          awsAccessKeyId: '...my_aws_access_key_id...'
          awsSecretAccessKey: '...my_aws_secret_access_key...'
          bucket: '...my_bucket...'
          endpoint: '...my_endpoint...'
          pathPrefix: '...my_path_prefix...'
          regionName: '...my_region_name...'
          roleArn: '...my_role_arn...'
          startDate: 2021-01-01T00:00:00Z
        regionName: '...my_region_name...'
        roleArn: '...my_role_arn...'
        # The schema is passed as a JSON document encoded in a string.
        schema: '{"column_1": "number", "column_2": "string", "column_3": "array", "column_4": "object", "column_5": "boolean"}'
        startDate: 2021-01-01T00:00:00.000000Z
        # One list entry per stream; this example declares a single stream.
        streams:
          - daysToSyncIfHistoryIsFull: 5
            format:
              excelFormat: {}
              jsonlFormat: {}
            globs:
              - '...'
            inputSchema: '...my_input_schema...'
            legacyPrefix: '...my_legacy_prefix...'
            name: '...my_name...'
            primaryKey: '...my_primary_key...'
            recentNFilesToReadForSchemaDiscovery: 10
            schemaless: true
            validationPolicy: Wait for Discover
      definitionId: 07ef8ae4-b6a4-4fd9-99ea-a368c6fc144c
      name: '...my_name...'
      secretId: '...my_secret_id...'
      workspaceId: bba7dce0-5020-4916-bbd7-be8f298d5f78
Create SourceS3 Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new SourceS3(name: string, args: SourceS3Args, opts?: CustomResourceOptions);
@overload
def SourceS3(resource_name: str,
args: SourceS3Args,
opts: Optional[ResourceOptions] = None)
@overload
def SourceS3(resource_name: str,
opts: Optional[ResourceOptions] = None,
configuration: Optional[SourceS3ConfigurationArgs] = None,
workspace_id: Optional[str] = None,
definition_id: Optional[str] = None,
name: Optional[str] = None,
secret_id: Optional[str] = None)
func NewSourceS3(ctx *Context, name string, args SourceS3Args, opts ...ResourceOption) (*SourceS3, error)
public SourceS3(string name, SourceS3Args args, CustomResourceOptions? opts = null)
public SourceS3(String name, SourceS3Args args)
public SourceS3(String name, SourceS3Args args, CustomResourceOptions options)
type: airbyte:SourceS3
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource.
- args SourceS3Args
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args SourceS3Args
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args SourceS3Args
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args SourceS3Args
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args SourceS3Args
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
Constructor example
The following reference example uses placeholder values for all input properties.
// Reference example: constructs a SourceS3 with every input property set to
// a placeholder value ("string", 0, false, or null).
var sourceS3Resource = new Airbyte.SourceS3("sourceS3Resource", new()
{
    Configuration = new Airbyte.Inputs.SourceS3ConfigurationArgs
    {
        Bucket = "string",
        // Per-stream settings, including every available stream format.
        Streams = new[]
        {
            new Airbyte.Inputs.SourceS3ConfigurationStreamArgs
            {
                Format = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatArgs
                {
                    AvroFormat = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatAvroFormatArgs
                    {
                        DoubleAsString = false,
                    },
                    CsvFormat = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatCsvFormatArgs
                    {
                        Delimiter = "string",
                        DoubleQuote = false,
                        Encoding = "string",
                        EscapeChar = "string",
                        FalseValues = new[]
                        {
                            "string",
                        },
                        HeaderDefinition = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionArgs
                        {
                            Autogenerated = null,
                            FromCsv = null,
                            UserProvided = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvidedArgs
                            {
                                ColumnNames = new[]
                                {
                                    "string",
                                },
                            },
                        },
                        IgnoreErrorsOnFieldsMismatch = false,
                        InferenceType = "string",
                        NullValues = new[]
                        {
                            "string",
                        },
                        QuoteChar = "string",
                        SkipRowsAfterHeader = 0,
                        SkipRowsBeforeHeader = 0,
                        StringsCanBeNull = false,
                        TrueValues = new[]
                        {
                            "string",
                        },
                    },
                    ExcelFormat = null,
                    JsonlFormat = null,
                    ParquetFormat = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatParquetFormatArgs
                    {
                        DecimalAsFloat = false,
                    },
                    UnstructuredDocumentFormat = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatArgs
                    {
                        Processing = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingArgs
                        {
                            Local = null,
                        },
                        SkipUnprocessableFiles = false,
                        Strategy = "string",
                    },
                },
                Name = "string",
                DaysToSyncIfHistoryIsFull = 0,
                Globs = new[]
                {
                    "string",
                },
                InputSchema = "string",
                LegacyPrefix = "string",
                PrimaryKey = "string",
                RecentNFilesToReadForSchemaDiscovery = 0,
                Schemaless = false,
                ValidationPolicy = "string",
            },
        },
        // Configuration-level format block (separate from the per-stream format above).
        Format = new Airbyte.Inputs.SourceS3ConfigurationFormatArgs
        {
            Avro = null,
            Csv = new Airbyte.Inputs.SourceS3ConfigurationFormatCsvArgs
            {
                AdditionalReaderOptions = "string",
                AdvancedOptions = "string",
                BlockSize = 0,
                Delimiter = "string",
                DoubleQuote = false,
                Encoding = "string",
                EscapeChar = "string",
                InferDatatypes = false,
                NewlinesInValues = false,
                QuoteChar = "string",
            },
            Jsonl = new Airbyte.Inputs.SourceS3ConfigurationFormatJsonlArgs
            {
                BlockSize = 0,
                NewlinesInValues = false,
                UnexpectedFieldBehavior = "string",
            },
            Parquet = new Airbyte.Inputs.SourceS3ConfigurationFormatParquetArgs
            {
                BatchSize = 0,
                BufferSize = 0,
                Columns = new[]
                {
                    "string",
                },
            },
        },
        Dataset = "string",
        DeliveryMethod = new Airbyte.Inputs.SourceS3ConfigurationDeliveryMethodArgs
        {
            CopyRawFiles = new Airbyte.Inputs.SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs
            {
                PreserveDirectoryStructure = false,
            },
            ReplicateRecords = null,
        },
        Endpoint = "string",
        AwsAccessKeyId = "string",
        PathPattern = "string",
        Provider = new Airbyte.Inputs.SourceS3ConfigurationProviderArgs
        {
            AwsAccessKeyId = "string",
            AwsSecretAccessKey = "string",
            Bucket = "string",
            Endpoint = "string",
            PathPrefix = "string",
            RegionName = "string",
            RoleArn = "string",
            StartDate = "string",
        },
        RegionName = "string",
        RoleArn = "string",
        Schema = "string",
        StartDate = "string",
        AwsSecretAccessKey = "string",
    },
    // Resource-level inputs, set alongside Configuration.
    WorkspaceId = "string",
    DefinitionId = "string",
    Name = "string",
    SecretId = "string",
});
// Reference example: constructs a SourceS3 with every input property set to
// a placeholder value. ctx is declared outside this snippet, and the returned
// err is not checked within it.
example, err := airbyte.NewSourceS3(ctx, "sourceS3Resource", &airbyte.SourceS3Args{
	Configuration: &airbyte.SourceS3ConfigurationArgs{
		Bucket: pulumi.String("string"),
		// Per-stream settings, including every available stream format.
		Streams: airbyte.SourceS3ConfigurationStreamArray{
			&airbyte.SourceS3ConfigurationStreamArgs{
				Format: &airbyte.SourceS3ConfigurationStreamFormatArgs{
					AvroFormat: &airbyte.SourceS3ConfigurationStreamFormatAvroFormatArgs{
						DoubleAsString: pulumi.Bool(false),
					},
					CsvFormat: &airbyte.SourceS3ConfigurationStreamFormatCsvFormatArgs{
						Delimiter: pulumi.String("string"),
						DoubleQuote: pulumi.Bool(false),
						Encoding: pulumi.String("string"),
						EscapeChar: pulumi.String("string"),
						FalseValues: pulumi.StringArray{
							pulumi.String("string"),
						},
						HeaderDefinition: &airbyte.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionArgs{
							Autogenerated: &airbyte.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionAutogeneratedArgs{},
							FromCsv: &airbyte.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionFromCsvArgs{},
							UserProvided: &airbyte.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvidedArgs{
								ColumnNames: pulumi.StringArray{
									pulumi.String("string"),
								},
							},
						},
						IgnoreErrorsOnFieldsMismatch: pulumi.Bool(false),
						InferenceType: pulumi.String("string"),
						NullValues: pulumi.StringArray{
							pulumi.String("string"),
						},
						QuoteChar: pulumi.String("string"),
						SkipRowsAfterHeader: pulumi.Float64(0),
						SkipRowsBeforeHeader: pulumi.Float64(0),
						StringsCanBeNull: pulumi.Bool(false),
						TrueValues: pulumi.StringArray{
							pulumi.String("string"),
						},
					},
					ExcelFormat: &airbyte.SourceS3ConfigurationStreamFormatExcelFormatArgs{},
					JsonlFormat: &airbyte.SourceS3ConfigurationStreamFormatJsonlFormatArgs{},
					ParquetFormat: &airbyte.SourceS3ConfigurationStreamFormatParquetFormatArgs{
						DecimalAsFloat: pulumi.Bool(false),
					},
					UnstructuredDocumentFormat: &airbyte.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatArgs{
						Processing: &airbyte.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingArgs{
							Local: &airbyte.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingLocalArgs{},
						},
						SkipUnprocessableFiles: pulumi.Bool(false),
						Strategy: pulumi.String("string"),
					},
				},
				Name: pulumi.String("string"),
				DaysToSyncIfHistoryIsFull: pulumi.Float64(0),
				Globs: pulumi.StringArray{
					pulumi.String("string"),
				},
				InputSchema: pulumi.String("string"),
				LegacyPrefix: pulumi.String("string"),
				PrimaryKey: pulumi.String("string"),
				RecentNFilesToReadForSchemaDiscovery: pulumi.Float64(0),
				Schemaless: pulumi.Bool(false),
				ValidationPolicy: pulumi.String("string"),
			},
		},
		// Configuration-level format block (separate from the per-stream format above).
		Format: &airbyte.SourceS3ConfigurationFormatArgs{
			Avro: &airbyte.SourceS3ConfigurationFormatAvroArgs{},
			Csv: &airbyte.SourceS3ConfigurationFormatCsvArgs{
				AdditionalReaderOptions: pulumi.String("string"),
				AdvancedOptions: pulumi.String("string"),
				BlockSize: pulumi.Float64(0),
				Delimiter: pulumi.String("string"),
				DoubleQuote: pulumi.Bool(false),
				Encoding: pulumi.String("string"),
				EscapeChar: pulumi.String("string"),
				InferDatatypes: pulumi.Bool(false),
				NewlinesInValues: pulumi.Bool(false),
				QuoteChar: pulumi.String("string"),
			},
			Jsonl: &airbyte.SourceS3ConfigurationFormatJsonlArgs{
				BlockSize: pulumi.Float64(0),
				NewlinesInValues: pulumi.Bool(false),
				UnexpectedFieldBehavior: pulumi.String("string"),
			},
			Parquet: &airbyte.SourceS3ConfigurationFormatParquetArgs{
				BatchSize: pulumi.Float64(0),
				BufferSize: pulumi.Float64(0),
				Columns: pulumi.StringArray{
					pulumi.String("string"),
				},
			},
		},
		Dataset: pulumi.String("string"),
		DeliveryMethod: &airbyte.SourceS3ConfigurationDeliveryMethodArgs{
			CopyRawFiles: &airbyte.SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs{
				PreserveDirectoryStructure: pulumi.Bool(false),
			},
			ReplicateRecords: &airbyte.SourceS3ConfigurationDeliveryMethodReplicateRecordsArgs{},
		},
		Endpoint: pulumi.String("string"),
		AwsAccessKeyId: pulumi.String("string"),
		PathPattern: pulumi.String("string"),
		Provider: &airbyte.SourceS3ConfigurationProviderArgs{
			AwsAccessKeyId: pulumi.String("string"),
			AwsSecretAccessKey: pulumi.String("string"),
			Bucket: pulumi.String("string"),
			Endpoint: pulumi.String("string"),
			PathPrefix: pulumi.String("string"),
			RegionName: pulumi.String("string"),
			RoleArn: pulumi.String("string"),
			StartDate: pulumi.String("string"),
		},
		RegionName: pulumi.String("string"),
		RoleArn: pulumi.String("string"),
		Schema: pulumi.String("string"),
		StartDate: pulumi.String("string"),
		AwsSecretAccessKey: pulumi.String("string"),
	},
	// Resource-level inputs, set alongside Configuration.
	WorkspaceId: pulumi.String("string"),
	DefinitionId: pulumi.String("string"),
	Name: pulumi.String("string"),
	SecretId: pulumi.String("string"),
})
// Reference example: constructs a SourceS3 with every input property set to
// a placeholder value ("string", 0.0, or false).
var sourceS3Resource = new SourceS3("sourceS3Resource", SourceS3Args.builder()
    .configuration(SourceS3ConfigurationArgs.builder()
        .bucket("string")
        // Per-stream settings, including every available stream format.
        .streams(SourceS3ConfigurationStreamArgs.builder()
            .format(SourceS3ConfigurationStreamFormatArgs.builder()
                .avroFormat(SourceS3ConfigurationStreamFormatAvroFormatArgs.builder()
                    .doubleAsString(false)
                    .build())
                .csvFormat(SourceS3ConfigurationStreamFormatCsvFormatArgs.builder()
                    .delimiter("string")
                    .doubleQuote(false)
                    .encoding("string")
                    .escapeChar("string")
                    .falseValues("string")
                    .headerDefinition(SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionArgs.builder()
                        .autogenerated(SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionAutogeneratedArgs.builder()
                            .build())
                        .fromCsv(SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionFromCsvArgs.builder()
                            .build())
                        .userProvided(SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvidedArgs.builder()
                            .columnNames("string")
                            .build())
                        .build())
                    .ignoreErrorsOnFieldsMismatch(false)
                    .inferenceType("string")
                    .nullValues("string")
                    .quoteChar("string")
                    .skipRowsAfterHeader(0.0)
                    .skipRowsBeforeHeader(0.0)
                    .stringsCanBeNull(false)
                    .trueValues("string")
                    .build())
                .excelFormat(SourceS3ConfigurationStreamFormatExcelFormatArgs.builder()
                    .build())
                .jsonlFormat(SourceS3ConfigurationStreamFormatJsonlFormatArgs.builder()
                    .build())
                .parquetFormat(SourceS3ConfigurationStreamFormatParquetFormatArgs.builder()
                    .decimalAsFloat(false)
                    .build())
                .unstructuredDocumentFormat(SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatArgs.builder()
                    .processing(SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingArgs.builder()
                        .local(SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingLocalArgs.builder()
                            .build())
                        .build())
                    .skipUnprocessableFiles(false)
                    .strategy("string")
                    .build())
                .build())
            .name("string")
            .daysToSyncIfHistoryIsFull(0.0)
            .globs("string")
            .inputSchema("string")
            .legacyPrefix("string")
            .primaryKey("string")
            .recentNFilesToReadForSchemaDiscovery(0.0)
            .schemaless(false)
            .validationPolicy("string")
            .build())
        // Configuration-level format block (separate from the per-stream format above).
        .format(SourceS3ConfigurationFormatArgs.builder()
            .avro(SourceS3ConfigurationFormatAvroArgs.builder()
                .build())
            .csv(SourceS3ConfigurationFormatCsvArgs.builder()
                .additionalReaderOptions("string")
                .advancedOptions("string")
                .blockSize(0.0)
                .delimiter("string")
                .doubleQuote(false)
                .encoding("string")
                .escapeChar("string")
                .inferDatatypes(false)
                .newlinesInValues(false)
                .quoteChar("string")
                .build())
            .jsonl(SourceS3ConfigurationFormatJsonlArgs.builder()
                .blockSize(0.0)
                .newlinesInValues(false)
                .unexpectedFieldBehavior("string")
                .build())
            .parquet(SourceS3ConfigurationFormatParquetArgs.builder()
                .batchSize(0.0)
                .bufferSize(0.0)
                .columns("string")
                .build())
            .build())
        .dataset("string")
        .deliveryMethod(SourceS3ConfigurationDeliveryMethodArgs.builder()
            .copyRawFiles(SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs.builder()
                .preserveDirectoryStructure(false)
                .build())
            .replicateRecords(SourceS3ConfigurationDeliveryMethodReplicateRecordsArgs.builder()
                .build())
            .build())
        .endpoint("string")
        .awsAccessKeyId("string")
        .pathPattern("string")
        .provider(SourceS3ConfigurationProviderArgs.builder()
            .awsAccessKeyId("string")
            .awsSecretAccessKey("string")
            .bucket("string")
            .endpoint("string")
            .pathPrefix("string")
            .regionName("string")
            .roleArn("string")
            .startDate("string")
            .build())
        .regionName("string")
        .roleArn("string")
        .schema("string")
        .startDate("string")
        .awsSecretAccessKey("string")
        .build())
    // Resource-level inputs, set alongside the configuration.
    .workspaceId("string")
    .definitionId("string")
    .name("string")
    .secretId("string")
    .build());
# Reference example: constructs a SourceS3 with every input property set to
# a placeholder value ("string", 0, False, or an empty dict).
source_s3_resource = airbyte.SourceS3("sourceS3Resource",
    configuration={
        "bucket": "string",
        # Per-stream settings, including every available stream format.
        "streams": [{
            "format": {
                "avro_format": {
                    "double_as_string": False,
                },
                "csv_format": {
                    "delimiter": "string",
                    "double_quote": False,
                    "encoding": "string",
                    "escape_char": "string",
                    "false_values": ["string"],
                    "header_definition": {
                        "autogenerated": {},
                        "from_csv": {},
                        "user_provided": {
                            "column_names": ["string"],
                        },
                    },
                    "ignore_errors_on_fields_mismatch": False,
                    "inference_type": "string",
                    "null_values": ["string"],
                    "quote_char": "string",
                    "skip_rows_after_header": 0,
                    "skip_rows_before_header": 0,
                    "strings_can_be_null": False,
                    "true_values": ["string"],
                },
                "excel_format": {},
                "jsonl_format": {},
                "parquet_format": {
                    "decimal_as_float": False,
                },
                "unstructured_document_format": {
                    "processing": {
                        "local": {},
                    },
                    "skip_unprocessable_files": False,
                    "strategy": "string",
                },
            },
            "name": "string",
            "days_to_sync_if_history_is_full": 0,
            "globs": ["string"],
            "input_schema": "string",
            "legacy_prefix": "string",
            "primary_key": "string",
            "recent_n_files_to_read_for_schema_discovery": 0,
            "schemaless": False,
            "validation_policy": "string",
        }],
        # Configuration-level format block (separate from the per-stream format above).
        "format": {
            "avro": {},
            "csv": {
                "additional_reader_options": "string",
                "advanced_options": "string",
                "block_size": 0,
                "delimiter": "string",
                "double_quote": False,
                "encoding": "string",
                "escape_char": "string",
                "infer_datatypes": False,
                "newlines_in_values": False,
                "quote_char": "string",
            },
            "jsonl": {
                "block_size": 0,
                "newlines_in_values": False,
                "unexpected_field_behavior": "string",
            },
            "parquet": {
                "batch_size": 0,
                "buffer_size": 0,
                "columns": ["string"],
            },
        },
        "dataset": "string",
        "delivery_method": {
            "copy_raw_files": {
                "preserve_directory_structure": False,
            },
            "replicate_records": {},
        },
        "endpoint": "string",
        "aws_access_key_id": "string",
        "path_pattern": "string",
        "provider": {
            "aws_access_key_id": "string",
            "aws_secret_access_key": "string",
            "bucket": "string",
            "endpoint": "string",
            "path_prefix": "string",
            "region_name": "string",
            "role_arn": "string",
            "start_date": "string",
        },
        "region_name": "string",
        "role_arn": "string",
        "schema": "string",
        "start_date": "string",
        "aws_secret_access_key": "string",
    },
    # Resource-level inputs, passed alongside configuration.
    workspace_id="string",
    definition_id="string",
    name="string",
    secret_id="string")
// Reference example: constructs a SourceS3 with every input property set to
// a placeholder value ("string", 0, false, or an empty object).
const sourceS3Resource = new airbyte.SourceS3("sourceS3Resource", {
    configuration: {
        bucket: "string",
        // Per-stream settings, including every available stream format.
        streams: [{
            format: {
                avroFormat: {
                    doubleAsString: false,
                },
                csvFormat: {
                    delimiter: "string",
                    doubleQuote: false,
                    encoding: "string",
                    escapeChar: "string",
                    falseValues: ["string"],
                    headerDefinition: {
                        autogenerated: {},
                        fromCsv: {},
                        userProvided: {
                            columnNames: ["string"],
                        },
                    },
                    ignoreErrorsOnFieldsMismatch: false,
                    inferenceType: "string",
                    nullValues: ["string"],
                    quoteChar: "string",
                    skipRowsAfterHeader: 0,
                    skipRowsBeforeHeader: 0,
                    stringsCanBeNull: false,
                    trueValues: ["string"],
                },
                excelFormat: {},
                jsonlFormat: {},
                parquetFormat: {
                    decimalAsFloat: false,
                },
                unstructuredDocumentFormat: {
                    processing: {
                        local: {},
                    },
                    skipUnprocessableFiles: false,
                    strategy: "string",
                },
            },
            name: "string",
            daysToSyncIfHistoryIsFull: 0,
            globs: ["string"],
            inputSchema: "string",
            legacyPrefix: "string",
            primaryKey: "string",
            recentNFilesToReadForSchemaDiscovery: 0,
            schemaless: false,
            validationPolicy: "string",
        }],
        // Configuration-level format block (separate from the per-stream format above).
        format: {
            avro: {},
            csv: {
                additionalReaderOptions: "string",
                advancedOptions: "string",
                blockSize: 0,
                delimiter: "string",
                doubleQuote: false,
                encoding: "string",
                escapeChar: "string",
                inferDatatypes: false,
                newlinesInValues: false,
                quoteChar: "string",
            },
            jsonl: {
                blockSize: 0,
                newlinesInValues: false,
                unexpectedFieldBehavior: "string",
            },
            parquet: {
                batchSize: 0,
                bufferSize: 0,
                columns: ["string"],
            },
        },
        dataset: "string",
        deliveryMethod: {
            copyRawFiles: {
                preserveDirectoryStructure: false,
            },
            replicateRecords: {},
        },
        endpoint: "string",
        awsAccessKeyId: "string",
        pathPattern: "string",
        provider: {
            awsAccessKeyId: "string",
            awsSecretAccessKey: "string",
            bucket: "string",
            endpoint: "string",
            pathPrefix: "string",
            regionName: "string",
            roleArn: "string",
            startDate: "string",
        },
        regionName: "string",
        roleArn: "string",
        schema: "string",
        startDate: "string",
        awsSecretAccessKey: "string",
    },
    // Resource-level inputs, set alongside configuration.
    workspaceId: "string",
    definitionId: "string",
    name: "string",
    secretId: "string",
});
# Reference example: a SourceS3 resource body with every input property set to
# a placeholder value. Nesting mirrors the equivalent examples in the other
# languages.
type: airbyte:SourceS3
properties:
  configuration:
    awsAccessKeyId: string
    awsSecretAccessKey: string
    bucket: string
    dataset: string
    deliveryMethod:
      copyRawFiles:
        preserveDirectoryStructure: false
      replicateRecords: {}
    endpoint: string
    # Configuration-level format block (separate from the per-stream format below).
    format:
      avro: {}
      csv:
        additionalReaderOptions: string
        advancedOptions: string
        blockSize: 0
        delimiter: string
        doubleQuote: false
        encoding: string
        escapeChar: string
        inferDatatypes: false
        newlinesInValues: false
        quoteChar: string
      jsonl:
        blockSize: 0
        newlinesInValues: false
        unexpectedFieldBehavior: string
      parquet:
        batchSize: 0
        bufferSize: 0
        columns:
          - string
    pathPattern: string
    provider:
      awsAccessKeyId: string
      awsSecretAccessKey: string
      bucket: string
      endpoint: string
      pathPrefix: string
      regionName: string
      roleArn: string
      startDate: string
    regionName: string
    roleArn: string
    schema: string
    startDate: string
    # Per-stream settings, including every available stream format.
    streams:
      - daysToSyncIfHistoryIsFull: 0
        format:
          avroFormat:
            doubleAsString: false
          csvFormat:
            delimiter: string
            doubleQuote: false
            encoding: string
            escapeChar: string
            falseValues:
              - string
            headerDefinition:
              autogenerated: {}
              fromCsv: {}
              userProvided:
                columnNames:
                  - string
            ignoreErrorsOnFieldsMismatch: false
            inferenceType: string
            nullValues:
              - string
            quoteChar: string
            skipRowsAfterHeader: 0
            skipRowsBeforeHeader: 0
            stringsCanBeNull: false
            trueValues:
              - string
          excelFormat: {}
          jsonlFormat: {}
          parquetFormat:
            decimalAsFloat: false
          unstructuredDocumentFormat:
            processing:
              local: {}
            skipUnprocessableFiles: false
            strategy: string
        globs:
          - string
        inputSchema: string
        legacyPrefix: string
        name: string
        primaryKey: string
        recentNFilesToReadForSchemaDiscovery: 0
        schemaless: false
        validationPolicy: string
  # Resource-level inputs, set alongside configuration.
  definitionId: string
  name: string
  secretId: string
  workspaceId: string
SourceS3 Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
The SourceS3 resource accepts the following input properties:
- Configuration
Source
S3Configuration - NOTE: When this spec is changed, legacy_config_transformer.py must also be updated to pick up the changes, because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- Workspace
Id string - Definition
Id string - The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
- Name string
- Name of the source e.g. dev-mysql-instance.
- Secret
Id string - Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- Configuration
Source
S3Configuration Args - NOTE: When this spec is changed, legacy_config_transformer.py must also be updated to pick up the changes, because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- Workspace
Id string - Definition
Id string - The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
- Name string
- Name of the source e.g. dev-mysql-instance.
- Secret
Id string - Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- configuration
Source
S3Configuration - NOTE: When this spec is changed, legacy_config_transformer.py must also be updated to pick up the changes, because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- workspace
Id String - definition
Id String - The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
- name String
- Name of the source e.g. dev-mysql-instance.
- secret
Id String - Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- configuration
Source
S3Configuration - NOTE: When this spec is changed, legacy_config_transformer.py must also be updated to pick up the changes, because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- workspace
Id string - definition
Id string - The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
- name string
- Name of the source e.g. dev-mysql-instance.
- secret
Id string - Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- configuration
Source
S3Configuration Args - NOTE: When this spec is changed, legacy_config_transformer.py must also be updated to pick up the changes, because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- workspace_
id str - definition_
id str - The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
- name str
- Name of the source e.g. dev-mysql-instance.
- secret_
id str - Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- configuration Property Map
- NOTE: When this spec is changed, legacy_config_transformer.py must also be updated to pick up the changes, because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- workspace
Id String - definition
Id String - The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
- name String
- Name of the source e.g. dev-mysql-instance.
- secret
Id String - Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
Outputs
All input properties are implicitly available as output properties. Additionally, the SourceS3 resource produces the following output properties:
- Created
At double - Id string
- The provider-assigned unique ID for this managed resource.
- Resource
Allocation SourceS3Resource Allocation - Actor or actor-definition-specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job-type-specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
- Source
Id string - Source
Type string
- Created
At float64 - Id string
- The provider-assigned unique ID for this managed resource.
- Resource
Allocation SourceS3Resource Allocation - Actor or actor-definition-specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job-type-specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
- Source
Id string - Source
Type string
- created
At Double - id String
- The provider-assigned unique ID for this managed resource.
- resource
Allocation SourceS3Resource Allocation - Actor or actor-definition-specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job-type-specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
- source
Id String - source
Type String
- created
At number - id string
- The provider-assigned unique ID for this managed resource.
- resource
Allocation SourceS3Resource Allocation - Actor or actor-definition-specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job-type-specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
- source
Id string - source
Type string
- created_
at float - id str
- The provider-assigned unique ID for this managed resource.
- resource_
allocation SourceS3Resource Allocation - Actor or actor-definition-specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job-type-specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
- source_
id str - source_
type str
- created
At Number - id String
- The provider-assigned unique ID for this managed resource.
- resource
Allocation Property Map - Actor or actor-definition-specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job-type-specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
- source
Id String - source
Type String
Look up Existing SourceS3 Resource
Get an existing SourceS3 resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: SourceS3State, opts?: CustomResourceOptions): SourceS3
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
configuration: Optional[SourceS3ConfigurationArgs] = None,
created_at: Optional[float] = None,
definition_id: Optional[str] = None,
name: Optional[str] = None,
resource_allocation: Optional[SourceS3ResourceAllocationArgs] = None,
secret_id: Optional[str] = None,
source_id: Optional[str] = None,
source_type: Optional[str] = None,
workspace_id: Optional[str] = None) -> SourceS3
func GetSourceS3(ctx *Context, name string, id IDInput, state *SourceS3State, opts ...ResourceOption) (*SourceS3, error)
public static SourceS3 Get(string name, Input<string> id, SourceS3State? state, CustomResourceOptions? opts = null)
public static SourceS3 get(String name, Output<String> id, SourceS3State state, CustomResourceOptions options)
resources:
  _:
    type: airbyte:SourceS3
    get:
      id: ${id}
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- Configuration
Source
S3Configuration - NOTE: When this spec is changed, legacy_config_transformer.py must also be updated to pick up the changes, because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- Created
At double - Definition
Id string - The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
- Name string
- Name of the source e.g. dev-mysql-instance.
- Resource
Allocation SourceS3Resource Allocation - Actor or actor-definition-specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job-type-specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
- Secret
Id string - Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- Source
Id string - Source
Type string - Workspace
Id string
- Configuration
Source
S3Configuration Args - NOTE: When this spec is changed, legacy_config_transformer.py must also be updated to pick up the changes, because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- Created
At float64 - Definition
Id string - The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
- Name string
- Name of the source e.g. dev-mysql-instance.
- Resource
Allocation SourceS3Resource Allocation Args - Actor or actor-definition-specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job-type-specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
- Secret
Id string - Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- Source
Id string - Source
Type string - Workspace
Id string
- configuration
Source
S3Configuration - NOTE: When this spec is changed, legacy_config_transformer.py must also be updated to pick up the changes, because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- created
At Double - definition
Id String - The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
- name String
- Name of the source e.g. dev-mysql-instance.
- resource
Allocation SourceS3Resource Allocation - Actor or actor-definition-specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job-type-specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
- secret
Id String - Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- source
Id String - source
Type String - workspace
Id String
- configuration
Source
S3Configuration - NOTE: When this spec is changed, legacy_config_transformer.py must also be updated to pick up the changes, because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- created
At number - definition
Id string - The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
- name string
- Name of the source e.g. dev-mysql-instance.
- resource
Allocation SourceS3Resource Allocation - Actor or actor-definition-specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job-type-specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
- secret
Id string - Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- source
Id string - source
Type string - workspace
Id string
- configuration
Source
S3Configuration Args - NOTE: When this spec is changed, legacy_config_transformer.py must also be updated to pick up the changes, because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- created_
at float - definition_
id str - The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
- name str
- Name of the source e.g. dev-mysql-instance.
- resource_
allocation SourceS3Resource Allocation Args - Actor or actor-definition-specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job-type-specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
- secret_
id str - Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- source_
id str - source_
type str - workspace_
id str
- configuration Property Map
- NOTE: When this spec is changed, legacy_config_transformer.py must also be updated to pick up the changes, because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- created
At Number - definition
Id String - The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
- name String
- Name of the source e.g. dev-mysql-instance.
- resource
Allocation Property Map - Actor or actor-definition-specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job-type-specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
- secret
Id String - Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- source
Id String - source
Type String - workspace
Id String
Supporting Types
SourceS3Configuration, SourceS3ConfigurationArgs
- Bucket string
- Name of the S3 bucket where the file(s) exist.
- Streams
List<Source
S3Configuration Stream> - Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
- AwsAccessKeyId string
- In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- AwsSecretAccessKey string
- In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- Dataset string
- Deprecated and will be removed soon. Please do not use this field anymore and use streams.name instead. The name of the stream you would like this source to output. Can contain letters, numbers, or underscores.
- Delivery
Method SourceS3Configuration Delivery Method - Endpoint string
- Endpoint to an S3 compatible service. Leave empty to use AWS. The custom endpoint must be secure, but the 'https' prefix is not required. Default: ""
- Format
Source
S3Configuration Format - Deprecated and will be removed soon. Please do not use this field anymore and use streams.format instead. The format of the files you'd like to replicate
- Path
Pattern string - Deprecated and will be removed soon. Please do not use this field anymore and use streams.globs instead. A regular expression which tells the connector which files to replicate. All files which match this pattern will be replicated. Use | to separate multiple patterns. See the wcmatch glob documentation (https://facelessuser.github.io/wcmatch/glob/) to understand pattern syntax (GLOBSTAR and SPLIT flags are enabled). Use pattern ** to pick up all files.
- Provider
Source
S3Configuration Provider - Deprecated and will be removed soon. Please do not use this field anymore and use bucket, aws_access_key_id, aws_secret_access_key and endpoint instead. Use this to load files from S3 or S3-compatible services.
- Region
Name string - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- Role
Arn string - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- Schema string
- Deprecated and will be removed soon. Please do not use this field anymore and use streams.input_schema instead. Optionally provide a schema to enforce, as a valid JSON string. Ensure this is a mapping of { "column" : "type" }, where types are valid JSON Schema datatypes (https://json-schema.org/understanding-json-schema/reference/type.html). Leave as {} to auto-infer the schema.
- Start
Date string - UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.
- Bucket string
- Name of the S3 bucket where the file(s) exist.
- Streams
[]Source
S3Configuration Stream - Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
- AwsAccessKeyId string
- In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- AwsSecretAccessKey string
- In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- Dataset string
- Deprecated and will be removed soon. Please do not use this field anymore and use streams.name instead. The name of the stream you would like this source to output. Can contain letters, numbers, or underscores.
- Delivery
Method SourceS3Configuration Delivery Method - Endpoint string
- Endpoint to an S3 compatible service. Leave empty to use AWS. The custom endpoint must be secure, but the 'https' prefix is not required. Default: ""
- Format
Source
S3Configuration Format - Deprecated and will be removed soon. Please do not use this field anymore and use streams.format instead. The format of the files you'd like to replicate
- Path
Pattern string - Deprecated and will be removed soon. Please do not use this field anymore and use streams.globs instead. A regular expression which tells the connector which files to replicate. All files which match this pattern will be replicated. Use | to separate multiple patterns. See the wcmatch glob documentation (https://facelessuser.github.io/wcmatch/glob/) to understand pattern syntax (GLOBSTAR and SPLIT flags are enabled). Use pattern ** to pick up all files.
- Provider
Source
S3Configuration Provider - Deprecated and will be removed soon. Please do not use this field anymore and use bucket, aws_access_key_id, aws_secret_access_key and endpoint instead. Use this to load files from S3 or S3-compatible services.
- Region
Name string - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- Role
Arn string - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- Schema string
- Deprecated and will be removed soon. Please do not use this field anymore and use streams.input_schema instead. Optionally provide a schema to enforce, as a valid JSON string. Ensure this is a mapping of { "column" : "type" }, where types are valid JSON Schema datatypes (https://json-schema.org/understanding-json-schema/reference/type.html). Leave as {} to auto-infer the schema.
- Start
Date string - UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.
- bucket String
- Name of the S3 bucket where the file(s) exist.
- streams
List<Source
S3Configuration Stream> - Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
- awsAccessKeyId String
- In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- awsSecretAccessKey String
- In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- dataset String
- Deprecated and will be removed soon. Please do not use this field anymore and use streams.name instead. The name of the stream you would like this source to output. Can contain letters, numbers, or underscores.
- delivery
Method SourceS3Configuration Delivery Method - endpoint String
- Endpoint to an S3 compatible service. Leave empty to use AWS. The custom endpoint must be secure, but the 'https' prefix is not required. Default: ""
- format
Source
S3Configuration Format - Deprecated and will be removed soon. Please do not use this field anymore and use streams.format instead. The format of the files you'd like to replicate
- path
Pattern String - Deprecated and will be removed soon. Please do not use this field anymore and use streams.globs instead. A regular expression which tells the connector which files to replicate. All files which match this pattern will be replicated. Use | to separate multiple patterns. See the wcmatch glob documentation (https://facelessuser.github.io/wcmatch/glob/) to understand pattern syntax (GLOBSTAR and SPLIT flags are enabled). Use pattern ** to pick up all files.
- provider
Source
S3Configuration Provider - Deprecated and will be removed soon. Please do not use this field anymore and use bucket, aws_access_key_id, aws_secret_access_key and endpoint instead. Use this to load files from S3 or S3-compatible services.
- region
Name String - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- role
Arn String - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- schema String
- Deprecated and will be removed soon. Please do not use this field anymore and use streams.input_schema instead. Optionally provide a schema to enforce, as a valid JSON string. Ensure this is a mapping of { "column" : "type" }, where types are valid JSON Schema datatypes (https://json-schema.org/understanding-json-schema/reference/type.html). Leave as {} to auto-infer the schema.
- start
Date String - UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.
- bucket string
- Name of the S3 bucket where the file(s) exist.
- streams
Source
S3Configuration Stream[] - Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
- awsAccessKeyId string
- In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- awsSecretAccessKey string
- In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- dataset string
- Deprecated and will be removed soon. Please do not use this field anymore and use streams.name instead. The name of the stream you would like this source to output. Can contain letters, numbers, or underscores.
- delivery
Method SourceS3Configuration Delivery Method - endpoint string
- Endpoint to an S3 compatible service. Leave empty to use AWS. The custom endpoint must be secure, but the 'https' prefix is not required. Default: ""
- format
Source
S3Configuration Format - Deprecated and will be removed soon. Please do not use this field anymore and use streams.format instead. The format of the files you'd like to replicate
- path
Pattern string - Deprecated and will be removed soon. Please do not use this field anymore and use streams.globs instead. A regular expression which tells the connector which files to replicate. All files which match this pattern will be replicated. Use | to separate multiple patterns. See the wcmatch glob documentation (https://facelessuser.github.io/wcmatch/glob/) to understand pattern syntax (GLOBSTAR and SPLIT flags are enabled). Use pattern ** to pick up all files.
- provider
Source
S3Configuration Provider - Deprecated and will be removed soon. Please do not use this field anymore and use bucket, aws_access_key_id, aws_secret_access_key and endpoint instead. Use this to load files from S3 or S3-compatible services.
- region
Name string - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- role
Arn string - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- schema string
- Deprecated and will be removed soon. Please do not use this field anymore and use streams.input_schema instead. Optionally provide a schema to enforce, as a valid JSON string. Ensure this is a mapping of { "column" : "type" }, where types are valid JSON Schema datatypes (https://json-schema.org/understanding-json-schema/reference/type.html). Leave as {} to auto-infer the schema.
- start
Date string - UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.
- bucket str
- Name of the S3 bucket where the file(s) exist.
- streams
Sequence[Source
S3Configuration Stream] - Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
- aws_access_key_id str
- In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- aws_secret_access_key str
- In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- dataset str
- Deprecated and will be removed soon. Please do not use this field anymore and use streams.name instead. The name of the stream you would like this source to output. Can contain letters, numbers, or underscores.
- delivery_
method SourceS3Configuration Delivery Method - endpoint str
- Endpoint to an S3 compatible service. Leave empty to use AWS. The custom endpoint must be secure, but the 'https' prefix is not required. Default: ""
- format
Source
S3Configuration Format - Deprecated and will be removed soon. Please do not use this field anymore and use streams.format instead. The format of the files you'd like to replicate
- path_
pattern str - Deprecated and will be removed soon. Please do not use this field anymore and use streams.globs instead. A regular expression which tells the connector which files to replicate. All files which match this pattern will be replicated. Use | to separate multiple patterns. See the wcmatch glob documentation (https://facelessuser.github.io/wcmatch/glob/) to understand pattern syntax (GLOBSTAR and SPLIT flags are enabled). Use pattern ** to pick up all files.
- provider
Source
S3Configuration Provider - Deprecated and will be removed soon. Please do not use this field anymore and use bucket, aws_access_key_id, aws_secret_access_key and endpoint instead. Use this to load files from S3 or S3-compatible services.
- region_
name str - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- role_
arn str - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- schema str
- Deprecated and will be removed soon. Please do not use this field anymore and use streams.input_schema instead. Optionally provide a schema to enforce, as a valid JSON string. Ensure this is a mapping of { "column" : "type" }, where types are valid JSON Schema datatypes (https://json-schema.org/understanding-json-schema/reference/type.html). Leave as {} to auto-infer the schema.
- start_date str
- UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.
- bucket String
- Name of the S3 bucket where the file(s) exist.
- streams List<Property Map>
- Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
- awsAccessKeyId String
- In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- awsSecretAccessKey String
- In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- dataset String
- Deprecated and will be removed soon. Please do not use this field anymore and use streams.name instead. The name of the stream you would like this source to output. Can contain letters, numbers, or underscores.
- deliveryMethod Property Map
- endpoint String
- Endpoint to an S3 compatible service. Leave empty to use AWS. The custom endpoint must be secure, but the 'https' prefix is not required. Default: ""
- format Property Map
- Deprecated and will be removed soon. Please do not use this field anymore and use streams.format instead. The format of the files you'd like to replicate
- pathPattern String
- Deprecated and will be removed soon. Please do not use this field anymore and use streams.globs instead. A regular expression which tells the connector which files to replicate. All files which match this pattern will be replicated. Use | to separate multiple patterns. See the wcmatch glob documentation to understand pattern syntax (GLOBSTAR and SPLIT flags are enabled). Use pattern ** to pick up all files.
- provider Property Map
- Deprecated and will be removed soon. Please do not use this field anymore and use bucket, awsAccessKeyId, awsSecretAccessKey and endpoint instead. Use this to load files from S3 or S3-compatible services
- regionName String
- AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- roleArn String
- Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- schema String
- Deprecated and will be removed soon. Please do not use this field anymore and use streams.inputSchema instead. Optionally provide a schema to enforce, as a valid JSON string. Ensure this is a mapping of { "column" : "type" }, where types are valid JSON Schema datatypes (https://json-schema.org/understanding-json-schema/reference/type.html). Leave as {} to auto-infer the schema.
- startDate String
- UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.
SourceS3ConfigurationDeliveryMethod, SourceS3ConfigurationDeliveryMethodArgs
- CopyRawFiles SourceS3ConfigurationDeliveryMethodCopyRawFiles
- Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
- ReplicateRecords SourceS3ConfigurationDeliveryMethodReplicateRecords
- Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.
- CopyRawFiles SourceS3ConfigurationDeliveryMethodCopyRawFiles
- Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
- ReplicateRecords SourceS3ConfigurationDeliveryMethodReplicateRecords
- Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.
- copyRawFiles SourceS3ConfigurationDeliveryMethodCopyRawFiles
- Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
- replicateRecords SourceS3ConfigurationDeliveryMethodReplicateRecords
- Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.
- copyRawFiles SourceS3ConfigurationDeliveryMethodCopyRawFiles
- Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
- replicateRecords SourceS3ConfigurationDeliveryMethodReplicateRecords
- Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.
- copy_raw_files SourceS3ConfigurationDeliveryMethodCopyRawFiles
- Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
- replicate_records SourceS3ConfigurationDeliveryMethodReplicateRecords
- Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.
- copyRawFiles Property Map
- Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
- replicateRecords Property Map
- Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.
SourceS3ConfigurationDeliveryMethodCopyRawFiles, SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs
- PreserveDirectoryStructure bool
- If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true
- PreserveDirectoryStructure bool
- If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true
- preserveDirectoryStructure Boolean
- If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true
- preserveDirectoryStructure boolean
- If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true
- preserve_directory_structure bool
- If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true
- preserveDirectoryStructure Boolean
- If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true
SourceS3ConfigurationFormat, SourceS3ConfigurationFormatArgs
- Avro SourceS3ConfigurationFormatAvro
- This connector utilises fastavro for Avro parsing.
- Csv SourceS3ConfigurationFormatCsv
- This connector utilises PyArrow (Apache Arrow) for CSV parsing.
- Jsonl SourceS3ConfigurationFormatJsonl
- This connector uses PyArrow for JSON Lines (jsonl) file parsing.
- Parquet SourceS3ConfigurationFormatParquet
- This connector utilises PyArrow (Apache Arrow) for Parquet parsing.
- Avro SourceS3ConfigurationFormatAvro
- This connector utilises fastavro for Avro parsing.
- Csv SourceS3ConfigurationFormatCsv
- This connector utilises PyArrow (Apache Arrow) for CSV parsing.
- Jsonl SourceS3ConfigurationFormatJsonl
- This connector uses PyArrow for JSON Lines (jsonl) file parsing.
- Parquet SourceS3ConfigurationFormatParquet
- This connector utilises PyArrow (Apache Arrow) for Parquet parsing.
- avro SourceS3ConfigurationFormatAvro
- This connector utilises fastavro for Avro parsing.
- csv SourceS3ConfigurationFormatCsv
- This connector utilises PyArrow (Apache Arrow) for CSV parsing.
- jsonl SourceS3ConfigurationFormatJsonl
- This connector uses PyArrow for JSON Lines (jsonl) file parsing.
- parquet SourceS3ConfigurationFormatParquet
- This connector utilises PyArrow (Apache Arrow) for Parquet parsing.
- avro SourceS3ConfigurationFormatAvro
- This connector utilises fastavro for Avro parsing.
- csv SourceS3ConfigurationFormatCsv
- This connector utilises PyArrow (Apache Arrow) for CSV parsing.
- jsonl SourceS3ConfigurationFormatJsonl
- This connector uses PyArrow for JSON Lines (jsonl) file parsing.
- parquet SourceS3ConfigurationFormatParquet
- This connector utilises PyArrow (Apache Arrow) for Parquet parsing.
- avro SourceS3ConfigurationFormatAvro
- This connector utilises fastavro for Avro parsing.
- csv SourceS3ConfigurationFormatCsv
- This connector utilises PyArrow (Apache Arrow) for CSV parsing.
- jsonl SourceS3ConfigurationFormatJsonl
- This connector uses PyArrow for JSON Lines (jsonl) file parsing.
- parquet SourceS3ConfigurationFormatParquet
- This connector utilises PyArrow (Apache Arrow) for Parquet parsing.
- avro Property Map
- This connector utilises fastavro for Avro parsing.
- csv Property Map
- This connector utilises PyArrow (Apache Arrow) for CSV parsing.
- jsonl Property Map
- This connector uses PyArrow for JSON Lines (jsonl) file parsing.
- parquet Property Map
- This connector utilises PyArrow (Apache Arrow) for Parquet parsing.
SourceS3ConfigurationFormatCsv, SourceS3ConfigurationFormatCsvArgs
- AdditionalReaderOptions string
- Optionally add a valid JSON string here to provide additional options to the csv reader. Mappings must correspond to the options detailed in the PyArrow ConvertOptions documentation. 'column_types' is used internally to handle schema so overriding that would likely cause problems.
- AdvancedOptions string
- Optionally add a valid JSON string here to provide additional Pyarrow ReadOptions. Specify 'column_names' here if your CSV doesn't have a header, or if you want to use custom column names. 'block_size' and 'encoding' are already used above; specifying them again here will override the values above.
- BlockSize double
- The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 10000
- Delimiter string
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- DoubleQuote bool
- Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- Encoding string
- The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
- EscapeChar string
- The character used for escaping special characters. To disallow escaping, leave this field blank.
- InferDatatypes bool
- Configures whether a schema for the source should be inferred from the current data or not. If set to false and a custom schema is set, then the manually enforced schema is used. If a schema is not manually set, and this is set to false, then all fields will be read as strings. Default: true
- NewlinesInValues bool
- Whether newline characters are allowed in CSV values. Turning this on may affect performance. Leave blank to default to False. Default: false
- QuoteChar string
- The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
- AdditionalReaderOptions string
- Optionally add a valid JSON string here to provide additional options to the csv reader. Mappings must correspond to the options detailed in the PyArrow ConvertOptions documentation. 'column_types' is used internally to handle schema so overriding that would likely cause problems.
- AdvancedOptions string
- Optionally add a valid JSON string here to provide additional Pyarrow ReadOptions. Specify 'column_names' here if your CSV doesn't have a header, or if you want to use custom column names. 'block_size' and 'encoding' are already used above; specifying them again here will override the values above.
- BlockSize float64
- The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 10000
- Delimiter string
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- DoubleQuote bool
- Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- Encoding string
- The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
- EscapeChar string
- The character used for escaping special characters. To disallow escaping, leave this field blank.
- InferDatatypes bool
- Configures whether a schema for the source should be inferred from the current data or not. If set to false and a custom schema is set, then the manually enforced schema is used. If a schema is not manually set, and this is set to false, then all fields will be read as strings. Default: true
- NewlinesInValues bool
- Whether newline characters are allowed in CSV values. Turning this on may affect performance. Leave blank to default to False. Default: false
- QuoteChar string
- The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
- additionalReaderOptions String
- Optionally add a valid JSON string here to provide additional options to the csv reader. Mappings must correspond to the options detailed in the PyArrow ConvertOptions documentation. 'column_types' is used internally to handle schema so overriding that would likely cause problems.
- advancedOptions String
- Optionally add a valid JSON string here to provide additional Pyarrow ReadOptions. Specify 'column_names' here if your CSV doesn't have a header, or if you want to use custom column names. 'block_size' and 'encoding' are already used above; specifying them again here will override the values above.
- blockSize Double
- The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 10000
- delimiter String
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- doubleQuote Boolean
- Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- encoding String
- The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
- escapeChar String
- The character used for escaping special characters. To disallow escaping, leave this field blank.
- inferDatatypes Boolean
- Configures whether a schema for the source should be inferred from the current data or not. If set to false and a custom schema is set, then the manually enforced schema is used. If a schema is not manually set, and this is set to false, then all fields will be read as strings. Default: true
- newlinesInValues Boolean
- Whether newline characters are allowed in CSV values. Turning this on may affect performance. Leave blank to default to False. Default: false
- quoteChar String
- The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
- additionalReaderOptions string
- Optionally add a valid JSON string here to provide additional options to the csv reader. Mappings must correspond to the options detailed in the PyArrow ConvertOptions documentation. 'column_types' is used internally to handle schema so overriding that would likely cause problems.
- advancedOptions string
- Optionally add a valid JSON string here to provide additional Pyarrow ReadOptions. Specify 'column_names' here if your CSV doesn't have a header, or if you want to use custom column names. 'block_size' and 'encoding' are already used above; specifying them again here will override the values above.
- blockSize number
- The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 10000
- delimiter string
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- doubleQuote boolean
- Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- encoding string
- The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
- escapeChar string
- The character used for escaping special characters. To disallow escaping, leave this field blank.
- inferDatatypes boolean
- Configures whether a schema for the source should be inferred from the current data or not. If set to false and a custom schema is set, then the manually enforced schema is used. If a schema is not manually set, and this is set to false, then all fields will be read as strings. Default: true
- newlinesInValues boolean
- Whether newline characters are allowed in CSV values. Turning this on may affect performance. Leave blank to default to False. Default: false
- quoteChar string
- The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
- additional_reader_options str
- Optionally add a valid JSON string here to provide additional options to the csv reader. Mappings must correspond to the options detailed in the PyArrow ConvertOptions documentation. 'column_types' is used internally to handle schema so overriding that would likely cause problems.
- advanced_options str
- Optionally add a valid JSON string here to provide additional Pyarrow ReadOptions. Specify 'column_names' here if your CSV doesn't have a header, or if you want to use custom column names. 'block_size' and 'encoding' are already used above; specifying them again here will override the values above.
- block_size float
- The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 10000
- delimiter str
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- double_quote bool
- Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- encoding str
- The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
- escape_char str
- The character used for escaping special characters. To disallow escaping, leave this field blank.
- infer_datatypes bool
- Configures whether a schema for the source should be inferred from the current data or not. If set to false and a custom schema is set, then the manually enforced schema is used. If a schema is not manually set, and this is set to false, then all fields will be read as strings. Default: true
- newlines_in_values bool
- Whether newline characters are allowed in CSV values. Turning this on may affect performance. Leave blank to default to False. Default: false
- quote_char str
- The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
- additional
Reader StringOptions - Optionally add a valid JSON string here to provide additional options to the CSV reader. Mappings must correspond to the options detailed in the PyArrow ConvertOptions documentation. 'column_types' is used internally to handle the schema, so overriding it would likely cause problems.
- advanced
Options String - Optionally add a valid JSON string here to provide additional Pyarrow ReadOptions. Specify 'column_names' here if your CSV doesn't have a header, or if you want to use custom column names. 'block_size' and 'encoding' are already used above; specifying them again here will override the values above.
- block
Size Number - The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 10000
- delimiter String
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- double
Quote Boolean - Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- encoding String
- The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
- escape
Char String - The character used for escaping special characters. To disallow escaping, leave this field blank.
- infer
Datatypes Boolean - Configures whether a schema for the source should be inferred from the current data or not. If set to false and a custom schema is set, then the manually enforced schema is used. If a schema is not manually set, and this is set to false, then all fields will be read as strings. Default: true
- newlines
In BooleanValues - Whether newline characters are allowed in CSV values. Turning this on may affect performance. Leave blank to default to False. Default: false
- quote
Char String - The character used for quoting CSV values. To disallow quoting, make this field blank. Default: """
SourceS3ConfigurationFormatJsonl, SourceS3ConfigurationFormatJsonlArgs
- Block
Size double - The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 0
- Newlines
In boolValues - Whether newline characters are allowed in JSON values. Turning this on may affect performance. Leave blank to default to False. Default: false
- Unexpected
Field stringBehavior - How JSON fields outside of explicit_schema (if given) are treated. Check the PyArrow documentation (https://arrow.apache.org/docs/python/generated/pyarrow.json.ParseOptions.html) for details. Default: "infer"; must be one of ["ignore", "infer", "error"]
- Block
Size float64 - The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 0
- Newlines
In boolValues - Whether newline characters are allowed in JSON values. Turning this on may affect performance. Leave blank to default to False. Default: false
- Unexpected
Field stringBehavior - How JSON fields outside of explicit_schema (if given) are treated. Check the PyArrow documentation (https://arrow.apache.org/docs/python/generated/pyarrow.json.ParseOptions.html) for details. Default: "infer"; must be one of ["ignore", "infer", "error"]
- block
Size Double - The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 0
- newlines
In BooleanValues - Whether newline characters are allowed in JSON values. Turning this on may affect performance. Leave blank to default to False. Default: false
- unexpected
Field StringBehavior - How JSON fields outside of explicit_schema (if given) are treated. Check the PyArrow documentation (https://arrow.apache.org/docs/python/generated/pyarrow.json.ParseOptions.html) for details. Default: "infer"; must be one of ["ignore", "infer", "error"]
- block
Size number - The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 0
- newlines
In booleanValues - Whether newline characters are allowed in JSON values. Turning this on may affect performance. Leave blank to default to False. Default: false
- unexpected
Field stringBehavior - How JSON fields outside of explicit_schema (if given) are treated. Check the PyArrow documentation (https://arrow.apache.org/docs/python/generated/pyarrow.json.ParseOptions.html) for details. Default: "infer"; must be one of ["ignore", "infer", "error"]
- block_
size float - The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 0
- newlines_
in_ boolvalues - Whether newline characters are allowed in JSON values. Turning this on may affect performance. Leave blank to default to False. Default: false
- unexpected_
field_ strbehavior - How JSON fields outside of explicit_schema (if given) are treated. Check the PyArrow documentation (https://arrow.apache.org/docs/python/generated/pyarrow.json.ParseOptions.html) for details. Default: "infer"; must be one of ["ignore", "infer", "error"]
- block
Size Number - The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 0
- newlines
In BooleanValues - Whether newline characters are allowed in JSON values. Turning this on may affect performance. Leave blank to default to False. Default: false
- unexpected
Field StringBehavior - How JSON fields outside of explicit_schema (if given) are treated. Check the PyArrow documentation (https://arrow.apache.org/docs/python/generated/pyarrow.json.ParseOptions.html) for details. Default: "infer"; must be one of ["ignore", "infer", "error"]
SourceS3ConfigurationFormatParquet, SourceS3ConfigurationFormatParquetArgs
- Batch
Size double - Maximum number of records per batch read from the input files. Batches may be smaller if there aren’t enough rows in the file. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 65536
- Buffer
Size double - Perform read buffering when deserializing individual column chunks. By default every group column will be loaded fully to memory. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 2
- Columns List<string>
- If you only want to sync a subset of the columns from the file(s), add the columns you want here as a comma-delimited list. Leave it empty to sync all columns.
- Batch
Size float64 - Maximum number of records per batch read from the input files. Batches may be smaller if there aren’t enough rows in the file. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 65536
- Buffer
Size float64 - Perform read buffering when deserializing individual column chunks. By default every group column will be loaded fully to memory. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 2
- Columns []string
- If you only want to sync a subset of the columns from the file(s), add the columns you want here as a comma-delimited list. Leave it empty to sync all columns.
- batch
Size Double - Maximum number of records per batch read from the input files. Batches may be smaller if there aren’t enough rows in the file. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 65536
- buffer
Size Double - Perform read buffering when deserializing individual column chunks. By default every group column will be loaded fully to memory. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 2
- columns List<String>
- If you only want to sync a subset of the columns from the file(s), add the columns you want here as a comma-delimited list. Leave it empty to sync all columns.
- batch
Size number - Maximum number of records per batch read from the input files. Batches may be smaller if there aren’t enough rows in the file. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 65536
- buffer
Size number - Perform read buffering when deserializing individual column chunks. By default every group column will be loaded fully to memory. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 2
- columns string[]
- If you only want to sync a subset of the columns from the file(s), add the columns you want here as a comma-delimited list. Leave it empty to sync all columns.
- batch_
size float - Maximum number of records per batch read from the input files. Batches may be smaller if there aren’t enough rows in the file. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 65536
- buffer_
size float - Perform read buffering when deserializing individual column chunks. By default every group column will be loaded fully to memory. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 2
- columns Sequence[str]
- If you only want to sync a subset of the columns from the file(s), add the columns you want here as a comma-delimited list. Leave it empty to sync all columns.
- batch
Size Number - Maximum number of records per batch read from the input files. Batches may be smaller if there aren’t enough rows in the file. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 65536
- buffer
Size Number - Perform read buffering when deserializing individual column chunks. By default every group column will be loaded fully to memory. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 2
- columns List<String>
- If you only want to sync a subset of the columns from the file(s), add the columns you want here as a comma-delimited list. Leave it empty to sync all columns.
SourceS3ConfigurationProvider, SourceS3ConfigurationProviderArgs
- Aws
Access stringKey Id - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- Aws
Secret stringAccess Key - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- Bucket string
- Name of the S3 bucket where the file(s) exist.
- Endpoint string
- Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
- Path
Prefix string - By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimize finding these in S3. This is optional but recommended if your bucket contains many folders/files which you don't need to replicate. Default: ""
- Region
Name string - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- Role
Arn string - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- Start
Date string - UTC date and time in the format 2017-01-25T00:00:00Z. Any file modified before this date will not be replicated.
- Aws
Access stringKey Id - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- Aws
Secret stringAccess Key - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- Bucket string
- Name of the S3 bucket where the file(s) exist.
- Endpoint string
- Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
- Path
Prefix string - By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimize finding these in S3. This is optional but recommended if your bucket contains many folders/files which you don't need to replicate. Default: ""
- Region
Name string - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- Role
Arn string - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- Start
Date string - UTC date and time in the format 2017-01-25T00:00:00Z. Any file modified before this date will not be replicated.
- aws
Access StringKey Id - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- aws
Secret StringAccess Key - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- bucket String
- Name of the S3 bucket where the file(s) exist.
- endpoint String
- Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
- path
Prefix String - By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimize finding these in S3. This is optional but recommended if your bucket contains many folders/files which you don't need to replicate. Default: ""
- region
Name String - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- role
Arn String - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- start
Date String - UTC date and time in the format 2017-01-25T00:00:00Z. Any file modified before this date will not be replicated.
- aws
Access stringKey Id - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- aws
Secret stringAccess Key - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- bucket string
- Name of the S3 bucket where the file(s) exist.
- endpoint string
- Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
- path
Prefix string - By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimize finding these in S3. This is optional but recommended if your bucket contains many folders/files which you don't need to replicate. Default: ""
- region
Name string - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- role
Arn string - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- start
Date string - UTC date and time in the format 2017-01-25T00:00:00Z. Any file modified before this date will not be replicated.
- aws_
access_ strkey_ id - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- aws_
secret_ straccess_ key - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- bucket str
- Name of the S3 bucket where the file(s) exist.
- endpoint str
- Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
- path_
prefix str - By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimize finding these in S3. This is optional but recommended if your bucket contains many folders/files which you don't need to replicate. Default: ""
- region_
name str - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- role_
arn str - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- start_
date str - UTC date and time in the format 2017-01-25T00:00:00Z. Any file modified before this date will not be replicated.
- aws
Access StringKey Id - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- aws
Secret StringAccess Key - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- bucket String
- Name of the S3 bucket where the file(s) exist.
- endpoint String
- Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
- path
Prefix String - By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimize finding these in S3. This is optional but recommended if your bucket contains many folders/files which you don't need to replicate. Default: ""
- region
Name String - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- role
Arn String - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- start
Date String - UTC date and time in the format 2017-01-25T00:00:00Z. Any file modified before this date will not be replicated.
SourceS3ConfigurationStream, SourceS3ConfigurationStreamArgs
- Format
Source
S3Configuration Stream Format - The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
- Name string
- The name of the stream.
- Days
To doubleSync If History Is Full - When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
- Globs List<string>
- The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching, look here. Default: ["**"]
- Input
Schema string - The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
- Legacy
Prefix string - The path prefix configured in v3 versions of the S3 connector. This option is deprecated in favor of a single glob.
- Primary
Key string - The column or columns (for a composite key) that serves as the unique identifier of a record. If empty, the primary key will default to the parser's default primary key.
- Recent
NFiles doubleTo Read For Schema Discovery - The number of recent files which will be used to discover the schema for this stream.
- Schemaless bool
- When enabled, syncs will not validate or structure records against the stream's schema. Default: false
- Validation
Policy string - The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]
- Format
Source
S3Configuration Stream Format - The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
- Name string
- The name of the stream.
- Days
To float64Sync If History Is Full - When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
- Globs []string
- The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching, look here. Default: ["**"]
- Input
Schema string - The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
- Legacy
Prefix string - The path prefix configured in v3 versions of the S3 connector. This option is deprecated in favor of a single glob.
- Primary
Key string - The column or columns (for a composite key) that serves as the unique identifier of a record. If empty, the primary key will default to the parser's default primary key.
- Recent
NFiles float64To Read For Schema Discovery - The number of recent files which will be used to discover the schema for this stream.
- Schemaless bool
- When enabled, syncs will not validate or structure records against the stream's schema. Default: false
- Validation
Policy string - The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]
- format
Source
S3Configuration Stream Format - The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
- name String
- The name of the stream.
- days
To DoubleSync If History Is Full - When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
- globs List<String>
- The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching, look here. Default: ["**"]
- input
Schema String - The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
- legacy
Prefix String - The path prefix configured in v3 versions of the S3 connector. This option is deprecated in favor of a single glob.
- primary
Key String - The column or columns (for a composite key) that serves as the unique identifier of a record. If empty, the primary key will default to the parser's default primary key.
- recent
NFiles DoubleTo Read For Schema Discovery - The number of recent files which will be used to discover the schema for this stream.
- schemaless Boolean
- When enabled, syncs will not validate or structure records against the stream's schema. Default: false
- validation
Policy String - The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]
- format
Source
S3Configuration Stream Format - The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
- name string
- The name of the stream.
- days
To numberSync If History Is Full - When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
- globs string[]
- The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching, look here. Default: ["**"]
- input
Schema string - The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
- legacy
Prefix string - The path prefix configured in v3 versions of the S3 connector. This option is deprecated in favor of a single glob.
- primary
Key string - The column or columns (for a composite key) that serves as the unique identifier of a record. If empty, the primary key will default to the parser's default primary key.
- recent
NFiles numberTo Read For Schema Discovery - The number of recent files which will be used to discover the schema for this stream.
- schemaless boolean
- When enabled, syncs will not validate or structure records against the stream's schema. Default: false
- validation
Policy string - The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]
- format
Source
S3Configuration Stream Format - The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
- name str
- The name of the stream.
- days_
to_ floatsync_ if_ history_ is_ full - When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
- globs Sequence[str]
- The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching, look here. Default: ["**"]
- input_
schema str - The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
- legacy_
prefix str - The path prefix configured in v3 versions of the S3 connector. This option is deprecated in favor of a single glob.
- primary_
key str - The column or columns (for a composite key) that serves as the unique identifier of a record. If empty, the primary key will default to the parser's default primary key.
- recent_
n_ floatfiles_ to_ read_ for_ schema_ discovery - The number of recent files which will be used to discover the schema for this stream.
- schemaless bool
- When enabled, syncs will not validate or structure records against the stream's schema. Default: false
- validation_
policy str - The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]
- format Property Map
- The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
- name String
- The name of the stream.
- days
To NumberSync If History Is Full - When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
- globs List<String>
- The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching, look here. Default: ["**"]
- input
Schema String - The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
- legacy
Prefix String - The path prefix configured in v3 versions of the S3 connector. This option is deprecated in favor of a single glob.
- primary
Key String - The column or columns (for a composite key) that serves as the unique identifier of a record. If empty, the primary key will default to the parser's default primary key.
- recent
NFiles NumberTo Read For Schema Discovery - The number of recent files which will be used to discover the schema for this stream.
- schemaless Boolean
- When enabled, syncs will not validate or structure records against the stream's schema. Default: false
- validation
Policy String - The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]
SourceS3ConfigurationStreamFormat, SourceS3ConfigurationStreamFormatArgs
- Avro
Format SourceS3Configuration Stream Format Avro Format - Csv
Format SourceS3Configuration Stream Format Csv Format - Excel
Format SourceS3Configuration Stream Format Excel Format - Jsonl
Format SourceS3Configuration Stream Format Jsonl Format - Parquet
Format SourceS3Configuration Stream Format Parquet Format - Unstructured
Document SourceFormat S3Configuration Stream Format Unstructured Document Format - Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.
- Avro
Format SourceS3Configuration Stream Format Avro Format - Csv
Format SourceS3Configuration Stream Format Csv Format - Excel
Format SourceS3Configuration Stream Format Excel Format - Jsonl
Format SourceS3Configuration Stream Format Jsonl Format - Parquet
Format SourceS3Configuration Stream Format Parquet Format - Unstructured
Document SourceFormat S3Configuration Stream Format Unstructured Document Format - Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.
- avro
Format SourceS3Configuration Stream Format Avro Format - csv
Format SourceS3Configuration Stream Format Csv Format - excel
Format SourceS3Configuration Stream Format Excel Format - jsonl
Format SourceS3Configuration Stream Format Jsonl Format - parquet
Format SourceS3Configuration Stream Format Parquet Format - unstructured
Document SourceFormat S3Configuration Stream Format Unstructured Document Format - Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.
- avro
Format SourceS3Configuration Stream Format Avro Format - csv
Format SourceS3Configuration Stream Format Csv Format - excel
Format SourceS3Configuration Stream Format Excel Format - jsonl
Format SourceS3Configuration Stream Format Jsonl Format - parquet
Format SourceS3Configuration Stream Format Parquet Format - unstructured
Document SourceFormat S3Configuration Stream Format Unstructured Document Format - Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.
- avro_
format SourceS3Configuration Stream Format Avro Format - csv_
format SourceS3Configuration Stream Format Csv Format - excel_
format SourceS3Configuration Stream Format Excel Format - jsonl_
format SourceS3Configuration Stream Format Jsonl Format - parquet_
format SourceS3Configuration Stream Format Parquet Format - unstructured_
document_ Sourceformat S3Configuration Stream Format Unstructured Document Format - Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.
- avro
Format Property Map - csv
Format Property Map - excel
Format Property Map - jsonl
Format Property Map - parquet
Format Property Map - unstructured
Document Property MapFormat - Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.
SourceS3ConfigurationStreamFormatAvroFormat, SourceS3ConfigurationStreamFormatAvroFormatArgs
- Double
As boolString - Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false
- Double
As boolString - Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false
- double
As BooleanString - Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false
- double
As booleanString - Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false
- double_
as_ boolstring - Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false
- double
As BooleanString - Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false
SourceS3ConfigurationStreamFormatCsvFormat, SourceS3ConfigurationStreamFormatCsvFormatArgs
- Delimiter string
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- Double
Quote bool - Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- Encoding string
- The character encoding of the CSV data. Leave blank to default to UTF8. See list of python encodings for allowable options. Default: "utf8"
- Escape
Char string - The character used for escaping special characters. To disallow escaping, leave this field blank.
- False
Values List<string> - A set of case-sensitive strings that should be interpreted as false values. Default: ["n","no","f","false","off","0"]
- Header
Definition SourceS3Configuration Stream Format Csv Format Header Definition - How headers will be defined.
`User Provided` assumes the CSV does not have a header row and uses the headers provided, and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers of the form `f{i}`, where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows. - Ignore
Errors boolOn Fields Mismatch - Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
- Inference
Type string - How to infer the types of the columns. If none, inference defaults to strings. Must be one of ["None", "Primitive Types Only"]
- Null
Values List<string> - A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field. Default: []
- Quote
Char string - The character used for quoting CSV values. To disallow quoting, make this field blank. Default: """
- Skip
Rows doubleAfter Header - The number of rows to skip after the header row. Default: 0
- Skip
Rows doubleBefore Header - The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
- Strings
Can boolBe Null - Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
- True
Values List<string> - A set of case-sensitive strings that should be interpreted as true values. Default: ["y","yes","t","true","on","1"]
- Delimiter string
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- Double
Quote bool - Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- Encoding string
- The character encoding of the CSV data. Leave blank to default to UTF8. See list of python encodings for allowable options. Default: "utf8"
- Escape
Char string - The character used for escaping special characters. To disallow escaping, leave this field blank.
- False
Values []string - A set of case-sensitive strings that should be interpreted as false values. Default: ["n","no","f","false","off","0"]
- Header
Definition SourceS3Configuration Stream Format Csv Format Header Definition - How headers will be defined.
`User Provided` assumes the CSV does not have a header row and uses the headers provided, and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers of the form `f{i}`, where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows. - Ignore
Errors boolOn Fields Mismatch - Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
- Inference
Type string - How to infer the types of the columns. If none, inference defaults to strings. Must be one of ["None", "Primitive Types Only"]
- Null
Values []string - A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field. Default: []
- Quote
Char string - The character used for quoting CSV values. To disallow quoting, make this field blank. Default: """
- Skip
Rows float64After Header - The number of rows to skip after the header row. Default: 0
- Skip
Rows float64Before Header - The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
- Strings
Can boolBe Null - Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
- True
Values []string - A set of case-sensitive strings that should be interpreted as true values. Default: ["y","yes","t","true","on","1"]
- delimiter String
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- double
Quote Boolean - Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- encoding String
- The character encoding of the CSV data. Leave blank to default to UTF8. See list of python encodings for allowable options. Default: "utf8"
- escape
Char String - The character used for escaping special characters. To disallow escaping, leave this field blank.
- false
Values List<String> - A set of case-sensitive strings that should be interpreted as false values. Default: ["n","no","f","false","off","0"]
- header
Definition SourceS3Configuration Stream Format Csv Format Header Definition - How headers will be defined.
`User Provided` assumes the CSV does not have a header row and uses the headers provided, and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers of the form `f{i}`, where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows. - ignore
Errors BooleanOn Fields Mismatch - Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
- inference
Type String - How to infer the types of the columns. If none, inference defaults to strings. Must be one of ["None", "Primitive Types Only"]
- null
Values List<String> - A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field. Default: []
- quote
Char String - The character used for quoting CSV values. To disallow quoting, make this field blank. Default: """
- skip
Rows DoubleAfter Header - The number of rows to skip after the header row. Default: 0
- skip
Rows DoubleBefore Header - The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
- strings
Can BooleanBe Null - Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
- true
Values List<String> - A set of case-sensitive strings that should be interpreted as true values. Default: ["y","yes","t","true","on","1"]
- delimiter string
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- double
Quote boolean - Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- encoding string
- The character encoding of the CSV data. Leave blank to default to UTF8. See list of python encodings for allowable options. Default: "utf8"
- escape
Char string - The character used for escaping special characters. To disallow escaping, leave this field blank.
- false
Values string[] - A set of case-sensitive strings that should be interpreted as false values. Default: ["n","no","f","false","off","0"]
- header
Definition SourceS3Configuration Stream Format Csv Format Header Definition - How headers will be defined.
`User Provided` assumes the CSV does not have a header row and uses the headers provided, and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers of the form `f{i}`, where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows. - ignore
Errors booleanOn Fields Mismatch - Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
- inference
Type string - How to infer the types of the columns. If none, inference defaults to strings. Must be one of ["None", "Primitive Types Only"]
- null
Values string[] - A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field. Default: []
- quote
Char string - The character used for quoting CSV values. To disallow quoting, make this field blank. Default: """
- skip
Rows numberAfter Header - The number of rows to skip after the header row. Default: 0
- skip
Rows numberBefore Header - The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
- strings
Can booleanBe Null - Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
- true
Values string[] - A set of case-sensitive strings that should be interpreted as true values. Default: ["y","yes","t","true","on","1"]
- delimiter str
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- double_
quote bool - Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- encoding str
- The character encoding of the CSV data. Leave blank to default to UTF8. See list of python encodings for allowable options. Default: "utf8"
- escape_
char str - The character used for escaping special characters. To disallow escaping, leave this field blank.
- false_
values Sequence[str] - A set of case-sensitive strings that should be interpreted as false values. Default: ["n","no","f","false","off","0"]
- header_
definition SourceS3Configuration Stream Format Csv Format Header Definition - How headers will be defined.
`User Provided` assumes the CSV does not have a header row and uses the headers provided, and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers of the form `f{i}`, where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows. - ignore_
errors_ boolon_ fields_ mismatch - Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
- inference_
type str - How to infer the types of the columns. If none, inference defaults to strings. Must be one of ["None", "Primitive Types Only"]
- null_
values Sequence[str] - A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field. Default: []
- quote_
char str - The character used for quoting CSV values. To disallow quoting, make this field blank. Default: """
- skip_
rows_ floatafter_ header - The number of rows to skip after the header row. Default: 0
- skip_
rows_ floatbefore_ header - The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
- strings_
can_ boolbe_ null - Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
- true_
values Sequence[str] - A set of case-sensitive strings that should be interpreted as true values. Default: ["y","yes","t","true","on","1"]
- delimiter String
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- double
Quote Boolean - Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- encoding String
- The character encoding of the CSV data. Leave blank to default to UTF8. See list of python encodings for allowable options. Default: "utf8"
- escape
Char String - The character used for escaping special characters. To disallow escaping, leave this field blank.
- false
Values List<String> - A set of case-sensitive strings that should be interpreted as false values. Default: ["n","no","f","false","off","0"]
- header
Definition Property Map - How headers will be defined.
`User Provided` assumes the CSV does not have a header row and uses the headers provided, and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers of the form `f{i}`, where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows. - ignore
Errors BooleanOn Fields Mismatch - Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
- inference
Type String - How to infer the types of the columns. If none, inference defaults to strings. Must be one of ["None", "Primitive Types Only"]
- null
Values List<String> - A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field. Default: []
- quote
Char String - The character used for quoting CSV values. To disallow quoting, make this field blank. Default: """
- skip
Rows NumberAfter Header - The number of rows to skip after the header row. Default: 0
- skip
Rows NumberBefore Header - The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
- strings
Can BooleanBe Null - Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
- true
Values List<String> - A set of case-sensitive strings that should be interpreted as true values. Default: ["y","yes","t","true","on","1"]
SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinition, SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionArgs
SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvided, SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvidedArgs
- Column
Names List<string> - The column names that will be used while emitting the CSV records
- Column
Names []string - The column names that will be used while emitting the CSV records
- column
Names List<String> - The column names that will be used while emitting the CSV records
- column
Names string[] - The column names that will be used while emitting the CSV records
- column_
names Sequence[str] - The column names that will be used while emitting the CSV records
- column
Names List<String> - The column names that will be used while emitting the CSV records
SourceS3ConfigurationStreamFormatParquetFormat, SourceS3ConfigurationStreamFormatParquetFormatArgs
- Decimal
As boolFloat - Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false
- Decimal
As boolFloat - Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false
- decimal
As BooleanFloat - Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false
- decimal
As booleanFloat - Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false
- decimal_
as_ boolfloat - Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false
- decimal
As BooleanFloat - Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false
SourceS3ConfigurationStreamFormatUnstructuredDocumentFormat, SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatArgs
- Processing
Source
S3Configuration Stream Format Unstructured Document Format Processing - Processing configuration
- Skip
Unprocessable boolFiles - If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
- Strategy string
- The strategy used to parse documents.
`fast` extracts text directly from the document which doesn't work for all files. `ocr_only` is more reliable, but slower. `hi_res` is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]
- Processing
Source
S3Configuration Stream Format Unstructured Document Format Processing - Processing configuration
- Skip
Unprocessable boolFiles - If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
- Strategy string
- The strategy used to parse documents.
`fast` extracts text directly from the document which doesn't work for all files. `ocr_only` is more reliable, but slower. `hi_res` is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]
- processing
Source
S3Configuration Stream Format Unstructured Document Format Processing - Processing configuration
- skip
Unprocessable BooleanFiles - If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
- strategy String
- The strategy used to parse documents.
`fast` extracts text directly from the document which doesn't work for all files. `ocr_only` is more reliable, but slower. `hi_res` is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]
- processing
Source
S3Configuration Stream Format Unstructured Document Format Processing - Processing configuration
- skip
Unprocessable booleanFiles - If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
- strategy string
- The strategy used to parse documents.
`fast` extracts text directly from the document which doesn't work for all files. `ocr_only` is more reliable, but slower. `hi_res` is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]
- processing
Source
S3Configuration Stream Format Unstructured Document Format Processing - Processing configuration
- skip_
unprocessable_ boolfiles - If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
- strategy str
- The strategy used to parse documents.
`fast` extracts text directly from the document which doesn't work for all files. `ocr_only` is more reliable, but slower. `hi_res` is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]
- processing Property Map
- Processing configuration
- skip
Unprocessable BooleanFiles - If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
- strategy String
- The strategy used to parse documents.
`fast` extracts text directly from the document which doesn't work for all files. `ocr_only` is more reliable, but slower. `hi_res` is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]
SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessing, SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingArgs
- Local
Source
S3Configuration Stream Format Unstructured Document Format Processing Local - Process files locally, supporting
`fast` and `ocr` modes. This is the default option.
- Local
Source
S3Configuration Stream Format Unstructured Document Format Processing Local - Process files locally, supporting
`fast` and `ocr` modes. This is the default option.
- local
Source
S3Configuration Stream Format Unstructured Document Format Processing Local - Process files locally, supporting
`fast` and `ocr` modes. This is the default option.
- local
Source
S3Configuration Stream Format Unstructured Document Format Processing Local - Process files locally, supporting
`fast` and `ocr` modes. This is the default option.
- local
Source
S3Configuration Stream Format Unstructured Document Format Processing Local - Process files locally, supporting
`fast` and `ocr` modes. This is the default option.
- local Property Map
- Process files locally, supporting
`fast` and `ocr` modes. This is the default option.
SourceS3ResourceAllocation, SourceS3ResourceAllocationArgs
- Default
Source
S3Resource Allocation Default - optional resource requirements to run workers (blank for unbounded allocations)
- Job
Specifics List<SourceS3Resource Allocation Job Specific>
- Default
Source
S3Resource Allocation Default - optional resource requirements to run workers (blank for unbounded allocations)
- Job
Specifics []SourceS3Resource Allocation Job Specific
- default_
Source
S3Resource Allocation Default - optional resource requirements to run workers (blank for unbounded allocations)
- job
Specifics List<SourceS3Resource Allocation Job Specific>
- default
Source
S3Resource Allocation Default - optional resource requirements to run workers (blank for unbounded allocations)
- job
Specifics SourceS3Resource Allocation Job Specific[]
- default
Source
S3Resource Allocation Default - optional resource requirements to run workers (blank for unbounded allocations)
- job_
specifics Sequence[SourceS3Resource Allocation Job Specific]
- default Property Map
- optional resource requirements to run workers (blank for unbounded allocations)
- job
Specifics List<Property Map>
SourceS3ResourceAllocationDefault, SourceS3ResourceAllocationDefaultArgs
- Cpu
Limit string - Cpu
Request string - Ephemeral
Storage stringLimit - Ephemeral
Storage stringRequest - Memory
Limit string - Memory
Request string
- Cpu
Limit string - Cpu
Request string - Ephemeral
Storage stringLimit - Ephemeral
Storage stringRequest - Memory
Limit string - Memory
Request string
- cpu
Limit String - cpu
Request String - ephemeral
Storage StringLimit - ephemeral
Storage StringRequest - memory
Limit String - memory
Request String
- cpu
Limit string - cpu
Request string - ephemeral
Storage stringLimit - ephemeral
Storage stringRequest - memory
Limit string - memory
Request string
- cpu_
limit str - cpu_
request str - ephemeral_
storage_ strlimit - ephemeral_
storage_ strrequest - memory_
limit str - memory_
request str
- cpu
Limit String - cpu
Request String - ephemeral
Storage StringLimit - ephemeral
Storage StringRequest - memory
Limit String - memory
Request String
SourceS3ResourceAllocationJobSpecific, SourceS3ResourceAllocationJobSpecificArgs
- Job
Type string - enum that describes the different types of jobs that the platform runs.
- Resource
Requirements SourceS3Resource Allocation Job Specific Resource Requirements - optional resource requirements to run workers (blank for unbounded allocations)
- Job
Type string - enum that describes the different types of jobs that the platform runs.
- Resource
Requirements SourceS3Resource Allocation Job Specific Resource Requirements - optional resource requirements to run workers (blank for unbounded allocations)
- job
Type String - enum that describes the different types of jobs that the platform runs.
- resource
Requirements SourceS3Resource Allocation Job Specific Resource Requirements - optional resource requirements to run workers (blank for unbounded allocations)
- job
Type string - enum that describes the different types of jobs that the platform runs.
- resource
Requirements SourceS3Resource Allocation Job Specific Resource Requirements - optional resource requirements to run workers (blank for unbounded allocations)
- job_
type str - enum that describes the different types of jobs that the platform runs.
- resource_
requirements SourceS3Resource Allocation Job Specific Resource Requirements - optional resource requirements to run workers (blank for unbounded allocations)
- job
Type String - enum that describes the different types of jobs that the platform runs.
- resource
Requirements Property Map - optional resource requirements to run workers (blank for unbounded allocations)
SourceS3ResourceAllocationJobSpecificResourceRequirements, SourceS3ResourceAllocationJobSpecificResourceRequirementsArgs
- Cpu
Limit string - Cpu
Request string - Ephemeral
Storage stringLimit - Ephemeral
Storage stringRequest - Memory
Limit string - Memory
Request string
- Cpu
Limit string - Cpu
Request string - Ephemeral
Storage stringLimit - Ephemeral
Storage stringRequest - Memory
Limit string - Memory
Request string
- cpu
Limit String - cpu
Request String - ephemeral
Storage StringLimit - ephemeral
Storage StringRequest - memory
Limit String - memory
Request String
- cpu
Limit string - cpu
Request string - ephemeral
Storage stringLimit - ephemeral
Storage stringRequest - memory
Limit string - memory
Request string
- cpu_
limit str - cpu_
request str - ephemeral_
storage_ strlimit - ephemeral_
storage_ strrequest - memory_
limit str - memory_
request str
- cpu
Limit String - cpu
Request String - ephemeral
Storage StringLimit - ephemeral
Storage StringRequest - memory
Limit String - memory
Request String
Import
In Terraform v1.5.0 and later, the import block can be used with the id attribute, for example:
terraform
import {
to = airbyte_source_s3.my_airbyte_source_s3
id = "..."
}
The pulumi import command can be used, for example:
$ pulumi import airbyte:index/sourceS3:SourceS3 my_airbyte_source_s3 "..."
To learn more about importing existing cloud resources, see Importing resources.
Package Details
- Repository
- airbyte airbytehq/terraform-provider-airbyte
- License
- Notes
- This Pulumi package is based on the
airbyte Terraform Provider.
