SourceS3

airbyte 1.0.0-rc8, Feb 23, 2026

airbyte 1.0.0-rc8 published on Monday, Feb 23, 2026 by airbytehq

Schema (JSON)

airbytehq/terraform-provider-airbyte

airbyte 1.0.0-rc8 published on Monday, Feb 23, 2026 by airbytehq

Schema (JSON)

airbytehq/terraform-provider-airbyte

Example Usage

import * as pulumi from "@pulumi/pulumi";
import * as airbyte from "@pulumi/airbyte";

// Declares an Airbyte S3 source registered under the Pulumi name "my_source_s3".
// Strings of the form "...my_*..." look like placeholders to be replaced with
// real values before deploying.
const mySourceS3 = new airbyte.SourceS3("my_source_s3", {
    configuration: {
        awsAccessKeyId: "...my_aws_access_key_id...",
        awsSecretAccessKey: "...my_aws_secret_access_key...",
        bucket: "...my_bucket...",
        dataset: "...my_dataset...",
        deliveryMethod: {
            copyRawFiles: {
                preserveDirectoryStructure: false,
            },
        },
        endpoint: "my-s3-endpoint.com",
        format: {
            parquet: {
                batchSize: 6,
                bufferSize: 8,
                columns: ["..."],
            },
        },
        pathPattern: "**",
        // Nested provider block; it repeats several of the top-level settings
        // (credentials, bucket, endpoint, region, role, start date).
        provider: {
            awsAccessKeyId: "...my_aws_access_key_id...",
            awsSecretAccessKey: "...my_aws_secret_access_key...",
            bucket: "...my_bucket...",
            endpoint: "...my_endpoint...",
            pathPrefix: "...my_path_prefix...",
            regionName: "...my_region_name...",
            roleArn: "...my_role_arn...",
            startDate: "2021-01-01T00:00:00Z",
        },
        regionName: "...my_region_name...",
        roleArn: "...my_role_arn...",
        // The schema is passed as a JSON document encoded in a string.
        schema: "{\"column_1\": \"number\", \"column_2\": \"string\", \"column_3\": \"array\", \"column_4\": \"object\", \"column_5\": \"boolean\"}",
        startDate: "2021-01-01T00:00:00.000000Z",
        streams: [{
            daysToSyncIfHistoryIsFull: 5,
            // Both format variants are populated with empty objects in this example.
            format: {
                excelFormat: {},
                jsonlFormat: {},
            },
            globs: ["..."],
            inputSchema: "...my_input_schema...",
            legacyPrefix: "...my_legacy_prefix...",
            name: "...my_name...",
            primaryKey: "...my_primary_key...",
            recentNFilesToReadForSchemaDiscovery: 10,
            schemaless: true,
            validationPolicy: "Wait for Discover",
        }],
    },
    definitionId: "07ef8ae4-b6a4-4fd9-99ea-a368c6fc144c",
    name: "...my_name...",
    secretId: "...my_secret_id...",
    workspaceId: "bba7dce0-5020-4916-bbd7-be8f298d5f78",
});

import pulumi
import pulumi_airbyte as airbyte

# Declare the Airbyte S3 source registered under the Pulumi name "my_source_s3".
# Strings of the form "...my_*..." look like placeholders to be replaced with
# real values before deploying.
my_source_s3 = airbyte.SourceS3(
    "my_source_s3",
    configuration=dict(
        aws_access_key_id="...my_aws_access_key_id...",
        aws_secret_access_key="...my_aws_secret_access_key...",
        bucket="...my_bucket...",
        dataset="...my_dataset...",
        delivery_method=dict(
            copy_raw_files=dict(
                preserve_directory_structure=False,
            ),
        ),
        endpoint="my-s3-endpoint.com",
        format=dict(
            parquet=dict(
                batch_size=6,
                buffer_size=8,
                columns=["..."],
            ),
        ),
        path_pattern="**",
        # Nested provider block; it repeats several of the top-level settings.
        provider=dict(
            aws_access_key_id="...my_aws_access_key_id...",
            aws_secret_access_key="...my_aws_secret_access_key...",
            bucket="...my_bucket...",
            endpoint="...my_endpoint...",
            path_prefix="...my_path_prefix...",
            region_name="...my_region_name...",
            role_arn="...my_role_arn...",
            start_date="2021-01-01T00:00:00Z",
        ),
        region_name="...my_region_name...",
        role_arn="...my_role_arn...",
        # The schema is passed as a JSON document encoded in a string.
        schema="{\"column_1\": \"number\", \"column_2\": \"string\", \"column_3\": \"array\", \"column_4\": \"object\", \"column_5\": \"boolean\"}",
        start_date="2021-01-01T00:00:00.000000Z",
        streams=[
            dict(
                days_to_sync_if_history_is_full=5,
                # Both format variants are populated with empty dicts here.
                format=dict(
                    excel_format=dict(),
                    jsonl_format=dict(),
                ),
                globs=["..."],
                input_schema="...my_input_schema...",
                legacy_prefix="...my_legacy_prefix...",
                name="...my_name...",
                primary_key="...my_primary_key...",
                recent_n_files_to_read_for_schema_discovery=10,
                schemaless=True,
                validation_policy="Wait for Discover",
            ),
        ],
    ),
    definition_id="07ef8ae4-b6a4-4fd9-99ea-a368c6fc144c",
    name="...my_name...",
    secret_id="...my_secret_id...",
    workspace_id="bba7dce0-5020-4916-bbd7-be8f298d5f78",
)

package main

import (
	"github.com/pulumi/pulumi-terraform-provider/sdks/go/airbyte/airbyte"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that declares a single Airbyte S3 source under
// the Pulumi name "my_source_s3". Strings of the form "...my_*..." look like
// placeholders to be replaced with real values before deploying.
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// How files are delivered: raw copy, without preserving directories.
		delivery := &airbyte.SourceS3ConfigurationDeliveryMethodArgs{
			CopyRawFiles: &airbyte.SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs{
				PreserveDirectoryStructure: pulumi.Bool(false),
			},
		}

		// Top-level file format settings (Parquet variant).
		parquet := &airbyte.SourceS3ConfigurationFormatParquetArgs{
			BatchSize:  pulumi.Float64(6),
			BufferSize: pulumi.Float64(8),
			Columns: pulumi.StringArray{
				pulumi.String("..."),
			},
		}
		fileFormat := &airbyte.SourceS3ConfigurationFormatArgs{
			Parquet: parquet,
		}

		// Nested provider block; it repeats several of the top-level settings.
		providerSettings := &airbyte.SourceS3ConfigurationProviderArgs{
			AwsAccessKeyId:     pulumi.String("...my_aws_access_key_id..."),
			AwsSecretAccessKey: pulumi.String("...my_aws_secret_access_key..."),
			Bucket:             pulumi.String("...my_bucket..."),
			Endpoint:           pulumi.String("...my_endpoint..."),
			PathPrefix:         pulumi.String("...my_path_prefix..."),
			RegionName:         pulumi.String("...my_region_name..."),
			RoleArn:            pulumi.String("...my_role_arn..."),
			StartDate:          pulumi.String("2021-01-01T00:00:00Z"),
		}

		// The single stream of this source; both format variants are
		// populated with empty structs in this example.
		stream := &airbyte.SourceS3ConfigurationStreamArgs{
			DaysToSyncIfHistoryIsFull: pulumi.Float64(5),
			Format: &airbyte.SourceS3ConfigurationStreamFormatArgs{
				ExcelFormat: &airbyte.SourceS3ConfigurationStreamFormatExcelFormatArgs{},
				JsonlFormat: &airbyte.SourceS3ConfigurationStreamFormatJsonlFormatArgs{},
			},
			Globs: pulumi.StringArray{
				pulumi.String("..."),
			},
			InputSchema:                          pulumi.String("...my_input_schema..."),
			LegacyPrefix:                         pulumi.String("...my_legacy_prefix..."),
			Name:                                 pulumi.String("...my_name..."),
			PrimaryKey:                           pulumi.String("...my_primary_key..."),
			RecentNFilesToReadForSchemaDiscovery: pulumi.Float64(10),
			Schemaless:                           pulumi.Bool(true),
			ValidationPolicy:                     pulumi.String("Wait for Discover"),
		}

		configuration := &airbyte.SourceS3ConfigurationArgs{
			AwsAccessKeyId:     pulumi.String("...my_aws_access_key_id..."),
			AwsSecretAccessKey: pulumi.String("...my_aws_secret_access_key..."),
			Bucket:             pulumi.String("...my_bucket..."),
			Dataset:            pulumi.String("...my_dataset..."),
			DeliveryMethod:     delivery,
			Endpoint:           pulumi.String("my-s3-endpoint.com"),
			Format:             fileFormat,
			PathPattern:        pulumi.String("**"),
			Provider:           providerSettings,
			RegionName:         pulumi.String("...my_region_name..."),
			RoleArn:            pulumi.String("...my_role_arn..."),
			Schema:             pulumi.String("{\"column_1\": \"number\", \"column_2\": \"string\", \"column_3\": \"array\", \"column_4\": \"object\", \"column_5\": \"boolean\"}"),
			StartDate:          pulumi.String("2021-01-01T00:00:00.000000Z"),
			Streams:            airbyte.SourceS3ConfigurationStreamArray{stream},
		}

		// The resource handle is not used afterwards, so only the error is
		// kept and handed straight back to pulumi.Run.
		_, err := airbyte.NewSourceS3(ctx, "my_source_s3", &airbyte.SourceS3Args{
			Configuration: configuration,
			DefinitionId:  pulumi.String("07ef8ae4-b6a4-4fd9-99ea-a368c6fc144c"),
			Name:          pulumi.String("...my_name..."),
			SecretId:      pulumi.String("...my_secret_id..."),
			WorkspaceId:   pulumi.String("bba7dce0-5020-4916-bbd7-be8f298d5f78"),
		})
		return err
	})
}

using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Airbyte = Pulumi.Airbyte;

// Runs a Pulumi program that declares a single Airbyte S3 source under the
// Pulumi name "my_source_s3". Strings of the form "...my_*..." look like
// placeholders to be replaced with real values before deploying.
return await Deployment.RunAsync(() =>
{
    var mySourceS3 = new Airbyte.SourceS3("my_source_s3", new()
    {
        Configuration = new Airbyte.Inputs.SourceS3ConfigurationArgs
        {
            AwsAccessKeyId = "...my_aws_access_key_id...",
            AwsSecretAccessKey = "...my_aws_secret_access_key...",
            Bucket = "...my_bucket...",
            Dataset = "...my_dataset...",
            DeliveryMethod = new Airbyte.Inputs.SourceS3ConfigurationDeliveryMethodArgs
            {
                CopyRawFiles = new Airbyte.Inputs.SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs
                {
                    PreserveDirectoryStructure = false,
                },
            },
            Endpoint = "my-s3-endpoint.com",
            Format = new Airbyte.Inputs.SourceS3ConfigurationFormatArgs
            {
                Parquet = new Airbyte.Inputs.SourceS3ConfigurationFormatParquetArgs
                {
                    BatchSize = 6,
                    BufferSize = 8,
                    Columns = new[]
                    {
                        "...",
                    },
                },
            },
            PathPattern = "**",
            // Nested provider block; it repeats several of the top-level settings.
            Provider = new Airbyte.Inputs.SourceS3ConfigurationProviderArgs
            {
                AwsAccessKeyId = "...my_aws_access_key_id...",
                AwsSecretAccessKey = "...my_aws_secret_access_key...",
                Bucket = "...my_bucket...",
                Endpoint = "...my_endpoint...",
                PathPrefix = "...my_path_prefix...",
                RegionName = "...my_region_name...",
                RoleArn = "...my_role_arn...",
                StartDate = "2021-01-01T00:00:00Z",
            },
            RegionName = "...my_region_name...",
            RoleArn = "...my_role_arn...",
            // The schema is passed as a JSON document encoded in a string.
            Schema = "{\"column_1\": \"number\", \"column_2\": \"string\", \"column_3\": \"array\", \"column_4\": \"object\", \"column_5\": \"boolean\"}",
            StartDate = "2021-01-01T00:00:00.000000Z",
            Streams = new[]
            {
                new Airbyte.Inputs.SourceS3ConfigurationStreamArgs
                {
                    DaysToSyncIfHistoryIsFull = 5,
                    Format = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatArgs
                    {
                        // Empty args objects rather than null, so these settings are
                        // actually sent; this matches the empty objects used by the
                        // other language examples for the same resource.
                        ExcelFormat = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatExcelFormatArgs(),
                        JsonlFormat = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatJsonlFormatArgs(),
                    },
                    Globs = new[]
                    {
                        "...",
                    },
                    InputSchema = "...my_input_schema...",
                    LegacyPrefix = "...my_legacy_prefix...",
                    Name = "...my_name...",
                    PrimaryKey = "...my_primary_key...",
                    RecentNFilesToReadForSchemaDiscovery = 10,
                    Schemaless = true,
                    ValidationPolicy = "Wait for Discover",
                },
            },
        },
        DefinitionId = "07ef8ae4-b6a4-4fd9-99ea-a368c6fc144c",
        Name = "...my_name...",
        SecretId = "...my_secret_id...",
        WorkspaceId = "bba7dce0-5020-4916-bbd7-be8f298d5f78",
    });

});

package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.airbyte.SourceS3;
import com.pulumi.airbyte.SourceS3Args;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationDeliveryMethodArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationFormatArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationFormatParquetArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationProviderArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamFormatArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamFormatExcelFormatArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamFormatJsonlFormatArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Example Pulumi program that declares a single Airbyte S3 source.
 *
 * <p>Strings of the form "...my_*..." look like placeholders to be replaced
 * with real values before deploying.
 */
public class App {
    /**
     * Program entry point; hands the stack definition to the Pulumi runtime.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    /**
     * Declares the resources of the stack: one {@code SourceS3}.
     *
     * @param ctx the Pulumi context supplied by {@code Pulumi.run}
     */
    public static void stack(Context ctx) {
        var mySourceS3 = new SourceS3("mySourceS3", SourceS3Args.builder()
            .configuration(SourceS3ConfigurationArgs.builder()
                .awsAccessKeyId("...my_aws_access_key_id...")
                .awsSecretAccessKey("...my_aws_secret_access_key...")
                .bucket("...my_bucket...")
                .dataset("...my_dataset...")
                .deliveryMethod(SourceS3ConfigurationDeliveryMethodArgs.builder()
                    .copyRawFiles(SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs.builder()
                        .preserveDirectoryStructure(false)
                        .build())
                    .build())
                .endpoint("my-s3-endpoint.com")
                .format(SourceS3ConfigurationFormatArgs.builder()
                    .parquet(SourceS3ConfigurationFormatParquetArgs.builder()
                        .batchSize(6.0)
                        .bufferSize(8.0)
                        .columns("...")
                        .build())
                    .build())
                .pathPattern("**")
                // Nested provider block; it repeats several of the top-level settings.
                .provider(SourceS3ConfigurationProviderArgs.builder()
                    .awsAccessKeyId("...my_aws_access_key_id...")
                    .awsSecretAccessKey("...my_aws_secret_access_key...")
                    .bucket("...my_bucket...")
                    .endpoint("...my_endpoint...")
                    .pathPrefix("...my_path_prefix...")
                    .regionName("...my_region_name...")
                    .roleArn("...my_role_arn...")
                    .startDate("2021-01-01T00:00:00Z")
                    .build())
                .regionName("...my_region_name...")
                .roleArn("...my_role_arn...")
                // The schema is passed as a JSON document encoded in a string.
                .schema("{\"column_1\": \"number\", \"column_2\": \"string\", \"column_3\": \"array\", \"column_4\": \"object\", \"column_5\": \"boolean\"}")
                .startDate("2021-01-01T00:00:00.000000Z")
                .streams(SourceS3ConfigurationStreamArgs.builder()
                    .daysToSyncIfHistoryIsFull(5.0)
                    // Both format variants are populated with empty objects in this example.
                    .format(SourceS3ConfigurationStreamFormatArgs.builder()
                        .excelFormat(SourceS3ConfigurationStreamFormatExcelFormatArgs.builder()
                            .build())
                        .jsonlFormat(SourceS3ConfigurationStreamFormatJsonlFormatArgs.builder()
                            .build())
                        .build())
                    .globs("...")
                    .inputSchema("...my_input_schema...")
                    .legacyPrefix("...my_legacy_prefix...")
                    .name("...my_name...")
                    .primaryKey("...my_primary_key...")
                    .recentNFilesToReadForSchemaDiscovery(10.0)
                    .schemaless(true)
                    .validationPolicy("Wait for Discover")
                    .build())
                .build())
            .definitionId("07ef8ae4-b6a4-4fd9-99ea-a368c6fc144c")
            .name("...my_name...")
            .secretId("...my_secret_id...")
            .workspaceId("bba7dce0-5020-4916-bbd7-be8f298d5f78")
            .build());

    }
}

# Declares a single Airbyte S3 source under the Pulumi name "my_source_s3".
# Strings of the form '...my_*...' look like placeholders to be replaced with
# real values before deploying.
resources:
  mySourceS3:
    type: airbyte:SourceS3
    name: my_source_s3
    properties:
      configuration:
        awsAccessKeyId: '...my_aws_access_key_id...'
        awsSecretAccessKey: '...my_aws_secret_access_key...'
        bucket: '...my_bucket...'
        dataset: '...my_dataset...'
        deliveryMethod:
          copyRawFiles:
            preserveDirectoryStructure: false
        endpoint: my-s3-endpoint.com
        format:
          parquet:
            batchSize: 6
            bufferSize: 8
            columns:
              - '...'
        pathPattern: '**'
        # Nested provider block; it repeats several of the top-level settings.
        provider:
          awsAccessKeyId: '...my_aws_access_key_id...'
          awsSecretAccessKey: '...my_aws_secret_access_key...'
          bucket: '...my_bucket...'
          endpoint: '...my_endpoint...'
          pathPrefix: '...my_path_prefix...'
          regionName: '...my_region_name...'
          roleArn: '...my_role_arn...'
          # Quoted so the value stays a string even under parsers that
          # resolve bare ISO-8601 scalars to a timestamp type.
          startDate: '2021-01-01T00:00:00Z'
        regionName: '...my_region_name...'
        roleArn: '...my_role_arn...'
        # The schema is passed as a JSON document encoded in a string.
        schema: '{"column_1": "number", "column_2": "string", "column_3": "array", "column_4": "object", "column_5": "boolean"}'
        # Quoted for the same reason as provider.startDate above.
        startDate: '2021-01-01T00:00:00.000000Z'
        streams:
          - daysToSyncIfHistoryIsFull: 5
            # Both format variants are populated with empty mappings here.
            format:
              excelFormat: {}
              jsonlFormat: {}
            globs:
              - '...'
            inputSchema: '...my_input_schema...'
            legacyPrefix: '...my_legacy_prefix...'
            name: '...my_name...'
            primaryKey: '...my_primary_key...'
            recentNFilesToReadForSchemaDiscovery: 10
            schemaless: true
            validationPolicy: Wait for Discover
      definitionId: 07ef8ae4-b6a4-4fd9-99ea-a368c6fc144c
      name: '...my_name...'
      secretId: '...my_secret_id...'
      workspaceId: bba7dce0-5020-4916-bbd7-be8f298d5f78

Create SourceS3 Resource

Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.

Constructor syntax

new SourceS3(name: string, args: SourceS3Args, opts?: CustomResourceOptions);

@overload
def SourceS3(resource_name: str,
             args: SourceS3Args,
             opts: Optional[ResourceOptions] = None)

@overload
def SourceS3(resource_name: str,
             opts: Optional[ResourceOptions] = None,
             configuration: Optional[SourceS3ConfigurationArgs] = None,
             workspace_id: Optional[str] = None,
             definition_id: Optional[str] = None,
             name: Optional[str] = None,
             secret_id: Optional[str] = None)

func NewSourceS3(ctx *Context, name string, args SourceS3Args, opts ...ResourceOption) (*SourceS3, error)

public SourceS3(string name, SourceS3Args args, CustomResourceOptions? opts = null)

public SourceS3(String name, SourceS3Args args)
public SourceS3(String name, SourceS3Args args, CustomResourceOptions options)

type: airbyte:SourceS3
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.

Parameters

name string: The unique name of the resource.
args SourceS3Args: The arguments to resource properties.
opts CustomResourceOptions: Bag of options to control resource's behavior.

resource_name str: The unique name of the resource.
args SourceS3Args: The arguments to resource properties.
opts ResourceOptions: Bag of options to control resource's behavior.

ctx Context: Context object for the current deployment.
name string: The unique name of the resource.
args SourceS3Args: The arguments to resource properties.
opts ResourceOption: Bag of options to control resource's behavior.

name string: The unique name of the resource.
args SourceS3Args: The arguments to resource properties.
opts CustomResourceOptions: Bag of options to control resource's behavior.

name String: The unique name of the resource.
args SourceS3Args: The arguments to resource properties.
options CustomResourceOptions: Bag of options to control resource's behavior.

Constructor example

The following reference example uses placeholder values for all input properties.

// Reference constructor call listing the SourceS3 inputs with placeholder
// values ("string", 0, false). Inputs that take an object with no fields are
// given an empty args object rather than null, matching the Go and Java
// reference examples for this resource.
var sourceS3Resource = new Airbyte.SourceS3("sourceS3Resource", new()
{
    Configuration = new Airbyte.Inputs.SourceS3ConfigurationArgs
    {
        Bucket = "string",
        Streams = new[]
        {
            new Airbyte.Inputs.SourceS3ConfigurationStreamArgs
            {
                Format = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatArgs
                {
                    AvroFormat = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatAvroFormatArgs
                    {
                        DoubleAsString = false,
                    },
                    CsvFormat = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatCsvFormatArgs
                    {
                        Delimiter = "string",
                        DoubleQuote = false,
                        Encoding = "string",
                        EscapeChar = "string",
                        FalseValues = new[]
                        {
                            "string",
                        },
                        HeaderDefinition = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionArgs
                        {
                            Autogenerated = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionAutogeneratedArgs(),
                            FromCsv = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionFromCsvArgs(),
                            UserProvided = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvidedArgs
                            {
                                ColumnNames = new[]
                                {
                                    "string",
                                },
                            },
                        },
                        IgnoreErrorsOnFieldsMismatch = false,
                        InferenceType = "string",
                        NullValues = new[]
                        {
                            "string",
                        },
                        QuoteChar = "string",
                        SkipRowsAfterHeader = 0,
                        SkipRowsBeforeHeader = 0,
                        StringsCanBeNull = false,
                        TrueValues = new[]
                        {
                            "string",
                        },
                    },
                    ExcelFormat = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatExcelFormatArgs(),
                    JsonlFormat = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatJsonlFormatArgs(),
                    ParquetFormat = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatParquetFormatArgs
                    {
                        DecimalAsFloat = false,
                    },
                    UnstructuredDocumentFormat = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatArgs
                    {
                        Processing = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingArgs
                        {
                            Local = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingLocalArgs(),
                        },
                        SkipUnprocessableFiles = false,
                        Strategy = "string",
                    },
                },
                Name = "string",
                DaysToSyncIfHistoryIsFull = 0,
                Globs = new[]
                {
                    "string",
                },
                InputSchema = "string",
                LegacyPrefix = "string",
                PrimaryKey = "string",
                RecentNFilesToReadForSchemaDiscovery = 0,
                Schemaless = false,
                ValidationPolicy = "string",
            },
        },
        Format = new Airbyte.Inputs.SourceS3ConfigurationFormatArgs
        {
            Avro = new Airbyte.Inputs.SourceS3ConfigurationFormatAvroArgs(),
            Csv = new Airbyte.Inputs.SourceS3ConfigurationFormatCsvArgs
            {
                AdditionalReaderOptions = "string",
                AdvancedOptions = "string",
                BlockSize = 0,
                Delimiter = "string",
                DoubleQuote = false,
                Encoding = "string",
                EscapeChar = "string",
                InferDatatypes = false,
                NewlinesInValues = false,
                QuoteChar = "string",
            },
            Jsonl = new Airbyte.Inputs.SourceS3ConfigurationFormatJsonlArgs
            {
                BlockSize = 0,
                NewlinesInValues = false,
                UnexpectedFieldBehavior = "string",
            },
            Parquet = new Airbyte.Inputs.SourceS3ConfigurationFormatParquetArgs
            {
                BatchSize = 0,
                BufferSize = 0,
                Columns = new[]
                {
                    "string",
                },
            },
        },
        Dataset = "string",
        DeliveryMethod = new Airbyte.Inputs.SourceS3ConfigurationDeliveryMethodArgs
        {
            CopyRawFiles = new Airbyte.Inputs.SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs
            {
                PreserveDirectoryStructure = false,
            },
            ReplicateRecords = new Airbyte.Inputs.SourceS3ConfigurationDeliveryMethodReplicateRecordsArgs(),
        },
        Endpoint = "string",
        AwsAccessKeyId = "string",
        PathPattern = "string",
        Provider = new Airbyte.Inputs.SourceS3ConfigurationProviderArgs
        {
            AwsAccessKeyId = "string",
            AwsSecretAccessKey = "string",
            Bucket = "string",
            Endpoint = "string",
            PathPrefix = "string",
            RegionName = "string",
            RoleArn = "string",
            StartDate = "string",
        },
        RegionName = "string",
        RoleArn = "string",
        Schema = "string",
        StartDate = "string",
        AwsSecretAccessKey = "string",
    },
    WorkspaceId = "string",
    DefinitionId = "string",
    Name = "string",
    SecretId = "string",
});

// Reference constructor call listing the SourceS3 inputs with placeholder
// values ("string", 0, false). This is a fragment: ctx must be supplied by the
// surrounding Pulumi program, and err is left for the caller to check.
example, err := airbyte.NewSourceS3(ctx, "sourceS3Resource", &airbyte.SourceS3Args{
	Configuration: &airbyte.SourceS3ConfigurationArgs{
		Bucket: pulumi.String("string"),
		Streams: airbyte.SourceS3ConfigurationStreamArray{
			&airbyte.SourceS3ConfigurationStreamArgs{
				Format: &airbyte.SourceS3ConfigurationStreamFormatArgs{
					AvroFormat: &airbyte.SourceS3ConfigurationStreamFormatAvroFormatArgs{
						DoubleAsString: pulumi.Bool(false),
					},
					CsvFormat: &airbyte.SourceS3ConfigurationStreamFormatCsvFormatArgs{
						Delimiter:   pulumi.String("string"),
						DoubleQuote: pulumi.Bool(false),
						Encoding:    pulumi.String("string"),
						EscapeChar:  pulumi.String("string"),
						FalseValues: pulumi.StringArray{
							pulumi.String("string"),
						},
						HeaderDefinition: &airbyte.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionArgs{
							Autogenerated: &airbyte.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionAutogeneratedArgs{},
							FromCsv:       &airbyte.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionFromCsvArgs{},
							UserProvided: &airbyte.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvidedArgs{
								ColumnNames: pulumi.StringArray{
									pulumi.String("string"),
								},
							},
						},
						IgnoreErrorsOnFieldsMismatch: pulumi.Bool(false),
						InferenceType:                pulumi.String("string"),
						NullValues: pulumi.StringArray{
							pulumi.String("string"),
						},
						QuoteChar:            pulumi.String("string"),
						SkipRowsAfterHeader:  pulumi.Float64(0),
						SkipRowsBeforeHeader: pulumi.Float64(0),
						StringsCanBeNull:     pulumi.Bool(false),
						TrueValues: pulumi.StringArray{
							pulumi.String("string"),
						},
					},
					ExcelFormat: &airbyte.SourceS3ConfigurationStreamFormatExcelFormatArgs{},
					JsonlFormat: &airbyte.SourceS3ConfigurationStreamFormatJsonlFormatArgs{},
					ParquetFormat: &airbyte.SourceS3ConfigurationStreamFormatParquetFormatArgs{
						DecimalAsFloat: pulumi.Bool(false),
					},
					UnstructuredDocumentFormat: &airbyte.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatArgs{
						Processing: &airbyte.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingArgs{
							Local: &airbyte.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingLocalArgs{},
						},
						SkipUnprocessableFiles: pulumi.Bool(false),
						Strategy:               pulumi.String("string"),
					},
				},
				Name:                      pulumi.String("string"),
				DaysToSyncIfHistoryIsFull: pulumi.Float64(0),
				Globs: pulumi.StringArray{
					pulumi.String("string"),
				},
				InputSchema:                          pulumi.String("string"),
				LegacyPrefix:                         pulumi.String("string"),
				PrimaryKey:                           pulumi.String("string"),
				RecentNFilesToReadForSchemaDiscovery: pulumi.Float64(0),
				Schemaless:                           pulumi.Bool(false),
				ValidationPolicy:                     pulumi.String("string"),
			},
		},
		Format: &airbyte.SourceS3ConfigurationFormatArgs{
			Avro: &airbyte.SourceS3ConfigurationFormatAvroArgs{},
			Csv: &airbyte.SourceS3ConfigurationFormatCsvArgs{
				AdditionalReaderOptions: pulumi.String("string"),
				AdvancedOptions:         pulumi.String("string"),
				BlockSize:               pulumi.Float64(0),
				Delimiter:               pulumi.String("string"),
				DoubleQuote:             pulumi.Bool(false),
				Encoding:                pulumi.String("string"),
				EscapeChar:              pulumi.String("string"),
				InferDatatypes:          pulumi.Bool(false),
				NewlinesInValues:        pulumi.Bool(false),
				QuoteChar:               pulumi.String("string"),
			},
			Jsonl: &airbyte.SourceS3ConfigurationFormatJsonlArgs{
				BlockSize:               pulumi.Float64(0),
				NewlinesInValues:        pulumi.Bool(false),
				UnexpectedFieldBehavior: pulumi.String("string"),
			},
			Parquet: &airbyte.SourceS3ConfigurationFormatParquetArgs{
				BatchSize:  pulumi.Float64(0),
				BufferSize: pulumi.Float64(0),
				Columns: pulumi.StringArray{
					pulumi.String("string"),
				},
			},
		},
		Dataset: pulumi.String("string"),
		DeliveryMethod: &airbyte.SourceS3ConfigurationDeliveryMethodArgs{
			CopyRawFiles: &airbyte.SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs{
				PreserveDirectoryStructure: pulumi.Bool(false),
			},
			ReplicateRecords: &airbyte.SourceS3ConfigurationDeliveryMethodReplicateRecordsArgs{},
		},
		Endpoint:       pulumi.String("string"),
		AwsAccessKeyId: pulumi.String("string"),
		PathPattern:    pulumi.String("string"),
		Provider: &airbyte.SourceS3ConfigurationProviderArgs{
			AwsAccessKeyId:     pulumi.String("string"),
			AwsSecretAccessKey: pulumi.String("string"),
			Bucket:             pulumi.String("string"),
			Endpoint:           pulumi.String("string"),
			PathPrefix:         pulumi.String("string"),
			RegionName:         pulumi.String("string"),
			RoleArn:            pulumi.String("string"),
			StartDate:          pulumi.String("string"),
		},
		RegionName:         pulumi.String("string"),
		RoleArn:            pulumi.String("string"),
		Schema:             pulumi.String("string"),
		StartDate:          pulumi.String("string"),
		AwsSecretAccessKey: pulumi.String("string"),
	},
	WorkspaceId:  pulumi.String("string"),
	DefinitionId: pulumi.String("string"),
	Name:         pulumi.String("string"),
	SecretId:     pulumi.String("string"),
})

var sourceS3Resource = new SourceS3("sourceS3Resource", SourceS3Args.builder()
    .configuration(SourceS3ConfigurationArgs.builder()
        .bucket("string")
        .streams(SourceS3ConfigurationStreamArgs.builder()
            .format(SourceS3ConfigurationStreamFormatArgs.builder()
                .avroFormat(SourceS3ConfigurationStreamFormatAvroFormatArgs.builder()
                    .doubleAsString(false)
                    .build())
                .csvFormat(SourceS3ConfigurationStreamFormatCsvFormatArgs.builder()
                    .delimiter("string")
                    .doubleQuote(false)
                    .encoding("string")
                    .escapeChar("string")
                    .falseValues("string")
                    .headerDefinition(SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionArgs.builder()
                        .autogenerated(SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionAutogeneratedArgs.builder()
                            .build())
                        .fromCsv(SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionFromCsvArgs.builder()
                            .build())
                        .userProvided(SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvidedArgs.builder()
                            .columnNames("string")
                            .build())
                        .build())
                    .ignoreErrorsOnFieldsMismatch(false)
                    .inferenceType("string")
                    .nullValues("string")
                    .quoteChar("string")
                    .skipRowsAfterHeader(0.0)
                    .skipRowsBeforeHeader(0.0)
                    .stringsCanBeNull(false)
                    .trueValues("string")
                    .build())
                .excelFormat(SourceS3ConfigurationStreamFormatExcelFormatArgs.builder()
                    .build())
                .jsonlFormat(SourceS3ConfigurationStreamFormatJsonlFormatArgs.builder()
                    .build())
                .parquetFormat(SourceS3ConfigurationStreamFormatParquetFormatArgs.builder()
                    .decimalAsFloat(false)
                    .build())
                .unstructuredDocumentFormat(SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatArgs.builder()
                    .processing(SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingArgs.builder()
                        .local(SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingLocalArgs.builder()
                            .build())
                        .build())
                    .skipUnprocessableFiles(false)
                    .strategy("string")
                    .build())
                .build())
            .name("string")
            .daysToSyncIfHistoryIsFull(0.0)
            .globs("string")
            .inputSchema("string")
            .legacyPrefix("string")
            .primaryKey("string")
            .recentNFilesToReadForSchemaDiscovery(0.0)
            .schemaless(false)
            .validationPolicy("string")
            .build())
        .format(SourceS3ConfigurationFormatArgs.builder()
            .avro(SourceS3ConfigurationFormatAvroArgs.builder()
                .build())
            .csv(SourceS3ConfigurationFormatCsvArgs.builder()
                .additionalReaderOptions("string")
                .advancedOptions("string")
                .blockSize(0.0)
                .delimiter("string")
                .doubleQuote(false)
                .encoding("string")
                .escapeChar("string")
                .inferDatatypes(false)
                .newlinesInValues(false)
                .quoteChar("string")
                .build())
            .jsonl(SourceS3ConfigurationFormatJsonlArgs.builder()
                .blockSize(0.0)
                .newlinesInValues(false)
                .unexpectedFieldBehavior("string")
                .build())
            .parquet(SourceS3ConfigurationFormatParquetArgs.builder()
                .batchSize(0.0)
                .bufferSize(0.0)
                .columns("string")
                .build())
            .build())
        .dataset("string")
        .deliveryMethod(SourceS3ConfigurationDeliveryMethodArgs.builder()
            .copyRawFiles(SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs.builder()
                .preserveDirectoryStructure(false)
                .build())
            .replicateRecords(SourceS3ConfigurationDeliveryMethodReplicateRecordsArgs.builder()
                .build())
            .build())
        .endpoint("string")
        .awsAccessKeyId("string")
        .pathPattern("string")
        .provider(SourceS3ConfigurationProviderArgs.builder()
            .awsAccessKeyId("string")
            .awsSecretAccessKey("string")
            .bucket("string")
            .endpoint("string")
            .pathPrefix("string")
            .regionName("string")
            .roleArn("string")
            .startDate("string")
            .build())
        .regionName("string")
        .roleArn("string")
        .schema("string")
        .startDate("string")
        .awsSecretAccessKey("string")
        .build())
    .workspaceId("string")
    .definitionId("string")
    .name("string")
    .secretId("string")
    .build());

source_s3_resource = airbyte.SourceS3("sourceS3Resource",
    configuration={
        "bucket": "string",
        "streams": [{
            "format": {
                "avro_format": {
                    "double_as_string": False,
                },
                "csv_format": {
                    "delimiter": "string",
                    "double_quote": False,
                    "encoding": "string",
                    "escape_char": "string",
                    "false_values": ["string"],
                    "header_definition": {
                        "autogenerated": {},
                        "from_csv": {},
                        "user_provided": {
                            "column_names": ["string"],
                        },
                    },
                    "ignore_errors_on_fields_mismatch": False,
                    "inference_type": "string",
                    "null_values": ["string"],
                    "quote_char": "string",
                    "skip_rows_after_header": 0,
                    "skip_rows_before_header": 0,
                    "strings_can_be_null": False,
                    "true_values": ["string"],
                },
                "excel_format": {},
                "jsonl_format": {},
                "parquet_format": {
                    "decimal_as_float": False,
                },
                "unstructured_document_format": {
                    "processing": {
                        "local": {},
                    },
                    "skip_unprocessable_files": False,
                    "strategy": "string",
                },
            },
            "name": "string",
            "days_to_sync_if_history_is_full": 0,
            "globs": ["string"],
            "input_schema": "string",
            "legacy_prefix": "string",
            "primary_key": "string",
            "recent_n_files_to_read_for_schema_discovery": 0,
            "schemaless": False,
            "validation_policy": "string",
        }],
        "format": {
            "avro": {},
            "csv": {
                "additional_reader_options": "string",
                "advanced_options": "string",
                "block_size": 0,
                "delimiter": "string",
                "double_quote": False,
                "encoding": "string",
                "escape_char": "string",
                "infer_datatypes": False,
                "newlines_in_values": False,
                "quote_char": "string",
            },
            "jsonl": {
                "block_size": 0,
                "newlines_in_values": False,
                "unexpected_field_behavior": "string",
            },
            "parquet": {
                "batch_size": 0,
                "buffer_size": 0,
                "columns": ["string"],
            },
        },
        "dataset": "string",
        "delivery_method": {
            "copy_raw_files": {
                "preserve_directory_structure": False,
            },
            "replicate_records": {},
        },
        "endpoint": "string",
        "aws_access_key_id": "string",
        "path_pattern": "string",
        "provider": {
            "aws_access_key_id": "string",
            "aws_secret_access_key": "string",
            "bucket": "string",
            "endpoint": "string",
            "path_prefix": "string",
            "region_name": "string",
            "role_arn": "string",
            "start_date": "string",
        },
        "region_name": "string",
        "role_arn": "string",
        "schema": "string",
        "start_date": "string",
        "aws_secret_access_key": "string",
    },
    workspace_id="string",
    definition_id="string",
    name="string",
    secret_id="string")

const sourceS3Resource = new airbyte.SourceS3("sourceS3Resource", {
    configuration: {
        bucket: "string",
        streams: [{
            format: {
                avroFormat: {
                    doubleAsString: false,
                },
                csvFormat: {
                    delimiter: "string",
                    doubleQuote: false,
                    encoding: "string",
                    escapeChar: "string",
                    falseValues: ["string"],
                    headerDefinition: {
                        autogenerated: {},
                        fromCsv: {},
                        userProvided: {
                            columnNames: ["string"],
                        },
                    },
                    ignoreErrorsOnFieldsMismatch: false,
                    inferenceType: "string",
                    nullValues: ["string"],
                    quoteChar: "string",
                    skipRowsAfterHeader: 0,
                    skipRowsBeforeHeader: 0,
                    stringsCanBeNull: false,
                    trueValues: ["string"],
                },
                excelFormat: {},
                jsonlFormat: {},
                parquetFormat: {
                    decimalAsFloat: false,
                },
                unstructuredDocumentFormat: {
                    processing: {
                        local: {},
                    },
                    skipUnprocessableFiles: false,
                    strategy: "string",
                },
            },
            name: "string",
            daysToSyncIfHistoryIsFull: 0,
            globs: ["string"],
            inputSchema: "string",
            legacyPrefix: "string",
            primaryKey: "string",
            recentNFilesToReadForSchemaDiscovery: 0,
            schemaless: false,
            validationPolicy: "string",
        }],
        format: {
            avro: {},
            csv: {
                additionalReaderOptions: "string",
                advancedOptions: "string",
                blockSize: 0,
                delimiter: "string",
                doubleQuote: false,
                encoding: "string",
                escapeChar: "string",
                inferDatatypes: false,
                newlinesInValues: false,
                quoteChar: "string",
            },
            jsonl: {
                blockSize: 0,
                newlinesInValues: false,
                unexpectedFieldBehavior: "string",
            },
            parquet: {
                batchSize: 0,
                bufferSize: 0,
                columns: ["string"],
            },
        },
        dataset: "string",
        deliveryMethod: {
            copyRawFiles: {
                preserveDirectoryStructure: false,
            },
            replicateRecords: {},
        },
        endpoint: "string",
        awsAccessKeyId: "string",
        pathPattern: "string",
        provider: {
            awsAccessKeyId: "string",
            awsSecretAccessKey: "string",
            bucket: "string",
            endpoint: "string",
            pathPrefix: "string",
            regionName: "string",
            roleArn: "string",
            startDate: "string",
        },
        regionName: "string",
        roleArn: "string",
        schema: "string",
        startDate: "string",
        awsSecretAccessKey: "string",
    },
    workspaceId: "string",
    definitionId: "string",
    name: "string",
    secretId: "string",
});

type: airbyte:SourceS3
properties:
    configuration:
        awsAccessKeyId: string
        awsSecretAccessKey: string
        bucket: string
        dataset: string
        deliveryMethod:
            copyRawFiles:
                preserveDirectoryStructure: false
            replicateRecords: {}
        endpoint: string
        format:
            avro: {}
            csv:
                additionalReaderOptions: string
                advancedOptions: string
                blockSize: 0
                delimiter: string
                doubleQuote: false
                encoding: string
                escapeChar: string
                inferDatatypes: false
                newlinesInValues: false
                quoteChar: string
            jsonl:
                blockSize: 0
                newlinesInValues: false
                unexpectedFieldBehavior: string
            parquet:
                batchSize: 0
                bufferSize: 0
                columns:
                    - string
        pathPattern: string
        provider:
            awsAccessKeyId: string
            awsSecretAccessKey: string
            bucket: string
            endpoint: string
            pathPrefix: string
            regionName: string
            roleArn: string
            startDate: string
        regionName: string
        roleArn: string
        schema: string
        startDate: string
        streams:
            - daysToSyncIfHistoryIsFull: 0
              format:
                avroFormat:
                    doubleAsString: false
                csvFormat:
                    delimiter: string
                    doubleQuote: false
                    encoding: string
                    escapeChar: string
                    falseValues:
                        - string
                    headerDefinition:
                        autogenerated: {}
                        fromCsv: {}
                        userProvided:
                            columnNames:
                                - string
                    ignoreErrorsOnFieldsMismatch: false
                    inferenceType: string
                    nullValues:
                        - string
                    quoteChar: string
                    skipRowsAfterHeader: 0
                    skipRowsBeforeHeader: 0
                    stringsCanBeNull: false
                    trueValues:
                        - string
                excelFormat: {}
                jsonlFormat: {}
                parquetFormat:
                    decimalAsFloat: false
                unstructuredDocumentFormat:
                    processing:
                        local: {}
                    skipUnprocessableFiles: false
                    strategy: string
              globs:
                - string
              inputSchema: string
              legacyPrefix: string
              name: string
              primaryKey: string
              recentNFilesToReadForSchemaDiscovery: 0
              schemaless: false
              validationPolicy: string
    definitionId: string
    name: string
    secretId: string
    workspaceId: string

SourceS3 Resource Properties

To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

Inputs

In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.

The SourceS3 resource accepts the following input properties:

Configuration SourceS3Configuration: NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
WorkspaceId string
DefinitionId string: The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
Name string: Name of the source e.g. dev-mysql-instance.
SecretId string: Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.

Configuration SourceS3ConfigurationArgs: NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
WorkspaceId string
DefinitionId string: The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
Name string: Name of the source e.g. dev-mysql-instance.
SecretId string: Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.

configuration SourceS3Configuration: NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
workspaceId String
definitionId String: The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
name String: Name of the source e.g. dev-mysql-instance.
secretId String: Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.

configuration SourceS3Configuration: NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
workspaceId string
definitionId string: The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
name string: Name of the source e.g. dev-mysql-instance.
secretId string: Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.

configuration SourceS3ConfigurationArgs: NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
workspace_id str
definition_id str: The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
name str: Name of the source e.g. dev-mysql-instance.
secret_id str: Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.

configuration Property Map: NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
workspaceId String
definitionId String: The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
name String: Name of the source e.g. dev-mysql-instance.
secretId String: Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.

Outputs

All input properties are implicitly available as output properties. Additionally, the SourceS3 resource produces the following output properties:

CreatedAt double
Id string: The provider-assigned unique ID for this managed resource.
ResourceAllocation SourceS3ResourceAllocation: Actor or actor definition specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job type specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
SourceId string
SourceType string

CreatedAt float64
Id string: The provider-assigned unique ID for this managed resource.
ResourceAllocation SourceS3ResourceAllocation: Actor or actor definition specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job type specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
SourceId string
SourceType string

createdAt Double
id String: The provider-assigned unique ID for this managed resource.
resourceAllocation SourceS3ResourceAllocation: Actor or actor definition specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job type specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
sourceId String
sourceType String

createdAt number
id string: The provider-assigned unique ID for this managed resource.
resourceAllocation SourceS3ResourceAllocation: Actor or actor definition specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job type specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
sourceId string
sourceType string

created_at float
id str: The provider-assigned unique ID for this managed resource.
resource_allocation SourceS3ResourceAllocation: Actor or actor definition specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job type specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
source_id str
source_type str

createdAt Number
id String: The provider-assigned unique ID for this managed resource.
resourceAllocation Property Map: Actor or actor definition specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job type specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
sourceId String
sourceType String

Look up Existing SourceS3 Resource

Get an existing SourceS3 resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

public static get(name: string, id: Input<ID>, state?: SourceS3State, opts?: CustomResourceOptions): SourceS3

@staticmethod
def get(resource_name: str,
        id: str,
        opts: Optional[ResourceOptions] = None,
        configuration: Optional[SourceS3ConfigurationArgs] = None,
        created_at: Optional[float] = None,
        definition_id: Optional[str] = None,
        name: Optional[str] = None,
        resource_allocation: Optional[SourceS3ResourceAllocationArgs] = None,
        secret_id: Optional[str] = None,
        source_id: Optional[str] = None,
        source_type: Optional[str] = None,
        workspace_id: Optional[str] = None) -> SourceS3

func GetSourceS3(ctx *Context, name string, id IDInput, state *SourceS3State, opts ...ResourceOption) (*SourceS3, error)

public static SourceS3 Get(string name, Input<string> id, SourceS3State? state, CustomResourceOptions? opts = null)

public static SourceS3 get(String name, Output<String> id, SourceS3State state, CustomResourceOptions options)

resources:
  _:
    type: airbyte:SourceS3
    get:
      id: ${id}

name: The unique name of the resulting resource.
id: The unique provider ID of the resource to lookup.
state: Any extra arguments used during the lookup.
opts: A bag of options that control this resource's behavior.

resource_name: The unique name of the resulting resource.
id: The unique provider ID of the resource to lookup.

name: The unique name of the resulting resource.
id: The unique provider ID of the resource to lookup.
state: Any extra arguments used during the lookup.
opts: A bag of options that control this resource's behavior.

name: The unique name of the resulting resource.
id: The unique provider ID of the resource to lookup.
state: Any extra arguments used during the lookup.
opts: A bag of options that control this resource's behavior.

name: The unique name of the resulting resource.
id: The unique provider ID of the resource to lookup.
state: Any extra arguments used during the lookup.
opts: A bag of options that control this resource's behavior.

The following state arguments are supported:

Configuration SourceS3Configuration: NOTE: When this Spec is changed, legacy_config_transformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
CreatedAt double
DefinitionId string: The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
Name string: Name of the source e.g. dev-mysql-instance.
ResourceAllocation SourceS3ResourceAllocation: Actor or actor definition specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job type specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
SecretId string: Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
SourceId string
SourceType string
WorkspaceId string

Configuration SourceS3ConfigurationArgs: NOTE: When this Spec is changed, legacy_config_transformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
CreatedAt float64
DefinitionId string: The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
Name string: Name of the source e.g. dev-mysql-instance.
ResourceAllocation SourceS3ResourceAllocationArgs: Actor or actor definition specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job type specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
SecretId string: Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
SourceId string
SourceType string
WorkspaceId string

configuration SourceS3Configuration: NOTE: When this Spec is changed, legacy_config_transformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
createdAt Double
definitionId String: The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
name String: Name of the source e.g. dev-mysql-instance.
resourceAllocation SourceS3ResourceAllocation: Actor or actor definition specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job type specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
secretId String: Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
sourceId String
sourceType String
workspaceId String

configuration SourceS3Configuration: NOTE: When this Spec is changed, legacy_config_transformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
createdAt number
definitionId string: The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
name string: Name of the source e.g. dev-mysql-instance.
resourceAllocation SourceS3ResourceAllocation: Actor or actor definition specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job type specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
secretId string: Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
sourceId string
sourceType string
workspaceId string

configuration SourceS3ConfigurationArgs: NOTE: When this Spec is changed, legacy_config_transformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
created_at float
definition_id str: The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
name str: Name of the source e.g. dev-mysql-instance.
resource_allocation SourceS3ResourceAllocationArgs: Actor or actor definition specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job type specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
secret_id str: Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
source_id str
source_type str
workspace_id str

configuration Property Map: NOTE: When this Spec is changed, legacy_config_transformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
createdAt Number
definitionId String: The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Default: "69589781-7828-43c5-9f63-8925b1c1ccc2"; Requires replacement if changed.
name String: Name of the source e.g. dev-mysql-instance.
resourceAllocation Property Map: Actor or actor definition specific resource requirements. If default is set, these are the requirements that should be set for ALL jobs run for this actor definition. It is overridden by the job type specific configurations. If not set, the platform will use defaults. These values will be overridden by configuration at the connection level.
secretId String: Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
sourceId String
sourceType String
workspaceId String

Supporting Types

SourceS3Configuration, SourceS3ConfigurationArgs

Bucket string: Name of the S3 bucket where the file(s) exist.
Streams List<SourceS3ConfigurationStream>: Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
AwsAccessKeyId string: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
AwsSecretAccessKey string: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
Dataset string: Deprecated and will be removed soon. Please do not use this field anymore and use streams.name instead. The name of the stream you would like this source to output. Can contain letters, numbers, or underscores.
DeliveryMethod SourceS3ConfigurationDeliveryMethod
Endpoint string: Endpoint to an S3 compatible service. Leave empty to use AWS. The custom endpoint must be secure, but the 'https' prefix is not required. Default: ""
Format SourceS3ConfigurationFormat: Deprecated and will be removed soon. Please do not use this field anymore and use streams.format instead. The format of the files you'd like to replicate
PathPattern string: Deprecated and will be removed soon. Please do not use this field anymore and use streams.globs instead. A regular expression which tells the connector which files to replicate. All files which match this pattern will be replicated. Use | to separate multiple patterns. See this page to understand pattern syntax (GLOBSTAR and SPLIT flags are enabled). Use pattern ** to pick up all files.
Provider SourceS3ConfigurationProvider: Deprecated and will be removed soon. Please do not use this field anymore and use bucket, aws_access_key_id, aws_secret_access_key and endpoint instead. Use this to load files from S3 or S3-compatible services
RegionName string: AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
RoleArn string: Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
Schema string: Deprecated and will be removed soon. Please do not use this field anymore and use streams.input_schema instead. Optionally provide a schema to enforce, as a valid JSON string. Ensure this is a mapping of { "column" : "type" }, where types are valid JSON Schema datatypes (https://json-schema.org/understanding-json-schema/reference/type.html). Leave as {} to auto-infer the schema.
StartDate string: UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.

Bucket string: Name of the S3 bucket where the file(s) exist.
Streams []SourceS3ConfigurationStream: Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
AwsAccessKeyId string: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
AwsSecretAccessKey string: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
Dataset string: Deprecated and will be removed soon. Please do not use this field anymore and use streams.name instead. The name of the stream you would like this source to output. Can contain letters, numbers, or underscores.
DeliveryMethod SourceS3ConfigurationDeliveryMethod
Endpoint string: Endpoint to an S3 compatible service. Leave empty to use AWS. The custom endpoint must be secure, but the 'https' prefix is not required. Default: ""
Format SourceS3ConfigurationFormat: Deprecated and will be removed soon. Please do not use this field anymore and use streams.format instead. The format of the files you'd like to replicate
PathPattern string: Deprecated and will be removed soon. Please do not use this field anymore and use streams.globs instead. A regular expression which tells the connector which files to replicate. All files which match this pattern will be replicated. Use | to separate multiple patterns. See this page to understand pattern syntax (GLOBSTAR and SPLIT flags are enabled). Use pattern ** to pick up all files.
Provider SourceS3ConfigurationProvider: Deprecated and will be removed soon. Please do not use this field anymore and use bucket, aws_access_key_id, aws_secret_access_key and endpoint instead. Use this to load files from S3 or S3-compatible services
RegionName string: AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
RoleArn string: Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
Schema string: Deprecated and will be removed soon. Please do not use this field anymore and use streams.input_schema instead. Optionally provide a schema to enforce, as a valid JSON string. Ensure this is a mapping of { "column" : "type" }, where types are valid JSON Schema datatypes (https://json-schema.org/understanding-json-schema/reference/type.html). Leave as {} to auto-infer the schema.
StartDate string: UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.

bucket String: Name of the S3 bucket where the file(s) exist.
streams List<SourceS3ConfigurationStream>: Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
awsAccessKeyId String: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
awsSecretAccessKey String: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
dataset String: Deprecated and will be removed soon. Please do not use this field anymore and use streams.name instead. The name of the stream you would like this source to output. Can contain letters, numbers, or underscores.
deliveryMethod SourceS3ConfigurationDeliveryMethod
endpoint String: Endpoint to an S3 compatible service. Leave empty to use AWS. The custom endpoint must be secure, but the 'https' prefix is not required. Default: ""
format SourceS3ConfigurationFormat: Deprecated and will be removed soon. Please do not use this field anymore and use streams.format instead. The format of the files you'd like to replicate
pathPattern String: Deprecated and will be removed soon. Please do not use this field anymore and use streams.globs instead. A regular expression which tells the connector which files to replicate. All files which match this pattern will be replicated. Use | to separate multiple patterns. See this page to understand pattern syntax (GLOBSTAR and SPLIT flags are enabled). Use pattern ** to pick up all files.
provider SourceS3ConfigurationProvider: Deprecated and will be removed soon. Please do not use this field anymore and use bucket, aws_access_key_id, aws_secret_access_key and endpoint instead. Use this to load files from S3 or S3-compatible services
regionName String: AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
roleArn String: Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
schema String: Deprecated and will be removed soon. Please do not use this field anymore and use streams.input_schema instead. Optionally provide a schema to enforce, as a valid JSON string. Ensure this is a mapping of { "column" : "type" }, where types are valid JSON Schema datatypes (https://json-schema.org/understanding-json-schema/reference/type.html). Leave as {} to auto-infer the schema.
startDate String: UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.

bucket string: Name of the S3 bucket where the file(s) exist.
streams SourceS3ConfigurationStream[]: Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
awsAccessKeyId string: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
awsSecretAccessKey string: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
dataset string: Deprecated and will be removed soon. Please do not use this field anymore and use streams.name instead. The name of the stream you would like this source to output. Can contain letters, numbers, or underscores.
deliveryMethod SourceS3ConfigurationDeliveryMethod
endpoint string: Endpoint to an S3 compatible service. Leave empty to use AWS. The custom endpoint must be secure, but the 'https' prefix is not required. Default: ""
format SourceS3ConfigurationFormat: Deprecated and will be removed soon. Please do not use this field anymore and use streams.format instead. The format of the files you'd like to replicate
pathPattern string: Deprecated and will be removed soon. Please do not use this field anymore and use streams.globs instead. A regular expression which tells the connector which files to replicate. All files which match this pattern will be replicated. Use | to separate multiple patterns. See this page to understand pattern syntax (GLOBSTAR and SPLIT flags are enabled). Use pattern ** to pick up all files.
provider SourceS3ConfigurationProvider: Deprecated and will be removed soon. Please do not use this field anymore and use bucket, aws_access_key_id, aws_secret_access_key and endpoint instead. Use this to load files from S3 or S3-compatible services
regionName string: AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
roleArn string: Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
schema string: Deprecated and will be removed soon. Please do not use this field anymore and use streams.input_schema instead. Optionally provide a schema to enforce, as a valid JSON string. Ensure this is a mapping of { "column" : "type" }, where types are valid JSON Schema datatypes (https://json-schema.org/understanding-json-schema/reference/type.html). Leave as {} to auto-infer the schema.
startDate string: UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.

bucket str: Name of the S3 bucket where the file(s) exist.
streams Sequence[SourceS3ConfigurationStream]: Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
aws_access_key_id str: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
aws_secret_access_key str: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
dataset str: Deprecated and will be removed soon. Please do not use this field anymore and use streams.name instead. The name of the stream you would like this source to output. Can contain letters, numbers, or underscores.
delivery_method SourceS3ConfigurationDeliveryMethod
endpoint str: Endpoint to an S3 compatible service. Leave empty to use AWS. The custom endpoint must be secure, but the 'https' prefix is not required. Default: ""
format SourceS3ConfigurationFormat: Deprecated and will be removed soon. Please do not use this field anymore and use streams.format instead. The format of the files you'd like to replicate
path_pattern str: Deprecated and will be removed soon. Please do not use this field anymore and use streams.globs instead. A regular expression which tells the connector which files to replicate. All files which match this pattern will be replicated. Use | to separate multiple patterns. See this page to understand pattern syntax (GLOBSTAR and SPLIT flags are enabled). Use pattern ** to pick up all files.
provider SourceS3ConfigurationProvider: Deprecated and will be removed soon. Please do not use this field anymore and use bucket, aws_access_key_id, aws_secret_access_key and endpoint instead. Use this to load files from S3 or S3-compatible services
region_name str: AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
role_arn str: Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
schema str: Deprecated and will be removed soon. Please do not use this field anymore and use streams.input_schema instead. Optionally provide a schema to enforce, as a valid JSON string. Ensure this is a mapping of { "column" : "type" }, where types are valid JSON Schema datatypes (https://json-schema.org/understanding-json-schema/reference/type.html). Leave as {} to auto-infer the schema.
start_date str: UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.

bucket String: Name of the S3 bucket where the file(s) exist.
streams List<Property Map>: Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
awsAccessKeyId String: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
awsSecretAccessKey String: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
dataset String: Deprecated and will be removed soon. Please do not use this field anymore and use streams.name instead. The name of the stream you would like this source to output. Can contain letters, numbers, or underscores.
deliveryMethod Property Map
endpoint String: Endpoint to an S3 compatible service. Leave empty to use AWS. The custom endpoint must be secure, but the 'https' prefix is not required. Default: ""
format Property Map: Deprecated and will be removed soon. Please do not use this field anymore and use streams.format instead. The format of the files you'd like to replicate
pathPattern String: Deprecated and will be removed soon. Please do not use this field anymore and use streams.globs instead. A regular expression which tells the connector which files to replicate. All files which match this pattern will be replicated. Use | to separate multiple patterns. See this page to understand pattern syntax (GLOBSTAR and SPLIT flags are enabled). Use pattern ** to pick up all files.
provider Property Map: Deprecated and will be removed soon. Please do not use this field anymore and use bucket, aws_access_key_id, aws_secret_access_key and endpoint instead. Use this to load files from S3 or S3-compatible services
regionName String: AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
roleArn String: Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
schema String: Deprecated and will be removed soon. Please do not use this field anymore and use streams.input_schema instead. Optionally provide a schema to enforce, as a valid JSON string. Ensure this is a mapping of { "column" : "type" }, where types are valid JSON Schema datatypes (https://json-schema.org/understanding-json-schema/reference/type.html). Leave as {} to auto-infer the schema.
startDate String: UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.

SourceS3ConfigurationDeliveryMethod, SourceS3ConfigurationDeliveryMethodArgs

CopyRawFiles SourceS3ConfigurationDeliveryMethodCopyRawFiles: Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
ReplicateRecords SourceS3ConfigurationDeliveryMethodReplicateRecords: Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.

CopyRawFiles SourceS3ConfigurationDeliveryMethodCopyRawFiles: Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
ReplicateRecords SourceS3ConfigurationDeliveryMethodReplicateRecords: Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.

copyRawFiles SourceS3ConfigurationDeliveryMethodCopyRawFiles: Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
replicateRecords SourceS3ConfigurationDeliveryMethodReplicateRecords: Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.

copyRawFiles SourceS3ConfigurationDeliveryMethodCopyRawFiles: Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
replicateRecords SourceS3ConfigurationDeliveryMethodReplicateRecords: Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.

copy_raw_files SourceS3ConfigurationDeliveryMethodCopyRawFiles: Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
replicate_records SourceS3ConfigurationDeliveryMethodReplicateRecords: Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.

copyRawFiles Property Map: Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
replicateRecords Property Map: Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.

SourceS3ConfigurationDeliveryMethodCopyRawFiles, SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs

PreserveDirectoryStructure bool: If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true

PreserveDirectoryStructure bool: If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true

preserveDirectoryStructure Boolean: If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true

preserveDirectoryStructure boolean: If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true

preserve_directory_structure bool: If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true

preserveDirectoryStructure Boolean: If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true

SourceS3ConfigurationFormat, SourceS3ConfigurationFormatArgs

Avro SourceS3ConfigurationFormatAvro: This connector utilises fastavro for Avro parsing.
Csv SourceS3ConfigurationFormatCsv: This connector utilises PyArrow (Apache Arrow) for CSV parsing.
Jsonl SourceS3ConfigurationFormatJsonl: This connector uses PyArrow for JSON Lines (jsonl) file parsing.
Parquet SourceS3ConfigurationFormatParquet: This connector utilises PyArrow (Apache Arrow) for Parquet parsing.

Avro SourceS3ConfigurationFormatAvro: This connector utilises fastavro for Avro parsing.
Csv SourceS3ConfigurationFormatCsv: This connector utilises PyArrow (Apache Arrow) for CSV parsing.
Jsonl SourceS3ConfigurationFormatJsonl: This connector uses PyArrow for JSON Lines (jsonl) file parsing.
Parquet SourceS3ConfigurationFormatParquet: This connector utilises PyArrow (Apache Arrow) for Parquet parsing.

avro SourceS3ConfigurationFormatAvro: This connector utilises fastavro for Avro parsing.
csv SourceS3ConfigurationFormatCsv: This connector utilises PyArrow (Apache Arrow) for CSV parsing.
jsonl SourceS3ConfigurationFormatJsonl: This connector uses PyArrow for JSON Lines (jsonl) file parsing.
parquet SourceS3ConfigurationFormatParquet: This connector utilises PyArrow (Apache Arrow) for Parquet parsing.

avro SourceS3ConfigurationFormatAvro: This connector utilises fastavro for Avro parsing.
csv SourceS3ConfigurationFormatCsv: This connector utilises PyArrow (Apache Arrow) for CSV parsing.
jsonl SourceS3ConfigurationFormatJsonl: This connector uses PyArrow for JSON Lines (jsonl) file parsing.
parquet SourceS3ConfigurationFormatParquet: This connector utilises PyArrow (Apache Arrow) for Parquet parsing.

avro SourceS3ConfigurationFormatAvro: This connector utilises fastavro for Avro parsing.
csv SourceS3ConfigurationFormatCsv: This connector utilises PyArrow (Apache Arrow) for CSV parsing.
jsonl SourceS3ConfigurationFormatJsonl: This connector uses PyArrow for JSON Lines (jsonl) file parsing.
parquet SourceS3ConfigurationFormatParquet: This connector utilises PyArrow (Apache Arrow) for Parquet parsing.

avro Property Map: This connector utilises fastavro for Avro parsing.
csv Property Map: This connector utilises PyArrow (Apache Arrow) for CSV parsing.
jsonl Property Map: This connector uses PyArrow for JSON Lines (jsonl) file parsing.
parquet Property Map: This connector utilises PyArrow (Apache Arrow) for Parquet parsing.

SourceS3ConfigurationFormatCsv, SourceS3ConfigurationFormatCsvArgs

AdditionalReaderOptions string: Optionally add a valid JSON string here to provide additional options to the csv reader. Mappings must correspond to the options detailed in the PyArrow ConvertOptions documentation. 'column_types' is used internally to handle schema so overriding that would likely cause problems.
AdvancedOptions string: Optionally add a valid JSON string here to provide additional Pyarrow ReadOptions. Specify 'column_names' here if your CSV doesn't have a header, or if you want to use custom column names. 'block_size' and 'encoding' are already used above; specifying them again here will override the values above.
BlockSize double: The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 10000
Delimiter string: The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
DoubleQuote bool: Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
Encoding string: The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
EscapeChar string: The character used for escaping special characters. To disallow escaping, leave this field blank.
InferDatatypes bool: Configures whether a schema for the source should be inferred from the current data or not. If set to false and a custom schema is set, then the manually enforced schema is used. If a schema is not manually set, and this is set to false, then all fields will be read as strings. Default: true
NewlinesInValues bool: Whether newline characters are allowed in CSV values. Turning this on may affect performance. Leave blank to default to False. Default: false
QuoteChar string: The character used for quoting CSV values. To disallow quoting, make this field blank. Default: """

AdditionalReaderOptions string: Optionally add a valid JSON string here to provide additional options to the csv reader. Mappings must correspond to the options detailed in the PyArrow ConvertOptions documentation. 'column_types' is used internally to handle schema so overriding that would likely cause problems.
AdvancedOptions string: Optionally add a valid JSON string here to provide additional Pyarrow ReadOptions. Specify 'column_names' here if your CSV doesn't have a header, or if you want to use custom column names. 'block_size' and 'encoding' are already used above; specifying them again here will override the values above.
BlockSize float64: The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 10000
Delimiter string: The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
DoubleQuote bool: Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
Encoding string: The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
EscapeChar string: The character used for escaping special characters. To disallow escaping, leave this field blank.
InferDatatypes bool: Configures whether a schema for the source should be inferred from the current data or not. If set to false and a custom schema is set, then the manually enforced schema is used. If a schema is not manually set, and this is set to false, then all fields will be read as strings. Default: true
NewlinesInValues bool: Whether newline characters are allowed in CSV values. Turning this on may affect performance. Leave blank to default to False. Default: false
QuoteChar string: The character used for quoting CSV values. To disallow quoting, make this field blank. Default: """

additionalReaderOptions String: Optionally add a valid JSON string here to provide additional options to the csv reader. Mappings must correspond to the options detailed in the PyArrow ConvertOptions documentation. 'column_types' is used internally to handle schema so overriding that would likely cause problems.
advancedOptions String: Optionally add a valid JSON string here to provide additional Pyarrow ReadOptions. Specify 'column_names' here if your CSV doesn't have a header, or if you want to use custom column names. 'block_size' and 'encoding' are already used above; specifying them again here will override the values above.
blockSize Double: The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 10000
delimiter String: The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
doubleQuote Boolean: Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
encoding String: The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
escapeChar String: The character used for escaping special characters. To disallow escaping, leave this field blank.
inferDatatypes Boolean: Configures whether a schema for the source should be inferred from the current data or not. If set to false and a custom schema is set, then the manually enforced schema is used. If a schema is not manually set, and this is set to false, then all fields will be read as strings. Default: true
newlinesInValues Boolean: Whether newline characters are allowed in CSV values. Turning this on may affect performance. Leave blank to default to False. Default: false
quoteChar String: The character used for quoting CSV values. To disallow quoting, make this field blank. Default: """

additionalReaderOptions string: Optionally add a valid JSON string here to provide additional options to the csv reader. Mappings must correspond to the options detailed in the PyArrow ConvertOptions documentation. 'column_types' is used internally to handle schema so overriding that would likely cause problems.
advancedOptions string: Optionally add a valid JSON string here to provide additional Pyarrow ReadOptions. Specify 'column_names' here if your CSV doesn't have a header, or if you want to use custom column names. 'block_size' and 'encoding' are already used above; specifying them again here will override the values above.
blockSize number: The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 10000
delimiter string: The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
doubleQuote boolean: Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
encoding string: The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
escapeChar string: The character used for escaping special characters. To disallow escaping, leave this field blank.
inferDatatypes boolean: Configures whether a schema for the source should be inferred from the current data or not. If set to false and a custom schema is set, then the manually enforced schema is used. If a schema is not manually set, and this is set to false, then all fields will be read as strings. Default: true
newlinesInValues boolean: Whether newline characters are allowed in CSV values. Turning this on may affect performance. Leave blank to default to False. Default: false
quoteChar string: The character used for quoting CSV values. To disallow quoting, make this field blank. Default: """

additional_reader_options str: Optionally add a valid JSON string here to provide additional options to the csv reader. Mappings must correspond to the options detailed in the PyArrow ConvertOptions documentation. 'column_types' is used internally to handle schema so overriding that would likely cause problems.
advanced_options str: Optionally add a valid JSON string here to provide additional Pyarrow ReadOptions. Specify 'column_names' here if your CSV doesn't have a header, or if you want to use custom column names. 'block_size' and 'encoding' are already used above; specifying them again here will override the values above.
block_size float: The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 10000
delimiter str: The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
double_quote bool: Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
encoding str: The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
escape_char str: The character used for escaping special characters. To disallow escaping, leave this field blank.
infer_datatypes bool: Configures whether a schema for the source should be inferred from the current data or not. If set to false and a custom schema is set, then the manually enforced schema is used. If a schema is not manually set, and this is set to false, then all fields will be read as strings. Default: true
newlines_in_values bool: Whether newline characters are allowed in CSV values. Turning this on may affect performance. Leave blank to default to False. Default: false
quote_char str: The character used for quoting CSV values. To disallow quoting, make this field blank. Default: """

additionalReaderOptions String: Optionally add a valid JSON string here to provide additional options to the csv reader. Mappings must correspond to the options detailed in the PyArrow ConvertOptions documentation. 'column_types' is used internally to handle schema so overriding that would likely cause problems.
advancedOptions String: Optionally add a valid JSON string here to provide additional Pyarrow ReadOptions. Specify 'column_names' here if your CSV doesn't have a header, or if you want to use custom column names. 'block_size' and 'encoding' are already used above; specifying them again here will override the values above.
blockSize Number: The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 10000
delimiter String: The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
doubleQuote Boolean: Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
encoding String: The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
escapeChar String: The character used for escaping special characters. To disallow escaping, leave this field blank.
inferDatatypes Boolean: Configures whether a schema for the source should be inferred from the current data or not. If set to false and a custom schema is set, then the manually enforced schema is used. If a schema is not manually set, and this is set to false, then all fields will be read as strings. Default: true
newlinesInValues Boolean: Whether newline characters are allowed in CSV values. Turning this on may affect performance. Leave blank to default to False. Default: false
quoteChar String: The character used for quoting CSV values. To disallow quoting, make this field blank. Default: """

SourceS3ConfigurationFormatJsonl, SourceS3ConfigurationFormatJsonlArgs

BlockSize double: The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 0
NewlinesInValues bool: Whether newline characters are allowed in JSON values. Turning this on may affect performance. Leave blank to default to False. Default: false
UnexpectedFieldBehavior string: How JSON fields outside of explicit_schema (if given) are treated. Check the PyArrow documentation (https://arrow.apache.org/docs/python/generated/pyarrow.json.ParseOptions.html) for details. Default: "infer"; must be one of ["ignore", "infer", "error"]

BlockSize float64: The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 0
NewlinesInValues bool: Whether newline characters are allowed in JSON values. Turning this on may affect performance. Leave blank to default to False. Default: false
UnexpectedFieldBehavior string: How JSON fields outside of explicit_schema (if given) are treated. Check the PyArrow documentation (https://arrow.apache.org/docs/python/generated/pyarrow.json.ParseOptions.html) for details. Default: "infer"; must be one of ["ignore", "infer", "error"]

blockSize Double: The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 0
newlinesInValues Boolean: Whether newline characters are allowed in JSON values. Turning this on may affect performance. Leave blank to default to False. Default: false
unexpectedFieldBehavior String: How JSON fields outside of explicit_schema (if given) are treated. Check the PyArrow documentation (https://arrow.apache.org/docs/python/generated/pyarrow.json.ParseOptions.html) for details. Default: "infer"; must be one of ["ignore", "infer", "error"]

blockSize number: The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 0
newlinesInValues boolean: Whether newline characters are allowed in JSON values. Turning this on may affect performance. Leave blank to default to False. Default: false
unexpectedFieldBehavior string: How JSON fields outside of explicit_schema (if given) are treated. Check the PyArrow documentation (https://arrow.apache.org/docs/python/generated/pyarrow.json.ParseOptions.html) for details. Default: "infer"; must be one of ["ignore", "infer", "error"]

block_size float: The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 0
newlines_in_values bool: Whether newline characters are allowed in JSON values. Turning this on may affect performance. Leave blank to default to False. Default: false
unexpected_field_behavior str: How JSON fields outside of explicit_schema (if given) are treated. Check the PyArrow documentation (https://arrow.apache.org/docs/python/generated/pyarrow.json.ParseOptions.html) for details. Default: "infer"; must be one of ["ignore", "infer", "error"]

blockSize Number: The chunk size in bytes to process at a time in memory from each file. If your data is particularly wide and failing during schema detection, increasing this should solve it. Beware of raising this too high as you could hit OOM errors. Default: 0
newlinesInValues Boolean: Whether newline characters are allowed in JSON values. Turning this on may affect performance. Leave blank to default to False. Default: false
unexpectedFieldBehavior String: How JSON fields outside of explicit_schema (if given) are treated. Check the PyArrow documentation (https://arrow.apache.org/docs/python/generated/pyarrow.json.ParseOptions.html) for details. Default: "infer"; must be one of ["ignore", "infer", "error"]

SourceS3ConfigurationFormatParquet, SourceS3ConfigurationFormatParquetArgs

BatchSize double: Maximum number of records per batch read from the input files. Batches may be smaller if there aren’t enough rows in the file. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 65536
BufferSize double: Perform read buffering when deserializing individual column chunks. By default every group column will be loaded fully to memory. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 2
Columns List<string>: If you only want to sync a subset of the columns from the file(s), add the columns you want here as a comma-delimited list. Leave it empty to sync all columns.

BatchSize float64: Maximum number of records per batch read from the input files. Batches may be smaller if there aren’t enough rows in the file. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 65536
BufferSize float64: Perform read buffering when deserializing individual column chunks. By default every group column will be loaded fully to memory. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 2
Columns []string: If you only want to sync a subset of the columns from the file(s), add the columns you want here as a comma-delimited list. Leave it empty to sync all columns.

batchSize Double: Maximum number of records per batch read from the input files. Batches may be smaller if there aren’t enough rows in the file. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 65536
bufferSize Double: Perform read buffering when deserializing individual column chunks. By default every group column will be loaded fully to memory. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 2
columns List<String>: If you only want to sync a subset of the columns from the file(s), add the columns you want here as a comma-delimited list. Leave it empty to sync all columns.

batchSize number: Maximum number of records per batch read from the input files. Batches may be smaller if there aren’t enough rows in the file. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 65536
bufferSize number: Perform read buffering when deserializing individual column chunks. By default every group column will be loaded fully to memory. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 2
columns string[]: If you only want to sync a subset of the columns from the file(s), add the columns you want here as a comma-delimited list. Leave it empty to sync all columns.

batch_size float: Maximum number of records per batch read from the input files. Batches may be smaller if there aren’t enough rows in the file. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 65536
buffer_size float: Perform read buffering when deserializing individual column chunks. By default every group column will be loaded fully to memory. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 2
columns Sequence[str]: If you only want to sync a subset of the columns from the file(s), add the columns you want here as a comma-delimited list. Leave it empty to sync all columns.

batchSize Number: Maximum number of records per batch read from the input files. Batches may be smaller if there aren’t enough rows in the file. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 65536
bufferSize Number: Perform read buffering when deserializing individual column chunks. By default every group column will be loaded fully to memory. This option can help avoid out-of-memory errors if your data is particularly wide. Default: 2
columns List<String>: If you only want to sync a subset of the columns from the file(s), add the columns you want here as a comma-delimited list. Leave it empty to sync all columns.

SourceS3ConfigurationProvider, SourceS3ConfigurationProviderArgs

AwsAccessKeyId string: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
AwsSecretAccessKey string: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
Bucket string: Name of the S3 bucket where the file(s) exist.
Endpoint string: Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
PathPrefix string: By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimize finding these in S3. This is optional but recommended if your bucket contains many folders/files which you don't need to replicate. Default: ""
RegionName string: AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
RoleArn string: Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
StartDate string: UTC date and time in the format 2017-01-25T00:00:00Z. Any file modified before this date will not be replicated.

AwsAccessKeyId string: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
AwsSecretAccessKey string: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
Bucket string: Name of the S3 bucket where the file(s) exist.
Endpoint string: Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
PathPrefix string: By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimize finding these in S3. This is optional but recommended if your bucket contains many folders/files which you don't need to replicate. Default: ""
RegionName string: AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
RoleArn string: Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
StartDate string: UTC date and time in the format 2017-01-25T00:00:00Z. Any file modified before this date will not be replicated.

awsAccessKeyId String: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
awsSecretAccessKey String: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
bucket String: Name of the S3 bucket where the file(s) exist.
endpoint String: Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
pathPrefix String: By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimize finding these in S3. This is optional but recommended if your bucket contains many folders/files which you don't need to replicate. Default: ""
regionName String: AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
roleArn String: Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
startDate String: UTC date and time in the format 2017-01-25T00:00:00Z. Any file modified before this date will not be replicated.

awsAccessKeyId string: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
awsSecretAccessKey string: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
bucket string: Name of the S3 bucket where the file(s) exist.
endpoint string: Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
pathPrefix string: By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimize finding these in S3. This is optional but recommended if your bucket contains many folders/files which you don't need to replicate. Default: ""
regionName string: AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
roleArn string: Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
startDate string: UTC date and time in the format 2017-01-25T00:00:00Z. Any file modified before this date will not be replicated.

aws_access_key_id str: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
aws_secret_access_key str: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
bucket str: Name of the S3 bucket where the file(s) exist.
endpoint str: Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
path_prefix str: By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimize finding these in S3. This is optional but recommended if your bucket contains many folders/files which you don't need to replicate. Default: ""
region_name str: AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
role_arn str: Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
start_date str: UTC date and time in the format 2017-01-25T00:00:00Z. Any file modified before this date will not be replicated.

awsAccessKeyId String: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
awsSecretAccessKey String: In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
bucket String: Name of the S3 bucket where the file(s) exist.
endpoint String: Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
pathPrefix String: By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimize finding these in S3. This is optional but recommended if your bucket contains many folders/files which you don't need to replicate. Default: ""
regionName String: AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
roleArn String: Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
startDate String: UTC date and time in the format 2017-01-25T00:00:00Z. Any file modified before this date will not be replicated.

SourceS3ConfigurationStream, SourceS3ConfigurationStreamArgs

Format SourceS3ConfigurationStreamFormat: The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
Name string: The name of the stream.
DaysToSyncIfHistoryIsFull double: When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
Globs List<string>: The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look here. Default: ["**"]
InputSchema string: The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
LegacyPrefix string: The path prefix configured in v3 versions of the S3 connector. This option is deprecated in favor of a single glob.
PrimaryKey string: The column or columns (for a composite key) that serve as the unique identifier of a record. If empty, the primary key will default to the parser's default primary key.
RecentNFilesToReadForSchemaDiscovery double: The number of recent files which will be used to discover the schema for this stream.
Schemaless bool: When enabled, syncs will not validate or structure records against the stream's schema. Default: false
ValidationPolicy string: The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]

Format SourceS3ConfigurationStreamFormat: The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
Name string: The name of the stream.
DaysToSyncIfHistoryIsFull float64: When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
Globs []string: The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching, see the Airbyte S3 source connector documentation. Default: ["**"]
InputSchema string: The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
LegacyPrefix string: The path prefix configured in v3 versions of the S3 connector. This option is deprecated in favor of a single glob.
PrimaryKey string: The column or columns (for a composite key) that serve as the unique identifier of a record. If empty, the primary key will default to the parser's default primary key.
RecentNFilesToReadForSchemaDiscovery float64: The number of recent files which will be used to discover the schema for this stream.
Schemaless bool: When enabled, syncs will not validate or structure records against the stream's schema. Default: false
ValidationPolicy string: The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]

format SourceS3ConfigurationStreamFormat: The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
name String: The name of the stream.
daysToSyncIfHistoryIsFull Double: When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
globs List<String>: The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching, see the Airbyte S3 source connector documentation. Default: ["**"]
inputSchema String: The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
legacyPrefix String: The path prefix configured in v3 versions of the S3 connector. This option is deprecated in favor of a single glob.
primaryKey String: The column or columns (for a composite key) that serve as the unique identifier of a record. If empty, the primary key will default to the parser's default primary key.
recentNFilesToReadForSchemaDiscovery Double: The number of recent files which will be used to discover the schema for this stream.
schemaless Boolean: When enabled, syncs will not validate or structure records against the stream's schema. Default: false
validationPolicy String: The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]

format SourceS3ConfigurationStreamFormat: The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
name string: The name of the stream.
daysToSyncIfHistoryIsFull number: When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
globs string[]: The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching, see the Airbyte S3 source connector documentation. Default: ["**"]
inputSchema string: The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
legacyPrefix string: The path prefix configured in v3 versions of the S3 connector. This option is deprecated in favor of a single glob.
primaryKey string: The column or columns (for a composite key) that serve as the unique identifier of a record. If empty, the primary key will default to the parser's default primary key.
recentNFilesToReadForSchemaDiscovery number: The number of recent files which will be used to discover the schema for this stream.
schemaless boolean: When enabled, syncs will not validate or structure records against the stream's schema. Default: false
validationPolicy string: The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]

format SourceS3ConfigurationStreamFormat: The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
name str: The name of the stream.
days_to_sync_if_history_is_full float: When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
globs Sequence[str]: The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching, see the Airbyte S3 source connector documentation. Default: ["**"]
input_schema str: The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
legacy_prefix str: The path prefix configured in v3 versions of the S3 connector. This option is deprecated in favor of a single glob.
primary_key str: The column or columns (for a composite key) that serve as the unique identifier of a record. If empty, the primary key will default to the parser's default primary key.
recent_n_files_to_read_for_schema_discovery float: The number of recent files which will be used to discover the schema for this stream.
schemaless bool: When enabled, syncs will not validate or structure records against the stream's schema. Default: false
validation_policy str: The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]

format Property Map: The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
name String: The name of the stream.
daysToSyncIfHistoryIsFull Number: When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
globs List<String>: The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching, see the Airbyte S3 source connector documentation. Default: ["**"]
inputSchema String: The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
legacyPrefix String: The path prefix configured in v3 versions of the S3 connector. This option is deprecated in favor of a single glob.
primaryKey String: The column or columns (for a composite key) that serve as the unique identifier of a record. If empty, the primary key will default to the parser's default primary key.
recentNFilesToReadForSchemaDiscovery Number: The number of recent files which will be used to discover the schema for this stream.
schemaless Boolean: When enabled, syncs will not validate or structure records against the stream's schema. Default: false
validationPolicy String: The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]

SourceS3ConfigurationStreamFormat, SourceS3ConfigurationStreamFormatArgs

AvroFormat SourceS3ConfigurationStreamFormatAvroFormat
CsvFormat SourceS3ConfigurationStreamFormatCsvFormat
ExcelFormat SourceS3ConfigurationStreamFormatExcelFormat
JsonlFormat SourceS3ConfigurationStreamFormatJsonlFormat
ParquetFormat SourceS3ConfigurationStreamFormatParquetFormat
UnstructuredDocumentFormat SourceS3ConfigurationStreamFormatUnstructuredDocumentFormat: Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.

AvroFormat SourceS3ConfigurationStreamFormatAvroFormat
CsvFormat SourceS3ConfigurationStreamFormatCsvFormat
ExcelFormat SourceS3ConfigurationStreamFormatExcelFormat
JsonlFormat SourceS3ConfigurationStreamFormatJsonlFormat
ParquetFormat SourceS3ConfigurationStreamFormatParquetFormat
UnstructuredDocumentFormat SourceS3ConfigurationStreamFormatUnstructuredDocumentFormat: Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.

avroFormat SourceS3ConfigurationStreamFormatAvroFormat
csvFormat SourceS3ConfigurationStreamFormatCsvFormat
excelFormat SourceS3ConfigurationStreamFormatExcelFormat
jsonlFormat SourceS3ConfigurationStreamFormatJsonlFormat
parquetFormat SourceS3ConfigurationStreamFormatParquetFormat
unstructuredDocumentFormat SourceS3ConfigurationStreamFormatUnstructuredDocumentFormat: Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.

avroFormat SourceS3ConfigurationStreamFormatAvroFormat
csvFormat SourceS3ConfigurationStreamFormatCsvFormat
excelFormat SourceS3ConfigurationStreamFormatExcelFormat
jsonlFormat SourceS3ConfigurationStreamFormatJsonlFormat
parquetFormat SourceS3ConfigurationStreamFormatParquetFormat
unstructuredDocumentFormat SourceS3ConfigurationStreamFormatUnstructuredDocumentFormat: Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.

avro_format SourceS3ConfigurationStreamFormatAvroFormat
csv_format SourceS3ConfigurationStreamFormatCsvFormat
excel_format SourceS3ConfigurationStreamFormatExcelFormat
jsonl_format SourceS3ConfigurationStreamFormatJsonlFormat
parquet_format SourceS3ConfigurationStreamFormatParquetFormat
unstructured_document_format SourceS3ConfigurationStreamFormatUnstructuredDocumentFormat: Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.

avroFormat Property Map
csvFormat Property Map
excelFormat Property Map
jsonlFormat Property Map
parquetFormat Property Map
unstructuredDocumentFormat Property Map: Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.

SourceS3ConfigurationStreamFormatAvroFormat, SourceS3ConfigurationStreamFormatAvroFormatArgs

DoubleAsString bool: Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false

DoubleAsString bool: Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false

doubleAsString Boolean: Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false

doubleAsString boolean: Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false

double_as_string bool: Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false

doubleAsString Boolean: Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false

SourceS3ConfigurationStreamFormatCsvFormat, SourceS3ConfigurationStreamFormatCsvFormatArgs

Delimiter string: The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
DoubleQuote bool: Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
Encoding string: The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python standard encodings (https://docs.python.org/3/library/codecs.html#standard-encodings) for allowable options. Default: "utf8"
EscapeChar string: The character used for escaping special characters. To disallow escaping, leave this field blank.
FalseValues List<string>: A set of case-sensitive strings that should be interpreted as false values. Default: ["n","no","f","false","off","0"]
HeaderDefinition SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinition: How headers will be defined. User Provided assumes the CSV does not have a header row and uses the headers provided. Autogenerated assumes the CSV does not have a header row, and the CDK will generate headers of the form f{i}, where i is the index starting from 0. Otherwise, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV that has headers, they can skip rows.
IgnoreErrorsOnFieldsMismatch bool: Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
InferenceType string: How to infer the types of the columns. If none, inference defaults to strings. Must be one of ["None", "Primitive Types Only"]
NullValues List<string>: A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field. Default: []
QuoteChar string: The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
SkipRowsAfterHeader double: The number of rows to skip after the header row. Default: 0
SkipRowsBeforeHeader double: The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
StringsCanBeNull bool: Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
TrueValues List<string>: A set of case-sensitive strings that should be interpreted as true values. Default: ["y","yes","t","true","on","1"]

Delimiter string: The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
DoubleQuote bool: Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
Encoding string: The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python standard encodings (https://docs.python.org/3/library/codecs.html#standard-encodings) for allowable options. Default: "utf8"
EscapeChar string: The character used for escaping special characters. To disallow escaping, leave this field blank.
FalseValues []string: A set of case-sensitive strings that should be interpreted as false values. Default: ["n","no","f","false","off","0"]
HeaderDefinition SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinition: How headers will be defined. User Provided assumes the CSV does not have a header row and uses the headers provided. Autogenerated assumes the CSV does not have a header row, and the CDK will generate headers of the form f{i}, where i is the index starting from 0. Otherwise, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV that has headers, they can skip rows.
IgnoreErrorsOnFieldsMismatch bool: Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
InferenceType string: How to infer the types of the columns. If none, inference defaults to strings. Must be one of ["None", "Primitive Types Only"]
NullValues []string: A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field. Default: []
QuoteChar string: The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
SkipRowsAfterHeader float64: The number of rows to skip after the header row. Default: 0
SkipRowsBeforeHeader float64: The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
StringsCanBeNull bool: Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
TrueValues []string: A set of case-sensitive strings that should be interpreted as true values. Default: ["y","yes","t","true","on","1"]

delimiter String: The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
doubleQuote Boolean: Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
encoding String: The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python standard encodings (https://docs.python.org/3/library/codecs.html#standard-encodings) for allowable options. Default: "utf8"
escapeChar String: The character used for escaping special characters. To disallow escaping, leave this field blank.
falseValues List<String>: A set of case-sensitive strings that should be interpreted as false values. Default: ["n","no","f","false","off","0"]
headerDefinition SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinition: How headers will be defined. User Provided assumes the CSV does not have a header row and uses the headers provided. Autogenerated assumes the CSV does not have a header row, and the CDK will generate headers of the form f{i}, where i is the index starting from 0. Otherwise, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV that has headers, they can skip rows.
ignoreErrorsOnFieldsMismatch Boolean: Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
inferenceType String: How to infer the types of the columns. If none, inference defaults to strings. Must be one of ["None", "Primitive Types Only"]
nullValues List<String>: A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field. Default: []
quoteChar String: The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
skipRowsAfterHeader Double: The number of rows to skip after the header row. Default: 0
skipRowsBeforeHeader Double: The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
stringsCanBeNull Boolean: Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
trueValues List<String>: A set of case-sensitive strings that should be interpreted as true values. Default: ["y","yes","t","true","on","1"]

delimiter string: The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
doubleQuote boolean: Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
encoding string: The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python standard encodings (https://docs.python.org/3/library/codecs.html#standard-encodings) for allowable options. Default: "utf8"
escapeChar string: The character used for escaping special characters. To disallow escaping, leave this field blank.
falseValues string[]: A set of case-sensitive strings that should be interpreted as false values. Default: ["n","no","f","false","off","0"]
headerDefinition SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinition: How headers will be defined. User Provided assumes the CSV does not have a header row and uses the headers provided. Autogenerated assumes the CSV does not have a header row, and the CDK will generate headers of the form f{i}, where i is the index starting from 0. Otherwise, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV that has headers, they can skip rows.
ignoreErrorsOnFieldsMismatch boolean: Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
inferenceType string: How to infer the types of the columns. If none, inference defaults to strings. Must be one of ["None", "Primitive Types Only"]
nullValues string[]: A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field. Default: []
quoteChar string: The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
skipRowsAfterHeader number: The number of rows to skip after the header row. Default: 0
skipRowsBeforeHeader number: The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
stringsCanBeNull boolean: Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
trueValues string[]: A set of case-sensitive strings that should be interpreted as true values. Default: ["y","yes","t","true","on","1"]

delimiter str: The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
double_quote bool: Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
encoding str: The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python standard encodings (https://docs.python.org/3/library/codecs.html#standard-encodings) for allowable options. Default: "utf8"
escape_char str: The character used for escaping special characters. To disallow escaping, leave this field blank.
false_values Sequence[str]: A set of case-sensitive strings that should be interpreted as false values. Default: ["n","no","f","false","off","0"]
header_definition SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinition: How headers will be defined. User Provided assumes the CSV does not have a header row and uses the headers provided. Autogenerated assumes the CSV does not have a header row, and the CDK will generate headers of the form f{i}, where i is the index starting from 0. Otherwise, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV that has headers, they can skip rows.
ignore_errors_on_fields_mismatch bool: Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
inference_type str: How to infer the types of the columns. If none, inference defaults to strings. Must be one of ["None", "Primitive Types Only"]
null_values Sequence[str]: A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field. Default: []
quote_char str: The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
skip_rows_after_header float: The number of rows to skip after the header row. Default: 0
skip_rows_before_header float: The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
strings_can_be_null bool: Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
true_values Sequence[str]: A set of case-sensitive strings that should be interpreted as true values. Default: ["y","yes","t","true","on","1"]

delimiter String: The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
doubleQuote Boolean: Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
encoding String: The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python standard encodings (https://docs.python.org/3/library/codecs.html#standard-encodings) for allowable options. Default: "utf8"
escapeChar String: The character used for escaping special characters. To disallow escaping, leave this field blank.
falseValues List<String>: A set of case-sensitive strings that should be interpreted as false values. Default: ["n","no","f","false","off","0"]
headerDefinition Property Map: How headers will be defined. User Provided assumes the CSV does not have a header row and uses the headers provided. Autogenerated assumes the CSV does not have a header row, and the CDK will generate headers of the form f{i}, where i is the index starting from 0. Otherwise, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV that has headers, they can skip rows.
ignoreErrorsOnFieldsMismatch Boolean: Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
inferenceType String: How to infer the types of the columns. If none, inference defaults to strings. Must be one of ["None", "Primitive Types Only"]
nullValues List<String>: A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field. Default: []
quoteChar String: The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
skipRowsAfterHeader Number: The number of rows to skip after the header row. Default: 0
skipRowsBeforeHeader Number: The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
stringsCanBeNull Boolean: Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
trueValues List<String>: A set of case-sensitive strings that should be interpreted as true values. Default: ["y","yes","t","true","on","1"]

SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinition, SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionArgs

Autogenerated SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionAutogenerated
FromCsv SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionFromCsv
UserProvided SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvided

Autogenerated SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionAutogenerated
FromCsv SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionFromCsv
UserProvided SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvided

autogenerated SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionAutogenerated
fromCsv SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionFromCsv
userProvided SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvided

autogenerated SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionAutogenerated
fromCsv SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionFromCsv
userProvided SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvided

autogenerated SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionAutogenerated
from_csv SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionFromCsv
user_provided SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvided

autogenerated Property Map
fromCsv Property Map
userProvided Property Map

SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvided, SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvidedArgs

ColumnNames List<string>: The column names that will be used while emitting the CSV records

ColumnNames []string: The column names that will be used while emitting the CSV records

columnNames List<String>: The column names that will be used while emitting the CSV records

columnNames string[]: The column names that will be used while emitting the CSV records

column_names Sequence[str]: The column names that will be used while emitting the CSV records

columnNames List<String>: The column names that will be used while emitting the CSV records

SourceS3ConfigurationStreamFormatParquetFormat, SourceS3ConfigurationStreamFormatParquetFormatArgs

DecimalAsFloat bool: Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false

DecimalAsFloat bool: Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false

decimalAsFloat Boolean: Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false

decimalAsFloat boolean: Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false

decimal_as_float bool: Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false

decimalAsFloat Boolean: Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false

SourceS3ConfigurationStreamFormatUnstructuredDocumentFormat, SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatArgs

Processing SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessing: Processing configuration
SkipUnprocessableFiles bool: If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
Strategy string: The strategy used to parse documents. "fast" extracts text directly from the document, which doesn't work for all files. "ocr_only" is more reliable, but slower. "hi_res" is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]

Processing SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessing: Processing configuration
SkipUnprocessableFiles bool: If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
Strategy string: The strategy used to parse documents. "fast" extracts text directly from the document, which doesn't work for all files. "ocr_only" is more reliable, but slower. "hi_res" is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]

processing SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessing: Processing configuration
skipUnprocessableFiles Boolean: If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
strategy String: The strategy used to parse documents. fast extracts text directly from the document which doesn't work for all files. ocr_only is more reliable, but slower. hi_res is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]

processing SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessing: Processing configuration
skipUnprocessableFiles boolean: If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
strategy string: The strategy used to parse documents. fast extracts text directly from the document which doesn't work for all files. ocr_only is more reliable, but slower. hi_res is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]

processing SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessing: Processing configuration
skip_unprocessable_files bool: If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
strategy str: The strategy used to parse documents. fast extracts text directly from the document which doesn't work for all files. ocr_only is more reliable, but slower. hi_res is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]

processing Property Map: Processing configuration
skipUnprocessableFiles Boolean: If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
strategy String: The strategy used to parse documents. fast extracts text directly from the document which doesn't work for all files. ocr_only is more reliable, but slower. hi_res is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]

SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessing, SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingArgs

Local SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingLocal: Process files locally, supporting fast and ocr modes. This is the default option.

Local SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingLocal: Process files locally, supporting fast and ocr modes. This is the default option.

local SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingLocal: Process files locally, supporting fast and ocr modes. This is the default option.

local SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingLocal: Process files locally, supporting fast and ocr modes. This is the default option.

local SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingLocal: Process files locally, supporting fast and ocr modes. This is the default option.

local Property Map: Process files locally, supporting fast and ocr modes. This is the default option.

SourceS3ResourceAllocation, SourceS3ResourceAllocationArgs

Default SourceS3ResourceAllocationDefault: optional resource requirements to run workers (blank for unbounded allocations)
JobSpecifics List<SourceS3ResourceAllocationJobSpecific>

Default SourceS3ResourceAllocationDefault: optional resource requirements to run workers (blank for unbounded allocations)
JobSpecifics []SourceS3ResourceAllocationJobSpecific

default_ SourceS3ResourceAllocationDefault: optional resource requirements to run workers (blank for unbounded allocations)
jobSpecifics List<SourceS3ResourceAllocationJobSpecific>

default SourceS3ResourceAllocationDefault: optional resource requirements to run workers (blank for unbounded allocations)
jobSpecifics SourceS3ResourceAllocationJobSpecific[]

default SourceS3ResourceAllocationDefault: optional resource requirements to run workers (blank for unbounded allocations)
job_specifics Sequence[SourceS3ResourceAllocationJobSpecific]

default Property Map: optional resource requirements to run workers (blank for unbounded allocations)
jobSpecifics List<Property Map>

SourceS3ResourceAllocationDefault, SourceS3ResourceAllocationDefaultArgs

CpuLimit string
CpuRequest string
EphemeralStorageLimit string
EphemeralStorageRequest string
MemoryLimit string
MemoryRequest string

CpuLimit string
CpuRequest string
EphemeralStorageLimit string
EphemeralStorageRequest string
MemoryLimit string
MemoryRequest string

cpuLimit String
cpuRequest String
ephemeralStorageLimit String
ephemeralStorageRequest String
memoryLimit String
memoryRequest String

cpuLimit string
cpuRequest string
ephemeralStorageLimit string
ephemeralStorageRequest string
memoryLimit string
memoryRequest string

cpu_limit str
cpu_request str
ephemeral_storage_limit str
ephemeral_storage_request str
memory_limit str
memory_request str

cpuLimit String
cpuRequest String
ephemeralStorageLimit String
ephemeralStorageRequest String
memoryLimit String
memoryRequest String

SourceS3ResourceAllocationJobSpecific, SourceS3ResourceAllocationJobSpecificArgs

JobType string: enum that describes the different types of jobs that the platform runs.
ResourceRequirements SourceS3ResourceAllocationJobSpecificResourceRequirements: optional resource requirements to run workers (blank for unbounded allocations)

JobType string: enum that describes the different types of jobs that the platform runs.
ResourceRequirements SourceS3ResourceAllocationJobSpecificResourceRequirements: optional resource requirements to run workers (blank for unbounded allocations)

jobType String: enum that describes the different types of jobs that the platform runs.
resourceRequirements SourceS3ResourceAllocationJobSpecificResourceRequirements: optional resource requirements to run workers (blank for unbounded allocations)

jobType string: enum that describes the different types of jobs that the platform runs.
resourceRequirements SourceS3ResourceAllocationJobSpecificResourceRequirements: optional resource requirements to run workers (blank for unbounded allocations)

job_type str: enum that describes the different types of jobs that the platform runs.
resource_requirements SourceS3ResourceAllocationJobSpecificResourceRequirements: optional resource requirements to run workers (blank for unbounded allocations)

jobType String: enum that describes the different types of jobs that the platform runs.
resourceRequirements Property Map: optional resource requirements to run workers (blank for unbounded allocations)

SourceS3ResourceAllocationJobSpecificResourceRequirements, SourceS3ResourceAllocationJobSpecificResourceRequirementsArgs

CpuLimit string
CpuRequest string
EphemeralStorageLimit string
EphemeralStorageRequest string
MemoryLimit string
MemoryRequest string

CpuLimit string
CpuRequest string
EphemeralStorageLimit string
EphemeralStorageRequest string
MemoryLimit string
MemoryRequest string

cpuLimit String
cpuRequest String
ephemeralStorageLimit String
ephemeralStorageRequest String
memoryLimit String
memoryRequest String

cpuLimit string
cpuRequest string
ephemeralStorageLimit string
ephemeralStorageRequest string
memoryLimit string
memoryRequest string

cpu_limit str
cpu_request str
ephemeral_storage_limit str
ephemeral_storage_request str
memory_limit str
memory_request str

cpuLimit String
cpuRequest String
ephemeralStorageLimit String
ephemeralStorageRequest String
memoryLimit String
memoryRequest String

Import

In Terraform v1.5.0 and later, the import block can be used with the id attribute, for example:

terraform

import {

to = airbyte_source_s3.my_airbyte_source_s3

id = "..."

}

The pulumi import command can be used, for example:

$ pulumi import airbyte:index/sourceS3:SourceS3 my_airbyte_source_s3 "..."

To learn more about importing existing cloud resources, see Importing resources.

Package Details

Repository: airbyte airbytehq/terraform-provider-airbyte
License
Notes: This Pulumi package is based on the airbyte Terraform Provider.

airbyte 1.0.0-rc8 published on Monday, Feb 23, 2026 by airbytehq

Schema (JSON)

airbytehq/terraform-provider-airbyte

airbyte.SourceS3

On this page

On this page

Example Usage

Create SourceS3 Resource

Constructor syntax

Parameters

Constructor example

SourceS3 Resource Properties

Inputs

Outputs

Look up Existing SourceS3 Resource

Supporting Types

SourceS3Configuration, SourceS3ConfigurationArgs

SourceS3ConfigurationDeliveryMethod, SourceS3ConfigurationDeliveryMethodArgs

SourceS3ConfigurationDeliveryMethodCopyRawFiles, SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs

SourceS3ConfigurationFormat, SourceS3ConfigurationFormatArgs

SourceS3ConfigurationFormatCsv, SourceS3ConfigurationFormatCsvArgs

SourceS3ConfigurationFormatJsonl, SourceS3ConfigurationFormatJsonlArgs

SourceS3ConfigurationFormatParquet, SourceS3ConfigurationFormatParquetArgs

SourceS3ConfigurationProvider, SourceS3ConfigurationProviderArgs

SourceS3ConfigurationStream, SourceS3ConfigurationStreamArgs

SourceS3ConfigurationStreamFormat, SourceS3ConfigurationStreamFormatArgs

SourceS3ConfigurationStreamFormatAvroFormat, SourceS3ConfigurationStreamFormatAvroFormatArgs

SourceS3ConfigurationStreamFormatCsvFormat, SourceS3ConfigurationStreamFormatCsvFormatArgs

SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinition, SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionArgs

SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvided, SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvidedArgs

SourceS3ConfigurationStreamFormatParquetFormat, SourceS3ConfigurationStreamFormatParquetFormatArgs

SourceS3ConfigurationStreamFormatUnstructuredDocumentFormat, SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatArgs

SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessing, SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingArgs

SourceS3ResourceAllocation, SourceS3ResourceAllocationArgs

SourceS3ResourceAllocationDefault, SourceS3ResourceAllocationDefaultArgs

SourceS3ResourceAllocationJobSpecific, SourceS3ResourceAllocationJobSpecificArgs

SourceS3ResourceAllocationJobSpecificResourceRequirements, SourceS3ResourceAllocationJobSpecificResourceRequirementsArgs

Import

Package Details

On this page

On this page