Пример #1
0
def _for_column(col, pk_columns):
    """Build the Schema that describes a single database column.

    A column whose name matches one of ``pk_columns`` (compared
    case-insensitively) gets ``inclusion="automatic"``; every other column
    gets ``"available"``.  A column with an unrecognised data type, or a
    string column whose CCSID is listed in ``UNSUPPORTED_CCSIDS``, yields a
    fresh Schema with ``inclusion="unsupported"`` and a description of the
    reason instead.
    """
    type_name = col.data_type.lower()
    column_key = col.column_name.lower()
    pk_keys = [pk.lower() for pk in pk_columns]
    schema = Schema(
        inclusion="automatic" if column_key in pk_keys else "available"
    )

    if type_name in BYTES_FOR_INTEGER_TYPE:
        # Bounds of a signed integer with the column's storage width in bits.
        half_range = 2 ** (BYTES_FOR_INTEGER_TYPE[type_name] * 8 - 1)
        schema.type = ["null", "integer"]
        schema.minimum = -half_range
        schema.maximum = half_range - 1
        return schema

    if type_name in FLOAT_TYPES:
        schema.type = ["null", "number"]
        return schema

    if type_name in DECIMAL_TYPES:
        # Open interval (-10^(p-s), 10^(p-s)) in steps of 10^-s.
        bound = 10 ** (col.numeric_precision - col.numeric_scale)
        schema.type = ["null", "number"]
        schema.exclusiveMaximum = True
        schema.maximum = bound
        schema.exclusiveMinimum = True
        schema.minimum = -bound
        schema.multipleOf = 10 ** (0 - col.numeric_scale)
        return schema

    if type_name in STRING_TYPES:
        if col.ccsid in UNSUPPORTED_CCSIDS:
            return Schema(
                None,
                inclusion="unsupported",
                description="Unsupported CCSID {}".format(col.ccsid),
            )
        schema.type = ["null", "string"]
        max_length = col.character_maximum_length
        # Only a positive length is recorded as a constraint.
        if max_length > 0:
            schema.maxLength = max_length
        return schema

    if type_name in DATETIME_TYPES:
        schema.type = ["null", "string"]
        schema.format = "date-time"
        return schema

    return Schema(
        None,
        inclusion="unsupported",
        description="Unsupported data type {}".format(type_name),
    )
Пример #2
0
    def column_schema(cls, col_info):
        """Translate one field description into a Schema.

        The field type is read from ``col_info["config"]["type"]`` and
        defaults to ``"string"`` when either key is absent.  ``autoNumber``
        fields are marked ``inclusion="automatic"``, everything else
        ``"available"``.  ``number`` and ``autoNumber`` become a nullable
        ``number``; all other types become a nullable ``string``, with a
        ``date-time`` or ``date`` format for ``dateTime`` and ``date`` fields.
        """
        air_type = "string"
        if "config" in col_info:
            config = col_info["config"]
            if "type" in config:
                air_type = config["type"]

        is_primary_key = air_type == "autoNumber"
        schema = Schema(
            inclusion="automatic" if is_primary_key else "available"
        )

        is_numeric = air_type in ("number", "autoNumber")
        schema.type = ['null', 'number' if is_numeric else 'string']

        if air_type == "dateTime":
            schema.format = 'date-time'
        elif air_type == "date":
            schema.format = 'date'

        return schema
Пример #3
0
    def discover_base(cls, base_id, base_name=None):
        """Discover the tables of one base and return a catalog entry each.

        Requests ``cls.metadata_url + base_id`` with the class's auth header
        and converts every table in the JSON response into a
        ``CatalogEntry`` whose schema has an automatic string ``id`` column
        plus one property per field.

        Args:
            base_id: Identifier of the base; appended to ``cls.metadata_url``.
            base_name: Optional name used as the entry's ``database``; falls
                back to ``base_id`` when omitted or empty.

        Returns:
            A list of ``CatalogEntry`` objects, one per table.

        Raises:
            requests.HTTPError: from ``raise_for_status()`` when the metadata
                request comes back with an error status.
        """
        cls.logger.info("discover base " + base_id)
        headers = cls.__get_auth_header()
        response = requests.get(url=cls.metadata_url + base_id,
                                headers=headers)
        response.raise_for_status()
        entries = []

        for table in response.json()["tables"]:
            table_name = table["name"]
            schema_cols = {
                "id": Schema(inclusion="automatic", type=['null', "string"])
            }
            keys = []

            meta = {}
            meta = metadata.write(meta, (), "inclusion", "available")
            # NOTE(review): this breadcrumb is the plain string
            # 'database_name', unlike the tuple breadcrumbs used for the
            # other entries -- confirm that is intended.
            meta = metadata.write(meta, 'database_name', 'base_id', base_id)

            for field in table["fields"]:
                field_name = field["name"]
                breadcrumb = ('properties', field_name)

                col_schema = cls.column_schema(field)
                # Fields flagged automatic double as the stream's keys.
                if col_schema.inclusion == "automatic":
                    keys.append(field_name)
                schema_cols[field_name] = col_schema
                meta = metadata.write(meta, breadcrumb,
                                      'inclusion', 'available')

                # A field may arrive without a "config"/"type" entry; that
                # must not abort discovery with a KeyError.  The type is
                # only recorded when one is actually reported, so no None
                # value is ever handed to metadata.write.
                air_type = (field.get("config") or {}).get("type")
                if air_type:
                    meta = metadata.write(meta, breadcrumb,
                                          'airtable_type', air_type)

            schema = Schema(type='object', properties=schema_cols)

            entry = CatalogEntry(tap_stream_id=table["id"],
                                 database=base_name or base_id,
                                 table=table_name,
                                 stream=table_name,
                                 metadata=metadata.to_list(meta),
                                 key_properties=keys,
                                 schema=schema)
            entries.append(entry)

        return entries
Пример #4
0
def sync(ctx):
    """Emit a pruned schema for every selected stream, then run the sync.

    For each entry in ``ctx.selected_catalog`` the schema is narrowed to the
    fields returned by ``desired_fields`` (computed from the selected
    properties plus the replication key), stored back on the entry and
    written out through ``streams.write_schema``.  Afterwards a ``Syncer``
    built from ``ctx`` performs the actual sync.
    """
    # NOTE: the credential check below is currently disabled.
    # check_credentials_are_authorized(ctx)

    for entry in ctx.selected_catalog:
        wanted = {
            name
            for name, prop in entry.schema.properties.items()
            if prop.selected or name == entry.replication_key
        }

        kept = {}
        for name in desired_fields(wanted, entry.schema):
            kept[name] = entry.schema.properties[name]

        pruned = Schema(type='object', properties=kept)
        entry.schema = pruned
        streams.write_schema(entry.tap_stream_id, pruned)

    Syncer(ctx).sync()
Пример #5
0
def generate(columns, pk_columns):
    """Return an object Schema with one property per column.

    Every column is converted by ``_for_column`` and stored under its
    ``column_name``; ``pk_columns`` is handed through to that helper as-is.
    """
    props = {}
    for column in columns:
        name = column.column_name
        props[name] = _for_column(column, pk_columns)
    return Schema(type="object", properties=props)