def _for_column(col, pk_columns):
    """Build a Singer Schema describing *col*.

    Columns named in *pk_columns* (case-insensitive) are marked with
    inclusion "automatic"; all others are "available". Columns whose data
    type or CCSID is unsupported come back as an "unsupported" schema with
    an explanatory description.
    """
    type_name = col.data_type.lower()
    pk_names = {name.lower() for name in pk_columns}
    is_key = col.column_name.lower() in pk_names
    schema = Schema(inclusion="automatic" if is_key else "available")

    if type_name in BYTES_FOR_INTEGER_TYPE:
        # Signed integer bounds derived from the storage width.
        bit_width = 8 * BYTES_FOR_INTEGER_TYPE[type_name]
        schema.type = ["null", "integer"]
        schema.minimum = -(2 ** (bit_width - 1))
        schema.maximum = 2 ** (bit_width - 1) - 1
    elif type_name in FLOAT_TYPES:
        schema.type = ["null", "number"]
    elif type_name in DECIMAL_TYPES:
        # Exclusive bounds plus multipleOf encode the exact precision/scale.
        digits = col.numeric_precision - col.numeric_scale
        schema.type = ["null", "number"]
        schema.exclusiveMaximum = True
        schema.maximum = 10 ** digits
        schema.exclusiveMinimum = True
        schema.minimum = -10 ** digits
        schema.multipleOf = 10 ** (0 - col.numeric_scale)
    elif type_name in STRING_TYPES:
        if col.ccsid in UNSUPPORTED_CCSIDS:
            message = "Unsupported CCSID {}".format(col.ccsid)
            schema = Schema(None, inclusion="unsupported", description=message)
        else:
            schema.type = ["null", "string"]
            if col.character_maximum_length > 0:
                schema.maxLength = col.character_maximum_length
    elif type_name in DATETIME_TYPES:
        schema.type = ["null", "string"]
        schema.format = "date-time"
    else:
        message = "Unsupported data type {}".format(type_name)
        schema = Schema(None, inclusion="unsupported", description=message)

    return schema
def column_schema(cls, col_info):
    """Translate an Airtable field description into a Singer Schema.

    The Airtable type is read from col_info["config"]["type"] when both
    keys are present, defaulting to "string". "autoNumber" fields are
    key candidates and get inclusion "automatic"; number-like types map
    to the JSON "number" type; date types receive a format hint.
    """
    config = col_info.get("config", {})
    air_type = config["type"] if "type" in config else "string"

    # autoNumber doubles as both a numeric type and a primary-key marker.
    is_key = air_type in ["autoNumber"]
    schema = Schema(inclusion="automatic" if is_key else "available")

    is_numeric = air_type in ["number", "autoNumber"]
    schema.type = ['null', 'number' if is_numeric else 'string']

    if air_type in ["dateTime"]:
        schema.format = 'date-time'
    if air_type == "date":
        schema.format = 'date'
    return schema
def discover_base(cls, base_id, base_name=None):
    """Discover the tables of one Airtable base.

    Fetches the base's table metadata from the Airtable metadata API and
    returns a list of singer CatalogEntry objects, one per table. Fields
    whose schema inclusion is "automatic" become key properties. Raises
    requests.HTTPError (via raise_for_status) on a failed API call.
    """
    cls.logger.info("discover base " + base_id)
    headers = cls.__get_auth_header()
    response = requests.get(url=cls.metadata_url + base_id, headers=headers)
    response.raise_for_status()
    entries = []
    for table in response.json()["tables"]:
        # Every stream gets an implicit string "id" column used as a key.
        schema_cols = {
            "id": Schema(inclusion="automatic", type=['null', "string"])
        }
        table_name = table["name"]
        keys = []
        meta = {}
        meta = metadata.write(meta, (), "inclusion", "available")
        meta = metadata.write(meta, 'database_name', 'base_id', base_id)
        for field in table["fields"]:
            col_schema = cls.column_schema(field)
            if col_schema.inclusion == "automatic":
                keys.append(field["name"])
            schema_cols[field["name"]] = col_schema
            meta = metadata.write(meta, ('properties', field["name"]),
                                  'inclusion', 'available')
            # Guard against fields with no "config"/"type" keys — consistent
            # with column_schema, which also treats them as optional. The
            # unguarded field["config"]["type"] raised KeyError here.
            air_type = field.get("config", {}).get("type") or None
            meta = metadata.write(meta, ('properties', field["name"]),
                                  'airtable_type', air_type)
        schema = Schema(type='object', properties=schema_cols)
        entry = CatalogEntry(tap_stream_id=table["id"],
                             database=base_name or base_id,
                             table=table_name,
                             stream=table_name,
                             metadata=metadata.to_list(meta),
                             key_properties=keys,
                             schema=schema)
        entries.append(entry)
    return entries
def sync(ctx):
    """Run a sync over every selected catalog stream.

    For each selected stream, the schema is narrowed to the fields that
    are selected (the replication key is always kept), the narrowed
    schema is written out, and then the Syncer processes the context.
    """
    # check_credentials_are_authorized(ctx)
    for stream in ctx.selected_catalog:
        props = stream.schema.properties
        # Keep explicitly selected fields plus the replication key.
        chosen = {
            name for name, prop in props.items()
            if prop.selected or name == stream.replication_key
        }
        fields = desired_fields(chosen, stream.schema)
        narrowed = Schema(
            type='object',
            properties={name: props[name] for name in fields})
        stream.schema = narrowed
        streams.write_schema(stream.tap_stream_id, narrowed)
    Syncer(ctx).sync()
def generate(columns, pk_columns):
    """Build an object Schema whose properties map each column name to the
    per-column schema produced by _for_column."""
    props = {}
    for column in columns:
        props[column.column_name] = _for_column(column, pk_columns)
    return Schema(type="object", properties=props)