def column_schema(cls, col_info):
    """Build the ``Schema`` describing a single column.

    The column type is read from ``col_info["config"]["type"]``; when
    either key is absent the type falls back to ``"string"``.

    * ``autoNumber`` columns get ``inclusion="automatic"``; every other
      column gets ``inclusion="available"``.
    * ``number`` and ``autoNumber`` columns are typed
      ``['null', 'number']``; everything else is ``['null', 'string']``.
    * ``dateTime`` columns get ``format='date-time'`` and ``date``
      columns get ``format='date'``.
    """
    has_type = "config" in col_info and "type" in col_info["config"]
    air_type = col_info["config"]["type"] if has_type else "string"

    schema = Schema(
        inclusion="automatic" if air_type == "autoNumber" else "available")

    is_numeric = air_type == "number" or air_type == "autoNumber"
    schema.type = ['null', 'number' if is_numeric else 'string']

    # The two date flavours are mutually exclusive, so one chain suffices.
    if air_type == "dateTime":
        schema.format = 'date-time'
    elif air_type == "date":
        schema.format = 'date'

    return schema
def discover(client):
    """Build a catalog from the static schemas plus the record streams.

    The first pass adds one entry per schema returned by ``get_schemas``,
    with its key from ``get_pk``. The second pass calls
    ``_records_streams`` and ``_partner_records_streams`` with ``client``,
    converts each result with ``_convert_to_singer_streams`` and adds one
    entry per converted stream, with its key from ``PRIMARY_KEYS``.
    """
    schemas, field_metadata = get_schemas()
    catalog = Catalog([])

    # Pass 1: schemas known up front.
    for name, raw_schema in schemas.items():
        static_schema = Schema.from_dict(raw_schema)
        primary_key = get_pk(name)
        static_mdata = field_metadata[name]
        catalog.streams.append(CatalogEntry(
            stream=name,
            tap_stream_id=name,
            key_properties=primary_key,
            schema=static_schema,
            metadata=static_mdata,
        ))

    # Pass 2: streams obtained through the client.
    for fetch_streams in (_records_streams, _partner_records_streams):
        converted = _convert_to_singer_streams(fetch_streams(client))
        for name, payload in converted.items():
            dynamic_schema = Schema.from_dict(payload['schema'])
            dynamic_mdata = payload['metadata']
            catalog.streams.append(CatalogEntry(
                stream=name,
                tap_stream_id=name,
                key_properties=PRIMARY_KEYS[name],
                schema=dynamic_schema,
                metadata=dynamic_mdata,
            ))

    return catalog
def discover():
    """Build a catalog with one entry per schema from ``get_schemas``.

    Every stream is keyed on ``_id``. The stream-level metadata marks the
    stream as not selected by default; the ``_id`` field is tagged
    ``inclusion: automatic`` and every other field ``inclusion: available``.

    Returns:
        The populated ``Catalog``.
    """
    schemas = get_schemas()
    catalog = Catalog([])

    for schema_name, schema_dict in schemas.items():
        schema = Schema.from_dict(schema_dict)

        metadata = [{
            'metadata': {'selected-by-default': False},
            'breadcrumb': [],
        }]

        for field_name in schema_dict['properties']:
            # Compare by value: ``is`` tests object identity, which is not
            # guaranteed to hold for two equal strings.
            if field_name == '_id':
                inclusion = 'automatic'
            else:
                inclusion = 'available'
            metadata.append({
                'metadata': {'inclusion': inclusion},
                'breadcrumb': ['properties', field_name],
            })

        catalog.streams.append(
            CatalogEntry(stream=schema_name,
                         tap_stream_id=schema_name,
                         schema=schema,
                         metadata=metadata,
                         key_properties=['_id']))

    return catalog
def discover():
    """Assemble the catalog from the schemas returned by ``get_schemas``.

    Each stream's primary key is looked up in ``PKS``. Fields that belong
    to the primary key are tagged ``inclusion: automatic``; all other
    fields are tagged ``inclusion: available``.
    """
    all_schemas = get_schemas()
    catalog = Catalog([])

    for name, raw_schema in all_schemas.items():
        parsed = Schema.from_dict(raw_schema)
        key_fields = PKS[name]

        field_mdata = [
            {
                'metadata': {
                    'inclusion': ('automatic' if field in key_fields
                                  else 'available'),
                },
                'breadcrumb': ['properties', field],
            }
            for field in raw_schema['properties'].keys()
        ]

        entry = CatalogEntry(
            stream=name,
            tap_stream_id=name,
            key_properties=key_fields,
            schema=parsed,
            metadata=field_mdata,
        )
        catalog.streams.append(entry)

    return catalog
def discover(ctx):
    """Check credentials, then build the catalog for ``streams.STREAMS``.

    Each schema is loaded with ``inclusion="available"``. Field metadata
    is written through the ``metadata`` helpers: fields listed in
    ``streams.PK_FIELDS`` for the stream become ``automatic`` and the
    rest ``available``.
    """
    check_credentials_are_authorized(ctx)
    catalog = Catalog([])

    for stream in streams.STREAMS:
        raw_schema = streams.load_schema(stream.tap_stream_id)
        schema = Schema.from_dict(raw_schema, inclusion="available")

        compiled = metadata.new()
        for prop in schema.properties:
            is_key = prop in streams.PK_FIELDS[stream.tap_stream_id]
            compiled = metadata.write(
                compiled,
                ('properties', prop),
                'inclusion',
                'automatic' if is_key else 'available',
            )

        entry = CatalogEntry(
            stream=stream.tap_stream_id,
            tap_stream_id=stream.tap_stream_id,
            key_properties=streams.PK_FIELDS[stream.tap_stream_id],
            schema=schema,
            metadata=metadata.to_list(compiled),
        )
        catalog.streams.append(entry)

    return catalog
def discover(select_all, client, spreadsheet_id):
    """Build the catalog for the schemas of one spreadsheet.

    Schemas and their metadata come from
    ``get_schemas(client, spreadsheet_id)``. A stream's key properties are
    taken from ``STREAMS[stream]['key_properties']`` when present;
    otherwise the last truthy ``table-key-properties`` value found in the
    stream's metadata is used (``None`` if there is none).

    When ``select_all`` is truthy, ``select_all_fields_in_streams`` is run
    on the finished catalog.
    """
    schemas, field_metadata = get_schemas(client, spreadsheet_id)
    catalog = Catalog([])

    for name, raw_schema in schemas.items():
        schema = Schema.from_dict(raw_schema, selected=select_all)
        stream_mdata = field_metadata[name]

        # Gather every truthy table-key-properties value; the last one wins.
        declared_keys = [
            keys
            for keys in (
                item.get('metadata', {}).get('table-key-properties')
                for item in stream_mdata
            )
            if keys
        ]
        fallback_keys = declared_keys[-1] if declared_keys else None

        stream_config = STREAMS.get(name, {})
        catalog.streams.append(CatalogEntry(
            stream=name,
            tap_stream_id=name,
            key_properties=stream_config.get('key_properties', fallback_keys),
            schema=schema,
            metadata=stream_mdata,
        ))

    if select_all:
        select_all_fields_in_streams(catalog)

    return catalog
def discover():
    """Build the catalog for every id in ``schemas.STATIC_SCHEMA_STREAM_IDS``.

    Fields listed in ``schemas.PK_FIELDS`` for a stream are tagged
    ``inclusion: automatic``; every other field is tagged
    ``inclusion: available``.
    """
    catalog = Catalog([])

    for stream_id in schemas.STATIC_SCHEMA_STREAM_IDS:
        schema = Schema.from_dict(schemas.load_schema(stream_id))

        field_mdata = []
        for field in schema.properties.keys():
            is_key = field in schemas.PK_FIELDS[stream_id]
            field_mdata.append({
                "metadata": {
                    "inclusion": "automatic" if is_key else "available",
                },
                "breadcrumb": ["properties", field],
            })

        entry = CatalogEntry(
            stream=stream_id,
            tap_stream_id=stream_id,
            key_properties=schemas.PK_FIELDS[stream_id],
            schema=schema,
            metadata=field_mdata,
        )
        catalog.streams.append(entry)

    return catalog
def do_discover(self):
    """Build the catalog for ``self.streams`` using standard metadata.

    Each stream gets ``self`` assigned to its ``tap`` attribute before its
    schema is read. Metadata comes from ``metadata.get_standard_metadata``;
    when the stream has a truthy ``state_field`` it is passed as the only
    valid replication key and that property is then forced to
    ``inclusion: automatic``.
    """
    logger.info('Starting discover')
    catalog = Catalog([])

    for stream in self.streams:
        stream.tap = self
        schema = Schema.from_dict(stream.get_schema())
        key_properties = stream.key_properties

        stream_mdata = metadata.get_standard_metadata(
            schema=schema.to_dict(),
            key_properties=key_properties,
            valid_replication_keys=(
                [stream.state_field] if stream.state_field else None),
            replication_method=stream.replication_method,
        )

        # The state field has to be marked as automatic, so rewrite its
        # inclusion on the breadcrumb-keyed form of the metadata.
        if stream.state_field:
            by_breadcrumb = metadata.to_map(stream_mdata)
            state_crumb = ('properties', stream.state_field)
            by_breadcrumb[state_crumb]['inclusion'] = 'automatic'
            stream_mdata = metadata.to_list(by_breadcrumb)

        entry = CatalogEntry(
            stream=stream.schema,
            tap_stream_id=stream.schema,
            key_properties=key_properties,
            schema=schema,
            metadata=stream_mdata,
        )
        catalog.streams.append(entry)

    return catalog
def discover():
    """Return a catalog covering ``schemas.STATIC_SCHEMA_STREAM_IDS``.

    For each stream the schema is loaded with ``schemas.load_schema`` and
    one metadata record is produced per property: ``automatic`` for
    fields in ``schemas.PK_FIELDS[stream]``, ``available`` otherwise.
    """
    catalog = Catalog([])

    for stream_id in schemas.STATIC_SCHEMA_STREAM_IDS:
        loaded = Schema.from_dict(schemas.load_schema(stream_id))

        records = [
            {
                'metadata': {
                    'inclusion': (
                        'automatic'
                        if name in schemas.PK_FIELDS[stream_id]
                        else 'available'
                    ),
                },
                'breadcrumb': ['properties', name],
            }
            for name in loaded.properties.keys()
        ]

        catalog.streams.append(
            CatalogEntry(
                stream=stream_id,
                tap_stream_id=stream_id,
                key_properties=schemas.PK_FIELDS[stream_id],
                schema=loaded,
                metadata=records,
            )
        )

    return catalog
def discover():
    '''Run discovery mode.

    Builds one ``CatalogEntry`` per item in ``STREAMS``. Metadata comes
    from ``metadata.get_standard_metadata``. When the stream defines a
    replication key, that key is advertised as the only valid replication
    key and its property is forced to ``inclusion: automatic``.

    Returns:
        A ``Catalog`` holding every generated entry.
    '''
    entries = []

    for stream_id, stream_object in STREAMS.items():
        raw_schema = load_schema(stream_id)
        schema = Schema.from_dict(raw_schema)
        replication_key = stream_object.replication_key

        mdata = metadata.to_map(
            metadata.get_standard_metadata(
                schema=raw_schema,
                schema_name=stream_id,
                key_properties=stream_object.key_properties,
                # Only advertise a replication key when the stream has one;
                # otherwise a bare ``[None]`` would be passed along.
                valid_replication_keys=(
                    [replication_key] if replication_key else None),
                replication_method=stream_object.replication_method))

        # Make sure that the replication key field is mandatory.
        if replication_key:
            mdata = metadata.write(
                mdata, ('properties', replication_key),
                'inclusion', 'automatic')

        entries.append(
            CatalogEntry(stream=stream_id,
                         tap_stream_id=stream_id,
                         key_properties=stream_object.key_properties,
                         schema=schema,
                         metadata=metadata.to_list(mdata)))

    return Catalog(entries)
def do_discover(self):
    """Build the catalog for ``self.streams`` with hand-written metadata.

    Each stream gets ``self`` assigned to its ``tap`` attribute before its
    schema is read. Properties named in the stream's ``key_properties``
    are tagged ``inclusion: automatic``; all others ``available``.
    """
    logger.info('Starting discover')
    catalog = Catalog([])

    for stream in self.streams:
        stream.tap = self
        schema = Schema.from_dict(stream.get_schema())
        key_properties = stream.key_properties

        field_mdata = [
            {
                'breadcrumb': ['properties', prop],
                'metadata': {
                    'inclusion': ('automatic' if prop in key_properties
                                  else 'available'),
                },
            }
            for prop in schema.properties
        ]

        catalog.streams.append(CatalogEntry(
            stream=stream.schema,
            tap_stream_id=stream.schema,
            key_properties=key_properties,
            schema=schema,
            metadata=field_mdata,
        ))

    return catalog
def discover(ctx):
    """Check credentials, then build the catalog for ``schemas.stream_ids``.

    Metadata starts from ``metadata.get_standard_metadata`` with the
    stream's primary key from ``schemas.PK_FIELDS``. For the ``lists`` and
    ``messages`` streams, the stream itself and every one of its
    properties are additionally forced to ``inclusion: automatic``.
    """
    check_credentials_are_authorized(ctx)
    catalog = Catalog([])

    for stream_id in schemas.stream_ids:
        raw_schema = schemas.load_schema(stream_id)
        schema = Schema.from_dict(raw_schema)
        compiled = metadata.to_map(
            metadata.get_standard_metadata(
                raw_schema, key_properties=schemas.PK_FIELDS[stream_id]))

        # NB: `lists` and `messages` are required for their substreams.
        # This approximates the earlier behaviour, which marked them as
        # `selected=True` in the schema, by using metadata instead.
        if stream_id in ('lists', 'messages'):
            compiled = metadata.write(compiled, (), 'inclusion', 'automatic')
            for field in raw_schema['properties'].keys():
                compiled = metadata.write(
                    compiled, ('properties', field), 'inclusion', 'automatic')

        entry = CatalogEntry(
            stream=stream_id,
            tap_stream_id=stream_id,
            key_properties=schemas.PK_FIELDS[stream_id],
            schema=schema,
            metadata=metadata.to_list(compiled),
        )
        catalog.streams.append(entry)

    return catalog
def do_discover(self):
    """Build a catalog in which every stream and property is pre-selected.

    Each stream gets ``self`` assigned to its ``tap`` attribute before its
    schema is read. The stream-level metadata record carries
    ``selected: True``; every property record carries
    ``inclusion: automatic`` and ``selected: True``.
    """
    logger.info("Starting discover")
    catalog = Catalog([])

    for stream in self.streams:
        stream.tap = self
        schema = Schema.from_dict(stream.get_schema())
        key_properties = stream.key_properties

        # The stream-level record comes first, then one record per property.
        records = [{"breadcrumb": [], "metadata": {"selected": True}}]
        records.extend(
            {
                "breadcrumb": ["properties", prop],
                "metadata": {"inclusion": "automatic", "selected": True},
            }
            for prop in schema.properties
        )

        entry = CatalogEntry(
            stream=stream.schema,
            tap_stream_id=stream.schema,
            key_properties=key_properties,
            schema=schema,
            metadata=records,
        )
        catalog.streams.append(entry)

    return catalog
def generate_catalog(client, standard_fields, custom_fields, exclusions):
    """Build a single-stream catalog for the ``report`` stream.

    Args:
        client: Passed through to ``generate_catalog_entry``.
        standard_fields: Passed through to ``generate_catalog_entry``.
        custom_fields: Passed through to ``generate_catalog_entry``.
        exclusions: Passed through to ``generate_catalog_entry``.

    Returns:
        A ``Catalog`` containing one ``CatalogEntry`` named ``report``,
        keyed on ``_sdc_record_hash``.
    """
    # Forward the ``exclusions`` argument itself. The previous code passed
    # ``field_exclusions``, which is not a parameter of this function, and
    # left ``exclusions`` unused.
    schema, mdata = generate_catalog_entry(
        client, standard_fields, custom_fields, exclusions)

    catalog_entry = CatalogEntry(
        schema=Schema.from_dict(schema),
        key_properties=['_sdc_record_hash'],
        stream='report',
        tap_stream_id='report',
        metadata=metadata.to_list(mdata))

    return Catalog([catalog_entry])
def discover_base(cls, base_id, base_name=None):
    """Fetch the tables of one base and return a catalog entry per table.

    A GET request is sent to ``cls.metadata_url + base_id`` with the
    headers from ``cls.__get_auth_header()``; a non-success status raises
    via ``raise_for_status``. Every table gets an ``id`` string column
    marked automatic plus one column per field, built by
    ``cls.column_schema``. Fields whose column schema is ``automatic`` are
    collected as the entry's key properties.

    The entry's ``database`` is ``base_name`` when truthy, else ``base_id``.
    """
    cls.logger.info("discover base " + base_id)

    auth_headers = cls.__get_auth_header()
    resp = requests.get(url=cls.metadata_url + base_id, headers=auth_headers)
    resp.raise_for_status()

    catalog_entries = []
    for table in resp.json()["tables"]:
        columns = {
            "id": Schema(inclusion="automatic", type=['null', "string"]),
        }
        table_name = table["name"]
        primary_keys = []

        mdata = metadata.write({}, (), "inclusion", "available")
        mdata = metadata.write(mdata, 'database_name', 'base_id', base_id)

        for field in table["fields"]:
            col_schema = cls.column_schema(field)
            if col_schema.inclusion == "automatic":
                primary_keys.append(field["name"])
            columns[field["name"]] = col_schema

            breadcrumb = ('properties', field["name"])
            mdata = metadata.write(mdata, breadcrumb, 'inclusion', 'available')
            mdata = metadata.write(
                mdata, breadcrumb, 'airtable_type',
                field["config"]["type"] or None)

        table_schema = Schema(type='object', properties=columns)
        catalog_entries.append(CatalogEntry(
            tap_stream_id=table["id"],
            database=base_name or base_id,
            table=table_name,
            stream=table_name,
            metadata=metadata.to_list(mdata),
            key_properties=primary_keys,
            schema=table_schema,
        ))

    return catalog_entries
def discover():
    """Return a catalog with one entry per stream in ``streams_.ALL_STREAMS``.

    Entries carry only the stream name, the tap stream id and the schema
    loaded via ``load_schema``.
    """
    catalog = Catalog([])

    for stream in streams_.ALL_STREAMS:
        stream_id = stream.tap_stream_id
        loaded = Schema.from_dict(load_schema(stream_id))
        entry = CatalogEntry(
            stream=stream_id,
            tap_stream_id=stream_id,
            schema=loaded,
        )
        catalog.streams.append(entry)

    return catalog
def discover(ctx):
    """Return a catalog with one entry per stream in ``streams_.all_streams``.

    Schemas are loaded with ``load_schema(ctx, tap_stream_id)`` and parsed
    with ``inclusion="automatic"``. Key properties come from each stream's
    ``pk_fields``.
    """
    catalog = Catalog([])

    for stream in streams_.all_streams:
        raw_schema = load_schema(ctx, stream.tap_stream_id)
        parsed = Schema.from_dict(raw_schema, inclusion="automatic")
        entry = CatalogEntry(
            stream=stream.tap_stream_id,
            tap_stream_id=stream.tap_stream_id,
            key_properties=stream.pk_fields,
            schema=parsed,
        )
        catalog.streams.append(entry)

    return catalog
def discover(ctx):
    """Check credentials, then build the catalog for ``schemas.stream_ids``.

    Schemas are parsed with ``inclusion="automatic"`` and keyed with
    ``schemas.PK_FIELDS``.
    """
    check_credentials_are_authorized(ctx)
    catalog = Catalog([])

    for stream_id in schemas.stream_ids:
        raw_schema = schemas.load_schema(stream_id)
        parsed = Schema.from_dict(raw_schema, inclusion="automatic")
        entry = CatalogEntry(
            stream=stream_id,
            tap_stream_id=stream_id,
            key_properties=schemas.PK_FIELDS[stream_id],
            schema=parsed,
        )
        catalog.streams.append(entry)

    return catalog
def discover(ctx):
    """Build the catalog for ``streams.STREAMS`` without a credential check.

    Schemas are parsed with ``inclusion="available"`` and keyed with
    ``streams.PK_FIELDS``. ``ctx`` is accepted but not used by the body.
    """
    # NOTE: the call to check_credentials_are_authorized(ctx) is disabled
    # in this function.
    catalog = Catalog([])

    for stream in streams.STREAMS:
        raw_schema = streams.load_schema(stream.tap_stream_id)
        parsed = Schema.from_dict(raw_schema, inclusion="available")
        entry = CatalogEntry(
            stream=stream.tap_stream_id,
            tap_stream_id=stream.tap_stream_id,
            key_properties=streams.PK_FIELDS[stream.tap_stream_id],
            schema=parsed,
        )
        catalog.streams.append(entry)

    return catalog
def discover(config):
    """Test credentials, then build the catalog for ``streams_.all_streams``.

    Schemas are parsed with ``inclusion="automatic"``; key properties come
    from each stream's ``pk_fields``.
    """
    test_credentials_are_authorized(config)
    catalog = Catalog([])

    for stream in streams_.all_streams:
        raw_schema = load_schema(stream.tap_stream_id)
        parsed = Schema.from_dict(raw_schema, inclusion="automatic")
        entry = CatalogEntry(
            stream=stream.tap_stream_id,
            tap_stream_id=stream.tap_stream_id,
            key_properties=stream.pk_fields,
            schema=parsed,
        )
        catalog.streams.append(entry)

    return catalog
def discover():
    """Build the catalog for ``streams_.ALL_STREAMS``.

    Metadata for each stream is produced by
    ``generate_metadata(stream, schema)``; key properties come from the
    stream's ``pk_fields``.
    """
    catalog = Catalog([])

    for stream in streams_.ALL_STREAMS:
        parsed = Schema.from_dict(load_schema(stream.tap_stream_id))
        stream_mdata = generate_metadata(stream, parsed)
        entry = CatalogEntry(
            stream=stream.tap_stream_id,
            tap_stream_id=stream.tap_stream_id,
            key_properties=stream.pk_fields,
            schema=parsed,
            metadata=stream_mdata,
        )
        catalog.streams.append(entry)

    return catalog
def discover(ctx):
    """Check credentials, then build the catalog for ``schemas.stream_ids``.

    Schemas are loaded with ``load_schema``, parsed with
    ``inclusion="available"`` and keyed with ``schemas.pk_fields``.

    ``ctx`` is accepted but not used by the body; the credential check is
    called without arguments.
    """
    check_credentials_are_authorized()
    catalog = Catalog([])

    for stream_id in schemas.stream_ids:
        raw_schema = load_schema(stream_id)
        parsed = Schema.from_dict(raw_schema, inclusion="available")
        entry = CatalogEntry(
            stream=stream_id,
            tap_stream_id=stream_id,
            key_properties=schemas.pk_fields[stream_id],
            schema=parsed,
        )
        catalog.streams.append(entry)

    return catalog
def discover():
    """Build the catalog from the pairs yielded by ``get_endpoints``.

    ``get_schema(stream_name, endpoint_config)`` supplies both the raw
    schema and the metadata; the key properties are the endpoint
    config's ``pk`` value.
    """
    catalog = Catalog([])

    for name, endpoint in get_endpoints():
        raw_schema, stream_mdata = get_schema(name, endpoint)
        parsed = Schema.from_dict(raw_schema)
        entry = CatalogEntry(
            stream=name,
            tap_stream_id=name,
            key_properties=endpoint['pk'],
            schema=parsed,
            metadata=stream_mdata,
        )
        catalog.streams.append(entry)

    return catalog
def discover(ctx):
    """Refresh credentials, then build the catalog for ``streams_.all_streams``.

    For each stream the raw schema is loaded first, metadata is derived
    from it with ``load_metadata(stream, schema_dict)``, and only then is
    the schema parsed.
    """
    ctx.refresh_credentials()
    catalog = Catalog([])

    for stream in streams_.all_streams:
        raw_schema = load_schema(stream.tap_stream_id)
        stream_mdata = load_metadata(stream, raw_schema)
        parsed = Schema.from_dict(raw_schema)
        entry = CatalogEntry(
            stream=stream.tap_stream_id,
            tap_stream_id=stream.tap_stream_id,
            key_properties=stream.pk_fields,
            schema=parsed,
            metadata=stream_mdata,
        )
        catalog.streams.append(entry)

    return catalog
def discover():
    """Build the catalog from the schemas and metadata of ``get_schemas``.

    Key properties for each stream are read from
    ``STREAMS[stream_name]['key_properties']``.
    """
    schemas, field_metadata = get_schemas()
    catalog = Catalog([])

    for name, raw_schema in schemas.items():
        parsed = Schema.from_dict(raw_schema)
        stream_mdata = field_metadata[name]
        entry = CatalogEntry(
            stream=name,
            tap_stream_id=name,
            key_properties=STREAMS[name]['key_properties'],
            schema=parsed,
            metadata=stream_mdata,
        )
        catalog.streams.append(entry)

    return catalog
def discover(config):
    """Initialise credentials, then build the catalog for all streams.

    ``init_credentials(config)`` is called first; its return value is
    rebound to ``config`` but not read again in this function.

    For each stream in ``streams_.all_streams`` the metadata is derived
    from the raw schema *before* ``"selected": True`` is added to that
    schema dict, and the schema is parsed afterwards.
    """
    config = init_credentials(config)
    catalog = Catalog([])

    for stream in streams_.all_streams:
        raw_schema = load_schema(stream.tap_stream_id)
        stream_mdata = load_metadata(stream, raw_schema)

        # Mark the stream as selected on the schema itself before parsing.
        raw_schema["selected"] = True
        parsed = Schema.from_dict(raw_schema)

        entry = CatalogEntry(
            stream=stream.tap_stream_id,
            tap_stream_id=stream.tap_stream_id,
            key_properties=stream.pk_fields,
            schema=parsed,
            metadata=stream_mdata,
        )
        catalog.streams.append(entry)

    return catalog
def discover(client):
    """Build the catalog for every resource listed in ``RESOURCES``.

    ``RESOURCES`` maps a resource name to the stream name used in the
    catalog. Schema and metadata come from
    ``get_schema(client, resource_name)``; every stream is keyed on ``Id``.
    """
    catalog = Catalog([])

    for resource_name, stream_name in RESOURCES.items():
        raw_schema, stream_mdata = get_schema(client, resource_name)
        parsed = Schema.from_dict(raw_schema)
        entry = CatalogEntry(
            stream=stream_name,
            tap_stream_id=stream_name,
            key_properties=['Id'],
            schema=parsed,
            metadata=stream_mdata,
        )
        catalog.streams.append(entry)

    return catalog
def discover(service):
    """Build the catalog for every entity in ``service.entities``.

    For each entity the option-set metadata is fetched with
    ``get_optionset_metadata`` and handed, together with the entity's
    ``__odata_schema__``, to ``get_schema``, which returns the raw schema,
    the metadata and the primary keys.
    """
    catalog = Catalog([])

    for name, entity in service.entities.items():
        optionsets = get_optionset_metadata(service, name)
        raw_schema, stream_mdata, primary_keys = get_schema(
            entity.__odata_schema__, optionsets)
        parsed = Schema.from_dict(raw_schema)
        entry = CatalogEntry(
            stream=name,
            tap_stream_id=name,
            key_properties=primary_keys,
            schema=parsed,
            metadata=stream_mdata,
        )
        catalog.streams.append(entry)

    return catalog
def discover(ctx):
    """Build the catalog for ``streams_.stream_ids``.

    The stream whose id equals ``schemas.IDS.EVENT_LOG`` is left out when
    ``has_access_to_event_log(ctx)`` returns a falsy value. Schemas are
    parsed with ``inclusion="automatic"`` and keyed with
    ``schemas.PK_FIELDS``.
    """
    LOGGER.info("Running discover")
    event_log_allowed = has_access_to_event_log(ctx)
    catalog = Catalog([])

    for stream_id in streams_.stream_ids:
        # Skip the event log stream when access to it is not available.
        if not event_log_allowed and stream_id == schemas.IDS.EVENT_LOG:
            continue

        loaded = schemas.load_schema(ctx, stream_id)
        parsed = Schema.from_dict(loaded, inclusion="automatic")
        entry = CatalogEntry(
            stream=stream_id,
            tap_stream_id=stream_id,
            key_properties=schemas.PK_FIELDS[stream_id],
            schema=parsed,
        )
        catalog.streams.append(entry)

    return catalog
def discover():
    """Return a catalog built from the schemas and metadata of ``get_schemas``.

    Key properties for each stream are read from
    ``STREAMS[schema_name]['key_properties']``.
    """
    schemas, schemas_metadata = get_schemas()

    entries = []
    for name, raw_schema in schemas.items():
        parsed = Schema.from_dict(raw_schema)
        stream_mdata = schemas_metadata[name]
        entry = CatalogEntry(
            tap_stream_id=name,
            stream=name,
            schema=parsed,
            key_properties=STREAMS[name]['key_properties'],
            metadata=stream_mdata,
        )
        entries.append(entry)

    return Catalog(entries)