def do_discover(self):
    """Build and return a catalog with one entry per stream of this tap.

    Every stream in ``self.streams`` is first linked back to this tap
    (``stream.tap = self``); its schema is then loaded and each schema
    property receives a metadata record whose inclusion is ``'automatic'``
    when the property is one of the stream's key properties and
    ``'available'`` otherwise.

    Returns:
        The ``Catalog`` populated with one ``CatalogEntry`` per stream.
    """
    logger.info('Starting discover')
    discovered = Catalog([])

    for stream in self.streams:
        # The back-reference is set before the schema is requested.
        stream.tap = self

        schema = Schema.from_dict(stream.get_schema())
        key_properties = stream.key_properties

        property_metadata = [
            {
                'breadcrumb': ['properties', prop],
                'metadata': {
                    'inclusion': (
                        'automatic' if prop in key_properties else 'available'
                    ),
                },
            }
            for prop, _ in schema.properties.items()
        ]

        # stream.schema doubles as the stream name and the tap_stream_id.
        entry = CatalogEntry(
            stream=stream.schema,
            tap_stream_id=stream.schema,
            key_properties=key_properties,
            schema=schema,
            metadata=property_metadata,
        )
        discovered.streams.append(entry)

    return discovered
def do_discover(self):
    """Build and return a catalog in which every stream and field is selected.

    Every stream in ``self.streams`` is linked back to this tap, its schema
    is loaded, and its metadata list is assembled from:

    * a stream-level record (empty breadcrumb) with ``selected`` set, and
    * one record per schema property, each marked ``"automatic"`` inclusion
      and ``selected``.

    Returns:
        The ``Catalog`` populated with one ``CatalogEntry`` per stream.
    """
    logger.info("Starting discover")
    discovered = Catalog([])

    for stream in self.streams:
        # The back-reference is set before the schema is requested.
        stream.tap = self

        schema = Schema.from_dict(stream.get_schema())
        key_properties = stream.key_properties

        # Stream-level record comes first, property records follow in
        # schema order.
        stream_level = {"breadcrumb": [], "metadata": {"selected": True}}
        metadata = [stream_level]
        metadata.extend(
            {
                "breadcrumb": ["properties", prop],
                "metadata": {"inclusion": "automatic", "selected": True},
            }
            for prop, _ in schema.properties.items()
        )

        # stream.schema doubles as the stream name and the tap_stream_id.
        entry = CatalogEntry(
            stream=stream.schema,
            tap_stream_id=stream.schema,
            key_properties=key_properties,
            schema=schema,
            metadata=metadata,
        )
        discovered.streams.append(entry)

    return discovered
def write_metadata(metadata, values, breadcrumb):
    """Append one metadata record to ``metadata`` in place.

    Args:
        metadata: List that receives the new record.
        values: Object stored under the record's ``'metadata'`` key.
        breadcrumb: Object stored under the record's ``'breadcrumb'`` key.

    Returns:
        None. The list passed in is mutated.
    """
    record = {}
    record['metadata'] = values
    record['breadcrumb'] = breadcrumb
    metadata.append(record)
def do_discover(self):
    """Discover every stream of this tap and return the resulting catalog.

    For each stream in ``self.streams`` the stream is linked back to this
    tap, its schema is loaded, and a metadata list is built from:

    * one stream-level record (empty breadcrumb), marked ``selected``, that
      carries the stream's key properties as ``table-key-properties`` and
      ``stream.get_name()`` as ``schema-name``;
    * one record per schema property, with inclusion ``"automatic"`` for key
      properties and for the stream's ``state_field`` (when set), and
      ``"available"`` for everything else.

    Returns:
        The ``Catalog`` populated with one ``CatalogEntry`` per stream.
    """
    logger.info("Starting discover")
    catalog = Catalog([])

    for stream in self.streams:
        # The back-reference is set before the schema is requested.
        stream.tap = self

        schema = Schema.from_dict(stream.get_schema())
        key_properties = stream.key_properties

        metadata = [{
            "metadata": {
                "inclusion": "available",
                # Report the stream's real key properties rather than a
                # fixed ["id"], so this record agrees with the
                # key_properties handed to the CatalogEntry below.
                "table-key-properties": key_properties,
                "selected": True,
                "schema-name": stream.get_name(),
            },
            "breadcrumb": [],
        }]

        for prop, _ in schema.properties.items():
            is_key = prop in key_properties
            # A falsy state_field means the stream has no state property.
            is_state = bool(stream.state_field) and prop == stream.state_field
            metadata.append({
                "breadcrumb": ["properties", prop],
                "metadata": {
                    "inclusion": "automatic" if is_key or is_state else "available",
                },
            })

        # stream.schema doubles as the stream name and the tap_stream_id.
        catalog.streams.append(
            CatalogEntry(
                stream=stream.schema,
                tap_stream_id=stream.schema,
                key_properties=key_properties,
                schema=schema,
                metadata=metadata,
            )
        )

    return catalog
def generate_catalog(self):
    """Build the catalog dict describing one stream per configured report.

    For every report in ``self.reports_definition`` a stream entry is
    created whose schema holds four fixed bookkeeping fields plus one
    property per dimension and per metric. Property types come from
    ``self.client.lookup_data_type``, which is called with the original
    field name before its ``ga:`` prefix is rewritten to ``ga_``.

    Dimensions are typed without ``"null"`` and are appended to the table
    key properties after ``_sdc_record_hash``; metrics are nullable.

    Returns:
        dict: ``{"streams": [...]}`` where each entry has the keys
        ``stream_name``, ``tap_stream_id``, ``schema`` and ``metadata``.
        Each metadata list starts with the stream-level record (empty
        breadcrumb), followed by the dimension records and then the
        metric records.
    """
    def describe_field(ga_type, column):
        # Metadata record shared by dimensions and metrics.
        return {
            "metadata": {
                "inclusion": "automatic",
                "selected-by-default": True,
                "ga_type": ga_type,
            },
            "breadcrumb": ["properties", column],
        }

    streams = []

    for report in self.reports_definition:
        stream_name = report['name']
        key_columns = ['_sdc_record_hash']
        properties = {
            "_sdc_record_hash": {"type": ['string']},
            "_sdc_record_timestamp": {
                "type": ["string"],
                "format": "date-time",
            },
            "report_start_date": {
                "type": ["string"],
                "format": "date-time",
            },
            "report_end_date": {
                "type": ["string"],
                "format": "date-time",
            },
        }
        field_records = []

        for raw_name in report['dimensions']:
            # The type lookup uses the original (un-renamed) field name.
            data_type = self.client.lookup_data_type('dimension', raw_name)
            column = raw_name.replace("ga:", "ga_")
            properties[column] = {"type": [data_type]}
            key_columns.append(column)
            field_records.append(describe_field('dimension', column))

        for raw_name in report['metrics']:
            data_type = self.client.lookup_data_type('metric', raw_name)
            column = raw_name.replace("ga:", "ga_")
            properties[column] = {"type": ["null", data_type]}
            field_records.append(describe_field('metric', column))

        stream_record = {
            "metadata": {
                "inclusion": "automatic",
                "table-key-properties": key_columns,
                "replication-method": "INCREMENTAL",
                "replication-key": '_sdc_record_timestamp',
                "schema-name": stream_name,
            },
            "breadcrumb": [],
        }

        streams.append({
            "stream_name": stream_name,
            "tap_stream_id": stream_name,
            "schema": {
                "type": ["null", "object"],
                "additionalProperties": False,
                "properties": properties,
            },
            "metadata": [stream_record] + field_records,
        })

    return {"streams": streams}