def sync():
    """Sync every selected stream in the catalog, stamping each record with
    ``_sdc_shop_*`` shop attributes, and emit Singer SCHEMA/RECORD/STATE
    messages plus a per-stream record-count summary.
    """
    shop_attributes = initialize_shopify_client()
    # Attach selected shop attributes to every record under _sdc_shop_* keys.
    sdc_fields = {"_sdc_shop_" + x: shop_attributes[x] for x in SDC_KEYS}

    # Emit all schemas first so we have them for child streams
    for stream in Context.catalog["streams"]:
        if Context.is_selected(stream["tap_stream_id"]):
            singer.write_schema(stream["tap_stream_id"],
                                stream["schema"],
                                stream["key_properties"],
                                bookmark_properties=stream["replication_key"])
            Context.counts[stream["tap_stream_id"]] = 0

    # If there is a currently syncing stream bookmark, shuffle the
    # stream order so it gets sync'd first
    currently_sync_stream_name = Context.state.get(
        'bookmarks', {}).get('currently_sync_stream')
    if currently_sync_stream_name:
        shuffle_streams(currently_sync_stream_name)

    # Loop over streams in catalog
    for catalog_entry in Context.catalog['streams']:
        stream_id = catalog_entry['tap_stream_id']
        stream = Context.stream_objects[stream_id]()

        if not Context.is_selected(stream_id):
            LOGGER.info('Skipping stream: %s', stream_id)
            continue

        LOGGER.info('Syncing stream: %s', stream_id)

        if not Context.state.get('bookmarks'):
            Context.state['bookmarks'] = {}
        # Bookmark the in-flight stream so an interrupted run resumes here.
        Context.state['bookmarks']['currently_sync_stream'] = stream_id

        # Hoisted out of the per-record loop: both values depend only on the
        # catalog entry, not on the individual record.
        record_schema = catalog_entry['schema']
        record_metadata = metadata.to_map(catalog_entry['metadata'])

        # some fields have epoch-time as date, hence transform into UTC date
        with Transformer(
                singer.UNIX_SECONDS_INTEGER_DATETIME_PARSING) as transformer:
            for rec in stream.sync():
                extraction_time = singer.utils.now()
                rec = transformer.transform({**rec, **sdc_fields},
                                            record_schema, record_metadata)
                singer.write_record(stream_id, rec,
                                    time_extracted=extraction_time)
                Context.counts[stream_id] += 1

        # Stream finished: clear the resume bookmark and flush state.
        Context.state['bookmarks'].pop('currently_sync_stream')
        singer.write_state(Context.state)

    LOGGER.info('----------------------')
    for stream_id, stream_count in Context.counts.items():
        LOGGER.info('%s: %d', stream_id, stream_count)
    LOGGER.info('----------------------')
def sync():
    """Sync every selected stream in the catalog and emit Singer
    SCHEMA/RECORD/STATE messages plus a per-stream record-count summary.
    """
    initialize_shopify_client()

    # Emit all schemas first so we have them for child streams
    for stream in Context.catalog["streams"]:
        if Context.is_selected(stream["tap_stream_id"]):
            singer.write_schema(stream["tap_stream_id"],
                                stream["schema"],
                                stream["key_properties"],
                                bookmark_properties=stream["replication_key"])
            Context.counts[stream["tap_stream_id"]] = 0

    # If there is a currently syncing stream bookmark, shuffle the
    # stream order so it gets sync'd first
    currently_sync_stream_name = Context.state.get(
        'bookmarks', {}).get('currently_sync_stream')
    if currently_sync_stream_name:
        shuffle_streams(currently_sync_stream_name)

    # Loop over streams in catalog
    for catalog_entry in Context.catalog['streams']:
        stream_id = catalog_entry['tap_stream_id']
        stream = Context.stream_objects[stream_id]()

        if not Context.is_selected(stream_id):
            LOGGER.info('Skipping stream: %s', stream_id)
            continue

        LOGGER.info('Syncing stream: %s', stream_id)

        if not Context.state.get('bookmarks'):
            Context.state['bookmarks'] = {}
        # Bookmark the in-flight stream so an interrupted run resumes here.
        Context.state['bookmarks']['currently_sync_stream'] = stream_id

        # Hoisted out of the per-record loop: both values depend only on the
        # catalog entry, not on the individual record.
        record_schema = catalog_entry['schema']
        record_metadata = metadata.to_map(catalog_entry['metadata'])

        with Transformer() as transformer:
            for rec in stream.sync():
                extraction_time = singer.utils.now()
                rec = transformer.transform(rec, record_schema,
                                            record_metadata)
                singer.write_record(stream_id, rec,
                                    time_extracted=extraction_time)
                Context.counts[stream_id] += 1

        # Stream finished: clear the resume bookmark and flush state.
        Context.state['bookmarks'].pop('currently_sync_stream')
        singer.write_state(Context.state)

    LOGGER.info('----------------------')
    for stream_id, stream_count in Context.counts.items():
        LOGGER.info('%s: %d', stream_id, stream_count)
    LOGGER.info('----------------------')
def get_selected_parents():
    """Yield an instantiated stream object for each selected parent stream."""
    parent_stream_ids = ('orders', 'customers', 'products',
                         'custom_collections')
    yield from (Context.stream_objects[name]()
                for name in parent_stream_ids
                if Context.is_selected(name))
def sync():
    """Sync every selected stream in the catalog (async where configured and
    available), emit Singer messages, and log a per-stream count/duration
    summary.
    """
    initialize_shopify_client()

    # Emit all schemas first so we have them for child streams
    for stream in Context.catalog["streams"]:
        if Context.is_selected(stream["tap_stream_id"]):
            singer.write_schema(stream["tap_stream_id"],
                                stream["schema"],
                                stream["key_properties"],
                                bookmark_properties=stream["replication_key"])
            Context.counts[stream["tap_stream_id"]] = 0
            Context.durations[stream["tap_stream_id"]] = None

    # If there is a currently syncing stream bookmark, shuffle the
    # stream order so it gets sync'd first
    currently_sync_stream_name = Context.state.get(
        'bookmarks', {}).get('currently_sync_stream')
    if currently_sync_stream_name:
        shuffle_streams(currently_sync_stream_name)

    # Loop over streams in catalog
    for catalog_entry in Context.catalog['streams']:
        stream_start_time = time.time()
        stream_id = catalog_entry['tap_stream_id']
        stream = Context.stream_objects[stream_id]()
        stream.schema = catalog_entry['schema']

        if not Context.is_selected(stream_id):
            LOGGER.info('Skipping stream: %s', stream_id)
            continue

        LOGGER.info('Syncing stream: %s', stream_id)

        if not Context.state.get('bookmarks'):
            Context.state['bookmarks'] = {}
        # Bookmark the in-flight stream so an interrupted run resumes here.
        Context.state['bookmarks']['currently_sync_stream'] = stream_id

        if Context.config.get("use_async", False) and stream.async_available:
            # The async path returns the record count directly.
            Context.counts[stream_id] = stream.sync_async()
        else:
            # Hoisted out of the per-record loop: the metadata map depends
            # only on the catalog entry, not on the individual record.
            record_metadata = metadata.to_map(catalog_entry['metadata'])
            with Transformer() as transformer:
                for rec in stream.sync():
                    extraction_time = singer.utils.now()
                    rec = transformer.transform(rec, stream.schema,
                                                record_metadata)
                    singer.write_record(stream_id, rec,
                                        time_extracted=extraction_time)
                    Context.counts[stream_id] += 1

        # Stream finished: clear the resume bookmark and flush state.
        Context.state['bookmarks'].pop('currently_sync_stream')
        singer.write_state(Context.state)

        stream_job_duration = time.strftime(
            "%H:%M:%S", time.gmtime(time.time() - stream_start_time))
        Context.durations[stream_id] = stream_job_duration

    # Build one multi-line summary message with counts and durations.
    div = "-"*50
    info_msg = "\n{d}".format(d=div)
    info_msg += "\nShop: {}".format(Context.config['shop'])
    info_msg += "\n{d}\n".format(d=div)
    for stream_id, stream_count in Context.counts.items():
        info_msg += "\n{}: {}".format(stream_id, stream_count)
        info_msg += "\nDuration: {}".format(Context.durations[stream_id])
    info_msg += "\n{d}\n".format(d=div)
    LOGGER.info(info_msg)
def sync():
    """Sync every selected stream in the catalog, emit Singer messages, and
    map Shopify API errors to :class:`ShopifyError` with actionable messages.
    """
    initialize_shopify_client()

    # Emit all schemas first so we have them for child streams
    for stream in Context.catalog["streams"]:
        if Context.is_selected(stream["tap_stream_id"]):
            singer.write_schema(stream["tap_stream_id"],
                                stream["schema"],
                                stream["key_properties"],
                                bookmark_properties=stream["replication_key"])
            Context.counts[stream["tap_stream_id"]] = 0

    # If there is a currently syncing stream bookmark, shuffle the
    # stream order so it gets sync'd first
    currently_sync_stream_name = Context.state.get(
        'bookmarks', {}).get('currently_sync_stream')
    if currently_sync_stream_name:
        shuffle_streams(currently_sync_stream_name)

    # Loop over streams in catalog
    for catalog_entry in Context.catalog['streams']:
        stream_id = catalog_entry['tap_stream_id']
        stream = Context.stream_objects[stream_id]()

        if not Context.is_selected(stream_id):
            LOGGER.info('Skipping stream: %s', stream_id)
            continue

        LOGGER.info('Syncing stream: %s', stream_id)

        if not Context.state.get('bookmarks'):
            Context.state['bookmarks'] = {}
        # Bookmark the in-flight stream so an interrupted run resumes here.
        Context.state['bookmarks']['currently_sync_stream'] = stream_id

        with Transformer() as transformer:
            try:
                # Hoisted out of the per-record loop (they depend only on
                # the catalog entry), but kept inside the try so a failure
                # here still maps through the handlers below.
                record_schema = catalog_entry['schema']
                record_metadata = metadata.to_map(catalog_entry['metadata'])
                for rec in stream.sync():
                    extraction_time = singer.utils.now()
                    rec = transformer.transform(rec, record_schema,
                                                record_metadata)
                    singer.write_record(stream_id, rec,
                                        time_extracted=extraction_time)
                    Context.counts[stream_id] += 1
            except pyactiveresource.connection.ResourceNotFound as exc:
                raise ShopifyError(exc, 'Ensure shop is entered correctly') from exc
            except pyactiveresource.connection.UnauthorizedAccess as exc:
                raise ShopifyError(exc, 'Invalid access token - Re-authorize the connection') \
                    from exc
            except pyactiveresource.connection.ConnectionError as exc:
                msg = ''
                # Best-effort extraction of the API's error detail; the
                # finally clause raises whether or not parsing succeeds.
                try:
                    body_json = exc.response.body.decode()
                    body = json.loads(body_json)
                    msg = body.get('errors')
                finally:
                    raise ShopifyError(exc, msg) from exc
            except Exception as exc:
                raise ShopifyError(exc) from exc

        # Stream finished: clear the resume bookmark and flush state.
        Context.state['bookmarks'].pop('currently_sync_stream')
        singer.write_state(Context.state)

    LOGGER.info('----------------------')
    for stream_id, stream_count in Context.counts.items():
        LOGGER.info('%s: %d', stream_id, stream_count)
    LOGGER.info('----------------------')