def run(dryrun=False):
    """Entry point: run the MediaOcean download, logging any failure.

    Acts as a top-level error boundary so a download failure is logged
    rather than propagated to the scheduler.

    :param dryrun: when True, passed through so no remote files are
        fetched or deleted.
    """
    logger.info("MediaOcean download started.")
    try:
        download(dryrun)
    except Exception as err:
        logger.exception(err, "MediaOcean download failed.")
    logger.info("MediaOcean download finished.")
def download(dryrun=False):
    """Fetch MediaOcean extract files over SFTP into the local collector dir.

    Walks two remote locations — the root ("", Spectra/Offline) and
    "bulk_api" (Prisma/Online) — downloading every file into
    /dataVol/collector/mediaocean.  "extract-status" marker files are
    deleted remotely instead of downloaded, and in the production
    environment each successfully downloaded file is removed from the
    remote server.

    :param dryrun: when True, only list and log files; do not fetch or
        delete anything.
    """
    transport, sftp = connect()
    DL_PATH = "/dataVol/collector/mediaocean"
    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(DL_PATH, exist_ok=True)
    subpaths = ("", "bulk_api")  # Spectra/Offline & Prisma/Online
    try:
        for subpath in subpaths:
            filenames = sftp.listdir(subpath)
            # "bulk_api" appears in the root listing as a directory entry,
            # not a downloadable file; skip it explicitly instead of the
            # original bare `except: pass`, which silently swallowed every
            # possible exception.
            if "bulk_api" in filenames:
                filenames.remove("bulk_api")
            for fn in filenames:
                localPath = os.path.join(DL_PATH, fn)
                remotePath = os.path.join(subpath, fn)
                logger.info("Download started: %s" % fn)
                if not dryrun:
                    if fn.startswith("extract-status"):
                        # Status markers carry no payload; just clean up.
                        sftp.remove(remotePath)
                    else:
                        sftp.get(remotePath, localPath)
                        # Only production deletes the remote copy, so
                        # non-prod runs leave the source files intact.
                        if get_env() == PROD_ENV:
                            sftp.remove(remotePath)
                logger.info("Download complete: %s" % fn)
    finally:
        # Always release the SFTP session and transport, even when a
        # listdir/get/remove call raises (the original leaked both).
        sftp.close()
        transport.close()
def denormalize(week_range):
    """Run the fill SQL for TABLE_NAME over the given week range.

    Executes FILL_SQL in batches via the SQL helper, committing and
    closing the connection on success.  Any failure is logged as a
    critical denormalization error rather than re-raised.

    :param week_range: batch parameter passed to execute_batches.
    """
    try:
        logger.info('%s fill started.' % TABLE_NAME)
        db = SQL()
        db.execute_batches(FILL_SQL, week_range)
        db.commitclose()
        logger.info('%s fill finished.' % TABLE_NAME)
    except Exception as err:
        logger.exception(err, 'Critical error in denormalization: %s' % TABLE_NAME)
def write_fuel_data():
    """Persist fuel data.

    Fetches the raw fuel data, parses it to JSON, and writes it to a
    timestamped file under /dataVol/collector/json.
    """
    fuel_data = fetch_fuel_data()
    fuel_json = parse_fuel_data(fuel_data)
    ts = temporal.datetimestamp()
    base_dir = '/dataVol/collector/json'
    # exist_ok avoids the check-then-create race of the original
    # os.path.exists() guard and matches savejson()'s style.
    os.makedirs(base_dir, exist_ok=True)
    fp = os.path.join(base_dir, 'fuel_data_' + ts + '.json')
    with open(fp, 'w') as fout:
        fout.write(fuel_json)
    logger.info('Fuel prices downloaded: %s' % fp)
def download(days=1):
    """Download all Crimson Hexagon metrics, retrying on API failure.

    Collects monitor, Facebook, Twitter and Instagram results into one
    dict and saves it as JSON.  On any API exception the whole batch is
    retried every 10 seconds for up to 10 minutes; a partial batch is
    never saved.

    :param days: lookback window in days; more than 1 day marks the
        payload as historical.
    """
    logger.info('Crimson Hexagon download started.')
    dat = {}
    dat['is_historical'] = (days > 1)
    started = time.time()
    is_finished = False
    while time.time() - started < 600:
        try:
            dat['monitors'] = monitors()
            logger.info('Crimson Hexagon monitor results...')
            dat['monitor_results'] = monitor_results(days)
            dat['monitor_results_bycity'] = monitor_results_by_city(days)
            dat['monitor_results_bystate'] = monitor_results_by_state(days)
            logger.info('Crimson Hexagon facebook results...')
            dat['facebook_admin_posts'] = facebook_admin_posts(days)
            dat['facebook_page_likes'] = facebook_page_likes(days)
            dat['facebook_total_activity'] = facebook_total_activity(days)
            logger.info('Crimson Hexagon twitter results...')
            dat['twitter_engagement_metrics'] = twitter_engagement_metrics(days)
            dat['twitter_followers'] = twitter_followers(days)
            dat['twitter_sent_posts'] = twitter_sent_posts(days)
            dat['twitter_total_engagement'] = twitter_total_engagement(days)
            logger.info('Crimson Hexagon instagram results...')
            dat['instagram_followers'] = instagram_followers(days)
            dat['instagram_total_activity'] = instagram_total_activity(days)
            dat['instagram_sent_media'] = instagram_sent_media(days)
            is_finished = True
            break
        except Exception as ex:
            logger.exception(ex, 'Crimson Hexagon API failure, retrying in 10s...')
            # Sleep only before a retry.  The original used `finally:
            # time.sleep(10)`, which also ran as `break` left the try
            # block, delaying every successful run by 10 seconds.
            time.sleep(10)
    if is_finished:
        savejson(dat)
    else:
        logger.warn('Crimson Hexagon download failed!')
    logger.info('Crimson Hexagon download finished.')
def savejson(json_blob):
    """Write the downloaded blob to a timestamped JSON file under DL_PATH.

    :param json_blob: the assembled Crimson Hexagon results to persist.
    """
    os.makedirs(DL_PATH, exist_ok=True)
    filename = 'crimsonhexagon_' + temporal.datetimestamp() + '.json'
    filepath = os.path.join(DL_PATH, filename)
    files.savejson(json_blob, filepath)
    logger.info('Crimson Hexagon downloaded: %s' % filepath)