def sync_series():
    logger.info("Download idx data...")
    os.makedirs(IDX_DATA_DIR, exist_ok=True)
    with open(IDX_LIST_FILE_NAME, 'r') as f:
        listing = json.loads(f.read())
    for a in listing:
        if RELAY_KEY is None:
            avantage_data = load_series_daily_adjusted(a['id'])
            if avantage_data is None:
                continue
            avantage_data = avantage_data.sel(field='close')
            # serialize, compress and encode the series for server-side verification
            d = avantage_data.to_netcdf(compute=True)
            d = gzip.compress(d)
            d = base64.b64encode(d)
            url = IDX_DATA_VERIFY_URL + "/" + str(a['id']) + "/"
            approved_range = request_with_retry(url, d)
            if approved_range is None:
                logger.info("approved range None")
                continue
            approved_range = json.loads(approved_range)
            logger.info("approved range " + str(approved_range))
            data = avantage_data.loc[approved_range[0]:approved_range[1]]
        else:
            url = IDX_DATA_FULL_URL + "/" + str(a['id']) + "/"
            data = request_with_retry(url)
            if data is None:
                continue
            data = xr.open_dataarray(data)
        file_name = os.path.join(IDX_DATA_DIR, a['id'] + '.nc')
        data.to_netcdf(path=file_name, compute=True)
    logger.info("Done.")
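# `request_with_retry` is defined elsewhere in this codebase. Judging only by its
# call sites here, it takes a URL, an optional POST body, and an optional
# `min_size`, and returns the response bytes or None once retries are exhausted.
# A minimal standard-library sketch under those assumptions (names hypothetical):
import logging
import time
import urllib.request

def request_with_retry_sketch(url, data=None, min_size=0, tries=5, delay=2):
    """Hypothetical stand-in: GET (or POST when `data` is given) with retries."""
    for attempt in range(tries):
        try:
            with urllib.request.urlopen(url, data=data) as resp:
                payload = resp.read()
            if len(payload) >= min_size:  # guard against truncated downloads
                return payload
        except Exception:
            logging.exception("request failed: " + url)
        time.sleep(delay * (attempt + 1))  # linear backoff between attempts
    return None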
def sync_db(db_id):
    logger.info("sync db: " + db_id)
    db_folder = os.path.join(BLSGOV_DIR, db_id.lower())
    db_folder_new = db_folder + "_new"
    lockfile = db_folder + '.lock'
    # download into a fresh "_new" folder, then swap it in under an exclusive lock
    try:
        shutil.rmtree(db_folder_new)
    except FileNotFoundError:
        pass
    os.makedirs(db_folder_new, exist_ok=True)
    file_list = request_with_retry(BLSGOV_MASTER_URL + "files/" + db_id.lower())
    file_list = json.loads(file_list)
    file_list = [f['name'] for f in file_list if f['type'] == 'file']
    logger.info("files to download: " + str(len(file_list)))
    for fn in file_list:
        data = request_with_retry(BLSGOV_MASTER_URL + "files/" + db_id.lower() + "/" + fn)
        path = os.path.join(db_folder_new, fn)
        with open(path, 'wb') as f:
            f.write(data)
    with portalocker.Lock(lockfile, flags=portalocker.LOCK_EX):
        try:
            shutil.rmtree(db_folder)
        except FileNotFoundError:
            pass
        shutil.move(db_folder_new, db_folder)
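# The rmtree-then-move swap in sync_db happens under an exclusive portalocker
# lock; readers are presumably expected to take a shared lock on the same lock
# file so they never observe a half-replaced folder. A hypothetical reader:
def read_db_file(db_id, name):
    db_folder = os.path.join(BLSGOV_DIR, db_id.lower())
    with portalocker.Lock(db_folder + '.lock', flags=portalocker.LOCK_SH):
        with open(os.path.join(db_folder, name), 'rb') as f:
            return f.read()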
def sync_major():
    if RELAY_KEY is None:
        return
    os.makedirs(IDX_DIR, exist_ok=True)
    logger.info("Download major idx list...")
    listing = request_with_retry(MAJOR_IDX_LIST_URL)
    old_listing = b''
    try:
        with open(MAJOR_IDX_LIST_FILE_NAME, 'rb') as f:
            old_listing = f.read()
    except FileNotFoundError:
        pass
    if old_listing == listing:
        logger.info("nothing changed")
        return
    logger.info("Download major idx data...")
    data = request_with_retry(MAJOR_IDX_DATA_URL)
    with open(MAJOR_IDX_DATA_FILE_NAME, 'wb') as f:
        f.write(data)
    with open(MAJOR_IDX_LIST_FILE_NAME, 'wb') as f:
        f.write(listing)
    logger.info("Done.")
def sync_indexes():
    logger.info("Download idx list...")
    os.makedirs(IDX_DIR, exist_ok=True)
    url = IDX_LIST_URL
    listing = request_with_retry(url)
    listing = json.loads(listing)
    listing = [l for l in listing if INDEXES is None or l['id'] in INDEXES]
    old_listing = []
    try:
        with open(IDX_LIST_FILE_NAME, 'rb') as f:
            old_listing = json.loads(f.read())
    except FileNotFoundError:
        pass
    if listing == old_listing:
        logger.info('nothing changed')
        return
    logger.info("Download idx data...")
    os.makedirs(IDX_DATA_DIR, exist_ok=True)
    for a in listing:
        if RELAY_KEY is None:
            if a.get('etf') is None:
                continue
            avantage_data = load_series_daily_adjusted(a['etf'])
            if avantage_data is None:
                continue
            avantage_data = avantage_data.sel(field='close')
            # serialize, compress and encode the series for server-side verification
            d = avantage_data.to_netcdf(compute=True)
            d = gzip.compress(d)
            d = base64.b64encode(d)
            url = IDX_DATA_VERIFY_URL + "/" + str(a['id']) + "/"
            approved_range = request_with_retry(url, d)
            if approved_range is None:
                logger.info("approved range None")
                continue
            approved_range = json.loads(approved_range)
            logger.info("approved range " + str(approved_range))
            data = avantage_data.loc[approved_range[0]:approved_range[1]]
        else:
            url = IDX_DATA_FULL_URL + "/" + str(a['id']) + "/"
            data = request_with_retry(url)
            if data is None:
                continue
            data = xr.open_dataarray(data)
        file_name = os.path.join(IDX_DATA_DIR, a['id'] + '.nc')
        data.to_netcdf(path=file_name, compute=True)
    with open(IDX_LIST_FILE_NAME, 'w') as f:
        f.write(json.dumps(listing, indent=2))
    logger.info('Done.')
def sync():
    logger.info("Download crypto...")
    os.makedirs(WORK_DIR, exist_ok=True)
    data = request_with_retry(CRYPTOFUTURES_V2_URL, min_size=1024)
    with open(CRYPTOFUTURES_V2_FILE_NAME, 'wb') as f:
        f.write(data)
    data = request_with_retry(CRYPTOFUTURES_V1_URL, min_size=1024)
    with open(CRYPTOFUTURES_V1_FILE_NAME, 'wb') as f:
        f.write(data)
    logger.info('Done.')
def sync_data():
    logger.info("Download assets data...")
    os.makedirs(ASSETS_DATA_DIR, exist_ok=True)
    with open(ASSETS_LIST_FILE_NAME, 'r') as f:
        assets = json.loads(f.read())
    shuffle(assets)
    progress = 0
    for a in assets:
        if RELAY_KEY is None:
            main_data = load_series_daily_adjusted(a['avantage_symbol'])  # FORWARD ORDER
            if main_data is None:
                continue
            # serialize, compress and encode the series for server-side verification
            d = main_data.to_netcdf(compute=True)
            d = gzip.compress(d)
            d = base64.b64encode(d)
            url = ASSETS_DATA_VERIFY_URL + "/" + str(a['internal_id']) + "/"
            approved_range = request_with_retry(url, d)
            if approved_range is None:
                logger.info("approved range None")
                continue
            approved_range = json.loads(approved_range)
            logger.info("approved range " + str(approved_range))
            main_data = main_data.loc[:, approved_range[0]:approved_range[1]]
            # derive the cumulative split factor and the is_liquid flag
            split_cumprod = main_data.sel(field="split").cumprod()
            is_liquid = split_cumprod.copy(True)
            is_liquid[:] = 0
            for lr in a['liquid_ranges']:
                try:
                    is_liquid.loc[lr[0]:lr[1]] = 1
                except Exception:
                    # the liquid range may fall outside the approved date range
                    pass
            ext_data = xr.concat(
                [split_cumprod, is_liquid],
                pd.Index(["split_cumprod", "is_liquid"], name="field")
            )
            data = xr.concat([main_data, ext_data], "field")
        else:
            url = ASSETS_DATA_FULL_URL + "/" + str(a['internal_id']) + "/"
            main_data = request_with_retry(url)
            if main_data is None:
                continue
            data = xr.open_dataarray(main_data)
        file_name = os.path.join(ASSETS_DATA_DIR, a['id'] + '.nc')
        data.to_netcdf(path=file_name, compute=True)
        progress += 1
        logger.info("progress: " + str(progress) + "/" + str(len(assets)))
    logger.info("Done.")
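# A self-contained illustration of the xr.concat pattern used in sync_data above:
# two derived series are stacked under a new 'field' coordinate and appended to
# the main array. All values here are synthetic; only the shape of the operation
# mirrors the real code.
import numpy as np
import pandas as pd
import xarray as xr

_time = pd.date_range("2024-01-01", periods=4)
_main = xr.DataArray(
    np.random.rand(2, 4),
    dims=["field", "time"],
    coords={"field": ["close", "split"], "time": _time},
)
_split_cumprod = _main.sel(field="split").cumprod()  # running split factor
_is_liquid = _split_cumprod.copy(True)
_is_liquid[:] = 0
_is_liquid.loc["2024-01-02":"2024-01-03"] = 1        # flag a liquid date range
_ext = xr.concat([_split_cumprod, _is_liquid],
                 pd.Index(["split_cumprod", "is_liquid"], name="field"))
_combined = xr.concat([_main, _ext], "field")        # close, split, split_cumprod, is_liquid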
def sync():
    logger.info("Download crypto...")
    os.makedirs(WORK_DIR, exist_ok=True)
    data = request_with_retry(CRYPTO_URL)
    with open(CRYPTO_FILE_NAME, 'wb') as f:
        f.write(data)
    logger.info('Done.')
def handle(self, **options):
    dt = ''
    try:
        with open(CRYPTO_LAST_DATE_FILE_NAME, 'r') as f:
            dt = f.read()
    except FileNotFoundError:
        pass
    request_with_retry(POST_STATUS_URL + "/crypto/" + dt + "/")
    dt = ''
    try:
        with open(STOCKS_LAST_DATE_FILE_NAME, 'r') as f:
            dt = f.read()
    except FileNotFoundError:
        pass
    request_with_retry(POST_STATUS_URL + "/stocks/" + dt + "/")
def sync_list():
    logger.info("Download futures list...")
    os.makedirs(FUTURES_DIR, exist_ok=True)
    url = FUTURES_V2_LIST_URL
    lst = request_with_retry(url, min_size=1024)
    lst = json.loads(lst)
    with open(FUTURES_V2_LIST_FILE_NAME, 'w') as f:
        f.write(json.dumps(lst, indent=2))
    url = FUTURES_V1_LIST_URL
    lst = request_with_retry(url, min_size=1024)
    lst = json.loads(lst)
    with open(FUTURES_V1_LIST_FILE_NAME, 'w') as f:
        f.write(json.dumps(lst, indent=2))
    logger.info('Done.')
def handle(self, **options):
    dt = None
    try:
        with open(CRYPTO_LAST_DATE_FILE_NAME, 'r') as f:
            dt = f.read()
    except FileNotFoundError:
        pass
    server_dt = request_with_retry(CRYPTO_LAST_DATE_URL).decode()
    # sync only when the server reports a newer date than the local marker
    if dt is None or server_dt > dt:
        crypto.sync.sync()
        request_with_retry(POST_STATUS_URL + "/crypto/" + server_dt + "/")
        with open(CRYPTO_LAST_DATE_FILE_NAME, 'w') as f:
            f.write(server_dt)
def handle(self, **options):
    dt = None
    try:
        with open(FUTURES_LAST_DATE_FILE_NAME, 'r') as f:
            dt = f.read()
    except FileNotFoundError:
        pass
    server_dt = request_with_retry(FUTURES_LAST_DATE_URL).decode()
    if dt is None or server_dt > dt or not os.path.exists(FUTURES_V2_DATA2_FILE_NAME):
        futures.sync.sync_list()
        futures.sync.sync_data()
        request_with_retry(POST_STATUS_URL + "/futures/" + server_dt + "/")
        with open(FUTURES_LAST_DATE_FILE_NAME, 'w') as f:
            f.write(server_dt)
def sync_imf_file(url, file):
    old_data = None
    if os.path.exists(file):
        with open(file, 'rb') as f:
            old_data = f.read()
    new_data = request_with_retry(url)
    if new_data == old_data:
        return
    print("update", file)
    with open(file, 'wb') as f:
        f.write(new_data)
def sync_list():
    logger.info("Download idx list...")
    os.makedirs(IDX_DIR, exist_ok=True)
    url = IDX_LIST_URL
    listing = request_with_retry(url)
    listing = json.loads(listing)
    listing = [l for l in listing if INDEXES is None or l['id'] in INDEXES]
    with open(IDX_LIST_FILE_NAME, 'w') as f:
        f.write(json.dumps(listing, indent=2))
    logger.info('Done.')
def sync_data():
    logger.info("Download futures data...")
    os.makedirs(FUTURES_DIR, exist_ok=True)
    # each feed comes in three chunks, addressed by an ?offset= query parameter
    for url, file_names in [
        (FUTURES_V2_DATA_URL,
         [FUTURES_V2_DATA_FILE_NAME, FUTURES_V2_DATA1_FILE_NAME, FUTURES_V2_DATA2_FILE_NAME]),
        (FUTURES_V1_DATA_URL,
         [FUTURES_V1_DATA_FILE_NAME, FUTURES_V1_DATA1_FILE_NAME, FUTURES_V1_DATA2_FILE_NAME]),
    ]:
        for offset, file_name in enumerate(file_names):
            chunk_url = url if offset == 0 else url + "?offset=" + str(offset)
            data = request_with_retry(chunk_url, min_size=1024)
            with open(file_name, 'wb') as f:
                f.write(data)
    logger.info('Done.')
def handle(self, **options):
    dt = None
    try:
        with open(STOCKS_LAST_DATE_FILE_NAME, 'r') as f:
            dt = f.read()
    except FileNotFoundError:
        pass
    server_dt = request_with_retry(STOCKS_LAST_DATE_URL).decode()
    if dt is None or server_dt > dt:
        assets.sync.sync_list()
        assets.sync.sync_data()
        secgov.sync.sync()
        idx.sync.sync_list()
        idx.sync.sync_series()
        request_with_retry(POST_STATUS_URL + "/stocks/" + server_dt + "/")
        with open(STOCKS_LAST_DATE_FILE_NAME, 'w') as f:
            f.write(server_dt)
def sync():
    logger.info("Sync sec.gov forms...")
    os.makedirs(SECGOV_FORMS_DIR_NAME, exist_ok=True)
    with open(ASSETS_LIST_FILE_NAME, 'r') as f:
        tickers = json.loads(f.read())
    # restrict to tickers whose price data has already been downloaded
    tickers = [
        t for t in tickers
        if os.path.exists(os.path.join(ASSETS_DATA_DIR, t['id'] + '.nc'))
    ]
    ciks = [t['cik'] for t in tickers]
    ciks = json.dumps(ciks).encode()
    last_id = 0
    try:
        with open(SEC_GOV_LAST_ID_FILE_NAME, 'r') as f:
            last_id = int(f.readline().strip())
    except Exception:
        logger.exception("can't read " + SEC_GOV_LAST_ID_FILE_NAME)
    if not SECGOV_INCREMENTAL_UPDATE:
        last_id = 0
    # page through the forms, ordered by id, until the server returns an empty batch
    while True:
        url = BASE_URL + "?min_id=" + str(last_id + 1)
        raw = request_with_retry(url, ciks)
        if raw is None:
            break
        lst = json.loads(raw)
        if len(lst) == 0:
            break
        for r in lst:
            d = r['date'].split("-")
            path = os.path.join(SECGOV_FORMS_DIR_NAME, r['type'].replace('/', '-'), *d, r['cik'])
            os.makedirs(path, exist_ok=True)
            fn = os.path.join(path, str(r['id']) + '.json')
            with open(fn, 'w') as f:
                f.write(json.dumps(r, indent=2))
            last_id = max(last_id, r['id'])
        with open(SEC_GOV_LAST_ID_FILE_NAME, 'w') as f:
            f.write(str(last_id))
    logger.info("Done.")
def sync_list():
    logger.info("Download asset list...")
    os.makedirs(ASSETS_DIR, exist_ok=True)
    listing = []
    symbols_json = json.dumps(SYMBOLS).encode()
    min_id = 0
    while True:
        url = ASSETS_LIST_URL + "?min_id=" + str(min_id + 1)
        page = request_with_retry(url, symbols_json)
        page = json.loads(page)
        if len(page) == 0:
            break
        for a in page:
            min_id = max(min_id, a['internal_id'])
            listing.append(a)
    with open(ASSETS_LIST_FILE_NAME, 'w') as f:
        f.write(json.dumps(listing, indent=2))
    logger.info('Done.')
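# The min_id pagination in sync_list also appears in the sec.gov forms sync; a
# hypothetical generator capturing the shared pattern (assumed server contract:
# it returns records with id > min_id, and an empty list once exhausted):
def paginate_by_min_id(url, body=None, id_key='internal_id'):
    min_id = 0
    while True:
        page = request_with_retry(url + "?min_id=" + str(min_id + 1), body)
        if page is None:
            return
        page = json.loads(page)
        if not page:
            return
        for rec in page:
            min_id = max(min_id, rec[id_key])
            yield rec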
def sync_data():
    logger.info("Download assets data...")
    os.makedirs(ASSETS_DATA_DIR, exist_ok=True)
    with open(ASSETS_LIST_FILE_NAME, 'r') as f:
        assets = json.loads(f.read())
    shuffle(assets)
    progress = 0
    for a in assets:
        url = ASSETS_DATA_FULL_URL + "/" + str(a['internal_id']) + "/"
        main_data = request_with_retry(url)
        if main_data is None:
            continue
        data = xr.open_dataarray(main_data)
        file_name = os.path.join(ASSETS_DATA_DIR, a['id'] + '.nc')
        data.to_netcdf(path=file_name, compute=True)
        progress += 1
        logger.info("progress: " + str(progress) + "/" + str(len(assets)))
    logger.info("Done.")
def sync_dbs():
    # repeat until a pass finds nothing new, in case the listing changed mid-sync
    go = True
    while go:
        go = False
        logger.info("Download blsgov db list...")
        os.makedirs(BLSGOV_DIR, exist_ok=True)
        old_listing = []
        try:
            with gzip.open(BLSGOV_DB_LIST_FILE_NAME, 'rt') as f:
                old_listing = json.loads(f.read())
        except Exception:
            logger.exception("can't read " + BLSGOV_DB_LIST_FILE_NAME)
        raw = request_with_retry(BLSGOV_MASTER_URL + 'db/')
        new_listing = json.loads(raw)
        if BLSGOV_DBS is not None:
            new_listing = [l for l in new_listing if l['id'] in BLSGOV_DBS]
        if all(l in old_listing for l in new_listing):
            logger.info("nothing is new")
            return
        for l in new_listing:
            if l in old_listing:
                continue
            sync_db(l['id'])
            go = True
        lockfile = BLSGOV_DB_LIST_FILE_NAME + '.lock'
        with portalocker.Lock(lockfile, flags=portalocker.LOCK_EX):
            with gzip.open(BLSGOV_DB_LIST_FILE_NAME, 'wt') as f:
                f.write(json.dumps(new_listing, indent=1))
def sync():
    logger.info("Sync sec.gov forms...")
    with open(ASSETS_LIST_FILE_NAME, 'r') as f:
        tickers = json.loads(f.read())
    # tickers = [t for t in tickers if os.path.exists(os.path.join(ASSETS_DATA_DIR, t['id'] + '.nc'))]
    ciks = [t['cik'] for t in tickers]
    ciks = [c for c in ciks if c is not None]
    os.makedirs(SECGOV_BASE_DIR, exist_ok=True)
    # download the master index and keep only the entries for our ciks
    idx = request_with_retry(BASE_URL + SEC_GOV_IDX_FILE_NAME)
    idx = gzip.decompress(idx)
    idx = json.loads(idx)
    idx = [i for i in idx if i['cik'] in ciks]
    old_idx = []
    try:
        with gzip.open(os.path.join(SECGOV_BASE_DIR, SEC_GOV_IDX_FILE_NAME), 'rt') as f:
            old_idx = json.loads(f.read())
    except Exception:
        logger.exception("can't load " + SEC_GOV_IDX_FILE_NAME)
    if idx == old_idx:
        logger.info("nothing changed")
        return
    for i in idx:
        logger.info("sync: " + json.dumps(i))
        oi = next((j for j in old_idx if j['cik'] == i['cik']), None)
        if oi == i:
            logger.info("not changed")
            continue
        os.makedirs(os.path.join(SECGOV_BASE_DIR, i['cik']), exist_ok=True)
        # fetch the per-cik index and the content archive
        raw = request_with_retry(BASE_URL + i['cik'] + "/" + SEC_GOV_IDX_FILE_NAME)
        with open(os.path.join(SECGOV_BASE_DIR, i['cik'], SEC_GOV_IDX_FILE_NAME), 'wb') as f:
            f.write(raw)
        raw = request_with_retry(BASE_URL + i['cik'] + "/" + SEC_GOV_CONTENT_FILE_NAME)
        with open(os.path.join(SECGOV_BASE_DIR, i['cik'], SEC_GOV_CONTENT_FILE_NAME), 'wb') as f:
            f.write(raw)
        # regroup the reported facts by fact name
        facts = dict()
        with zipfile.ZipFile(os.path.join(SECGOV_BASE_DIR, i['cik'], SEC_GOV_CONTENT_FILE_NAME), 'r') as z:
            for n in z.namelist():
                r = json.loads(z.read(n).decode())
                for fact in r['facts']:
                    _f = {
                        'cik': r['cik'],
                        'report_id': r['id'],
                        'report_date': r['date'],
                        'report_type': r['type'],
                        'report_url': r['url'],
                        'fact_name': fact['name'],
                        'segment': fact.get('segment'),  # TODO
                        'value': fact['value'],
                        'period': fact['period']['value'] if fact.get('period') is not None else None,  # TODO
                        'period_type': fact['period']['type'] if fact.get('period') is not None else None,
                        'period_length': get_period_length(fact),
                        'unit': fact['unit']['value'] if fact.get('unit') is not None else None,  # TODO
                        'unit_type': fact['unit']['type'] if fact.get('unit') is not None else None,
                    }
                    facts.setdefault(fact['name'], []).append(_f)
        with zipfile.ZipFile(os.path.join(SECGOV_BASE_DIR, i['cik'], SEC_GOV_FACTS_FILE_NAME),
                             'w', compression=zipfile.ZIP_DEFLATED, compresslevel=9) as z:
            for k in facts:
                p = facts[k]
                p.sort(key=lambda rec: (rec['report_date'], rec['report_id']))
                z.writestr(k + '.json', json.dumps(p, indent=1))
    with gzip.open(os.path.join(SECGOV_BASE_DIR, SEC_GOV_IDX_FILE_NAME), 'wt') as f:
        f.write(json.dumps(idx, indent=1))
    logger.info("Done.")