def weekly_get_ingest(start_end_days=None, repo_path='repo_hist', rsync_dir_list=None): """ This is supposed to be run on IB machine at EoD Friday. It first gets all the history of this week, and then ingest into a hist_repo. The need for ingestion, is to correct on any missing data. After this run, the files in the hist dir is copied to data machine """ import ibbar if start_end_days is None: cdt = datetime.datetime.now() if cdt.weekday() != 4: raise ValueError('sday not set while running on non-friday!') eday = cdt.strftime('%Y%m%d') tdi = l1.TradingDayIterator(eday) sday = tdi.prev_n_trade_day(5).yyyymmdd() else: sday, eday = start_end_days print 'Got start/end day: ', sday, eday ibbar.weekly_get_hist(sday, eday) #No need to do this, unless the previous get failed. But #then it should be tried again. #ingest_all_symb(sday, eday, repo_path=repo_path) hist_path = ibbar.read_cfg('HistPath') if rsync_dir_list is not None: for rsync_dir in rsync_dir_list: if len(rsync_dir) > 0: os.system('rsync -avz ' + hist_path + '/ ' + rsync_dir)
def __init__(self, stale_sec=60):
    """Snapshot the L2 bar files under the configured BarPath.

    stale_sec: seconds without a size change before data is considered stale.
    """
    self.stale_sec = stale_sec
    bar_dir = ibbar.read_cfg('BarPath')
    self.bar_path = bar_dir
    # all level-2 binary bar files currently present
    self.fn = glob.glob(bar_dir + '/*L2*bin')
    # initial file-size snapshot (refreshed by upd()) and its timestamp
    self.fs = self.upd()
    self.ts = datetime.datetime.now()
def launch_sustain():
    """Main sustain loop: wait for Sunday market open, then keep the child
    processes alive through the trading week, resetting IB gateway while
    off-line and doing weekly housekeeping at Friday close.

    Relies on module globals: should_run, is_in_daily_trading, get_utcstart,
    bounce_ibg, launch, kill_all, is_weekend, remove_logs, procs, proc_map,
    is_proc_alive, cfg, RESET_WAIT_SECOND, TPMon.
    Exits the process via sys.exit(1) on stale data so an outer shell loop
    can restart it.
    """
    alive = False
    dtnow = datetime.datetime.now()
    # Before Sunday: keep bouncing the IB gateway until Sunday arrives
    # or trading should already be running.
    while not should_run() and dtnow.weekday() != 6:
        print 'wait for Sunday open...'
        #reset_network()
        bounce_ibg()
        time.sleep(RESET_WAIT_SECOND)
        dtnow = datetime.datetime.now()
    # Sunday pre-open: bounce until shortly before the UTC start time,
    # then update the roll config and spin until daily trading begins.
    while dtnow.weekday() == 6 and not should_run():
        utcnow = l1.TradingDayIterator.local_dt_to_utc(dtnow)
        utcstart = get_utcstart()
        # keep resetting until within (RESET_WAIT_SECOND + 10) of open
        while utcnow < utcstart - RESET_WAIT_SECOND - 10:
            print 'wait for Sunday open...', utcnow, utcstart, utcstart - utcnow
            #reset_network()
            bounce_ibg()
            time.sleep(RESET_WAIT_SECOND)
            utcnow = l1.TradingDayIterator.cur_utc()
        print 'getting on-line, updating roll ', datetime.datetime.now()
        ibbar.update_ib_config(cfg_file=cfg)
        utcnow = l1.TradingDayIterator.cur_utc()
        if utcstart > utcnow and not is_in_daily_trading():
            time.sleep(utcstart - utcnow)
        utcnow = l1.TradingDayIterator.cur_utc()
        print 'spining for start', utcnow
        # busy-spin across the open boundary
        while not is_in_daily_trading():
            utcnow = l1.TradingDayIterator.cur_utc()
            #time.sleep( float((1000000-utcnow.microsecond)/1000)/1000.0 )
        print 'starting on', utcnow
        alive = True
        tpm = TPMon()
    # Main weekly loop: runs until should_run() goes false.
    while should_run():
        if is_in_daily_trading():
            if not alive:
                # coming back on-line after an off-line window
                print 'getting on-line, updating roll ', datetime.datetime.now()
                ibbar.update_ib_config(cfg_file=cfg)
                alive = True
            # poll and sustain
            for p in procs:
                if (p not in proc_map.keys()) or (not is_proc_alive(proc_map[p])):
                    launch(p)
            time.sleep(1)
            if not tpm.check():
                # All L2 repo hasn't been updated for 1 min
                # exit the process and retry in outer (while [ 1 ]) loop
                print 'stale detected, exit!'
                # NOTE(review): no `global` statement, so this assignment
                # creates a local and has no effect outside this frame;
                # moot anyway since sys.exit(1) follows immediately.
                _should_run = False
                kill_all()
                alive = False
                sys.exit(1)
                continue  # unreachable: sys.exit raises SystemExit
        else:
            if alive:
                # off-line window: kill children, bounce gateway until the
                # next start time, then come back up with fresh roll config.
                print 'getting off-line, killing all ', datetime.datetime.now()
                kill_all()
                alive = False
                # do one hour of reset
                dtnow = datetime.datetime.now()
                utcstart = get_utcstart()
                cur_utc = l1.TradingDayIterator.cur_utc()
                while cur_utc <= utcstart - RESET_WAIT_SECOND - 10:
                    print 'reset network', cur_utc, utcstart
                    #reset_network()
                    bounce_ibg()
                    time.sleep(RESET_WAIT_SECOND)
                    cur_utc = l1.TradingDayIterator.cur_utc()
                print 'getting on-line, updating roll ', datetime.datetime.now()
                ibbar.update_ib_config(cfg_file=cfg)
                cur_utc = l1.TradingDayIterator.cur_utc()
                if utcstart > cur_utc:
                    time.sleep(utcstart - cur_utc)
                cur_utc = l1.TradingDayIterator.cur_utc()
                print 'spinning for start', cur_utc
                while cur_utc <= utcstart:
                    cur_utc = l1.TradingDayIterator.cur_utc()
                alive = True
                tpm = TPMon()  # fresh staleness monitor for the new session
    print 'stopped ', datetime.datetime.now()
    kill_all()
    # Weekly housekeeping: only on Friday close.
    if is_weekend():
        # only do it on friday close
        dt = datetime.datetime.now()
        wd = dt.weekday()
        if wd == 4:
            remove_logs()
            # edrive
            prev_wk, this_wk = ibbar.move_bar(
                rsync_dir_list=['/cygdrive/e/ib/kisco/bar'])
            bar_path = ibbar.read_cfg('BarPath')
            #os.system('scp -r ' + bar_path + '/'+this_wk + ' ' + USER+'@'+DATA_MACHINE+':'+BAR_PATH)
            print 'moving bar files to ', this_wk
            print 'previous week was ', prev_wk
            #import IB_hist
            #IB_hist.weekly_get_ingest(rsync_dir_list=['/cygdrive/e/ib/kisco/hist'])
            eday = dt.strftime('%Y%m%d')
            tdi = l1.TradingDayIterator(eday)
            sday = tdi.prev_n_trade_day(5).yyyymmdd()
            #ibbar.weekly_get_hist(sday, eday)
            # kick off the weekly history fetch in the background
            os.system("nohup python/ibbar.py " + sday + " " + eday + " 2>&1 >> ./gethist.log &")
            print "started nohup python/ibbar.py " + sday + " " + eday + " 2>&1 >> ./gethist.log &", datetime.datetime.now()
    time.sleep(30)
def ingest_all_symb(sday, eday, repo_path=None, get_missing=True, sym_list=None, future_inclusion=['front', 'back'], sym_list_exclude=[], overwrite_dbar=True, EarliestMissingDay='20180201'): """ This will go to IB historical data, usually in /cygdrive/e/ib/kisco, read all the symbols defined by sym_list and update the repo at repo_path, which is usually at /cygdrive/e/research/kdb if sym_list is None, then it will include all the symbol collected by ibbar. future_inclusion defaults to include both front and back. It is included for ibbar's ingestion, when only front contract is in hist, but the back contracts haven't been retrieved yet. NOTE: ETF and FX symbols are not affected by future_inclusion """ import ibbar if repo_path is None: repo_path = ibbar.read_cfg('RepoPath') fut_sym = ibbar.sym_priority_list fx_sym = l1.ven_sym_map['FX'] etf_sym = ibbar.ib_sym_etf fut_sym2 = ibbar.sym_priority_list_l1_next idx_sym = ibbar.ib_sym_idx if sym_list is None: sym_list = fut_sym + fx_sym + etf_sym + idx_sym for sym in sym_list: if sym in sym_list_exclude: continue print 'ingesting ', sym if sym in fut_sym and 'front' in future_inclusion: barsec = 1 dbar = repo.RepoDailyBar(sym, repo_path=repo_path, create=True) gen_daily_bar_ib(sym, sday, eday, barsec, dbar_repo=dbar, is_front_future=True, get_missing=get_missing, overwrite_dbar=overwrite_dbar, EarliestMissingDay=EarliestMissingDay) elif sym in fx_sym: barsec = 5 dbar = repo.RepoDailyBar(sym, repo_path=repo_path, create=True) gen_daily_bar_ib(sym, sday, eday, barsec, dbar_repo=dbar, get_missing=get_missing, overwrite_dbar=overwrite_dbar, EarliestMissingDay=EarliestMissingDay) elif sym in etf_sym or sym in idx_sym: barsec = 1 dbar = repo.RepoDailyBar(sym, repo_path=repo_path, create=True) gen_daily_bar_ib(sym, sday, eday, barsec, dbar_repo=dbar, get_missing=get_missing, overwrite_dbar=overwrite_dbar) if sym in fut_sym2 and 'back' in future_inclusion: barsec = 1 repo_path_nc = repo.nc_repo_path( repo_path) # repo path of 
next contract dbar = repo.RepoDailyBar(sym, repo_path=repo_path_nc, create=True) gen_daily_bar_ib(sym, sday, eday, barsec, dbar_repo=dbar, is_front_future=False, get_missing=get_missing, overwrite_dbar=overwrite_dbar, EarliestMissingDay=EarliestMissingDay)
def fn_from_dates(symbol, sday, eday, is_front_future): try: is_fx = l1.venue_by_symbol(symbol) == 'FX' is_etf = l1.venue_by_symbol(symbol) == 'ETF' is_idx = l1.venue_by_symbol(symbol) == 'IDX' except: print 'Unknow symbol %s' % (symbol) raise ValueError('Unknown symbol ' + symbol) from ibbar import read_cfg hist_path = read_cfg('HistPath') sym0 = symbol if symbol in l1.RicMap.keys(): sym0 = l1.RicMap[symbol] if is_etf: fqt = glob.glob(hist_path + '/ETF/' + sym0 + '_[12]*_qt.csv*') elif is_fx: fqt = glob.glob(hist_path + '/FX/' + sym0 + '_[12]*_qt.csv*') elif is_idx: fqt = glob.glob(hist_path + '/IDX/' + sym0 + '_[12]*_trd.csv*') else: if is_front_future: fqt = glob.glob(hist_path + '/' + symbol + '/' + sym0 + '*_[12]*_qt.csv*') else: fqt = glob.glob(hist_path + '/' + symbol + '/nc/' + sym0 + '??_[12]*_qt.csv*') ds = [] de = [] fn = [] for f in fqt: if os.stat(f).st_size < 500: print '\t\t\t ***** ', f, ' is too small, ignored' continue ds0 = f.split('/')[-1].split('_')[1] de0 = f.split('/')[-1].split('_')[2].split('.')[0] # check for inclusion if ds0 > eday or de0 < sday: continue ds.append(ds0) de.append(de0) fn.append(f) # sort the list in the increasing order of starting dates # this will make the merging easier by using append # in case of total inclusion, then the rule will be # "overwrite", instead of "append" # append means add only the new content to the existing daily bar # overwrite means add all the content to the existing daily bar, overwirte if overlap # merge means to only apply to daily bars of any days that doesn't exists. ix = np.argsort(ds) dss = np.array(ds)[ix] des = np.array(de)[ix] fns = np.array(fn)[ix] while True: if len(fns) == 0: print 'ERROR! 
Nothing found for %s from %s to %s (front %s), search path %s' % ( symbol, sday, eday, str(is_front_future), hist_path) break # remove the files that are contained desi = des.astype(int) ix = np.nonzero(desi[1:] - desi[:-1] <= 0)[0] if len(ix) > 0: print fns[ix + 1], ' contained by ', fns[ ix], ', removed, if needed, consider load and overwrite repo' fns = np.delete(fns, ix + 1) des = np.delete(des, ix + 1) dss = np.delete(dss, ix + 1) else: break return fns, is_fx, is_etf, is_idx