def get_correlations_for_tickers(tickers, show_exception=False):
    """Compute the correlation for every ordered pair of distinct tickers.

    Args:
        tickers: Sequence of ticker symbols; each one is loaded with
            ``get_t_data``.
        show_exception: When true, print the exception raised while loading
            a ticker instead of skipping it silently.

    Returns:
        A list of ``('T1/T2', corr)`` tuples, where ``corr`` is element 0 of
        the value returned by ``get_correlation``. Both ``'A/B'`` and
        ``'B/A'`` are produced. Tickers whose data cannot be loaded are
        skipped.
    """
    corrs = []
    start_time = datetime.datetime.now()
    # The loops below visit ordered pairs (A/B and B/A), so the amount of
    # work is n * (n - 1), not C(n, 2). Kept as a float, like the scipy value
    # it replaces, so get_time_left receives the same argument type.
    total_pairs = float(len(tickers) * (len(tickers) - 1))
    for index, ticker in enumerate(tickers):
        if index:
            time_left = get_time_left(start_time, len(corrs), total_pairs)
            # total_seconds() includes whole days and yields a float, so the
            # estimate is neither truncated nor wrapped after 24 hours.
            # print is used in single-argument form so it parses the same
            # way on Python 2 and Python 3.
            print('Finding Correlations for %s. Time remaining: %f minutes'
                  % (ticker, time_left.total_seconds() / 60.0))
        try:
            t_data = get_t_data(ticker)
        except Exception as e:
            if show_exception:
                print("throwing exception %s" % (e,))
            continue
        for ticker_2 in tickers:
            if ticker_2 == ticker:
                continue
            try:
                tdata_2 = get_t_data(ticker_2)
            except Exception as e:
                if show_exception:
                    print("throwing exception %s %s" % (e, ticker_2))
                continue
            # Align into pair-local names: rebinding t_data here would carry
            # the alignment done for one partner into every later pairing.
            # NOTE(review): assumes du.remap_data returns new sequences
            # rather than mutating its arguments -- confirm.
            if len(t_data) != len(tdata_2):
                left, right = du.remap_data(t_data, tdata_2)
            else:
                left, right = t_data, tdata_2
            corr = get_correlation(left, right)[0]
            ident = '%s/%s' % (ticker, ticker_2)
            corrs.append((ident, corr))
        # Release R-side and Python-side garbage after each outer ticker.
        r('gc()')
        gc.collect()
    gc.collect()
    return corrs
def get_correlations_for_tickers(tickers, show_exception=False):
    """Compute the correlation for every ordered pair of distinct tickers.

    Args:
        tickers: Sequence of ticker symbols; each one is loaded with
            ``get_t_data``.
        show_exception: When true, print the exception raised while loading
            a ticker instead of skipping it silently.

    Returns:
        A list of ``('T1/T2', corr)`` tuples, where ``corr`` is element 0 of
        the value returned by ``get_correlation``. Both ``'A/B'`` and
        ``'B/A'`` are produced. Tickers whose data cannot be loaded are
        skipped.
    """
    corrs = []
    start_time = datetime.datetime.now()
    # The loops below visit ordered pairs (A/B and B/A), so the amount of
    # work is n * (n - 1), not C(n, 2). Kept as a float, like the scipy value
    # it replaces, so get_time_left receives the same argument type.
    total_pairs = float(len(tickers) * (len(tickers) - 1))
    for index, ticker in enumerate(tickers):
        if index:
            time_left = get_time_left(start_time, len(corrs), total_pairs)
            # total_seconds() includes whole days and yields a float, so the
            # estimate is neither truncated nor wrapped after 24 hours.
            # print is used in single-argument form so it parses the same
            # way on Python 2 and Python 3.
            print('Finding Correlations for %s. Time remaining: %f minutes'
                  % (ticker, time_left.total_seconds() / 60.0))
        try:
            t_data = get_t_data(ticker)
        except Exception as e:
            if show_exception:
                print("throwing exception %s" % (e,))
            continue
        for ticker_2 in tickers:
            if ticker_2 == ticker:
                continue
            try:
                tdata_2 = get_t_data(ticker_2)
            except Exception as e:
                if show_exception:
                    print("throwing exception %s %s" % (e, ticker_2))
                continue
            # Align into pair-local names: rebinding t_data here would carry
            # the alignment done for one partner into every later pairing.
            # NOTE(review): assumes du.remap_data returns new sequences
            # rather than mutating its arguments -- confirm.
            if len(t_data) != len(tdata_2):
                left, right = du.remap_data(t_data, tdata_2)
            else:
                left, right = t_data, tdata_2
            corr = get_correlation(left, right)[0]
            ident = '%s/%s' % (ticker, ticker_2)
            corrs.append((ident, corr))
        # Release R-side and Python-side garbage after each outer ticker.
        r('gc()')
        gc.collect()
    gc.collect()
    return corrs
def get_correlation_wrap(pair):
    """Compute the correlation for one ``'T1/T2'`` pair string.

    Reads the module-level names ``global_ticker_data`` (a ticker -> data
    cache that is filled on a miss), ``lock``, ``counter``, ``total`` and
    ``start_time``. None of them is rebound here, so no ``global``
    declaration is needed.

    Args:
        pair: Two ticker symbols joined by ``'/'``.

    Returns:
        ``(pair, corr)`` on success, where ``corr`` is whatever
        ``get_correlation`` returns; ``(pair, None)`` when ``pair`` does not
        split into exactly two parts; ``("F****d Up", None)`` when anything
        raises while loading or correlating.
    """
    tickers = pair.split('/')
    if len(tickers) != 2:
        return (pair, None)
    try:
        series = []
        for ticker in tickers:
            if ticker in global_ticker_data:
                data = global_ticker_data[ticker]
            else:
                data = get_t_data(ticker)
                global_ticker_data[ticker] = data
            series.append(data)
        d1, d2 = du.remap_data(series[0], series[1])
        result = (pair, get_correlation(d1, d2))
        with lock:
            counter.value += 1
            # Every 100000 completed pairs: collect garbage on both the
            # Python and the R side, then report progress.
            if counter.value % 100000 == 0:
                gc.collect()
                r.gc()
                gc.collect()
                minutes_left = get_time_left(
                    start_time, counter.value, total.value
                ).total_seconds() / 60
                # Single-argument print parses the same on Python 2 and 3.
                print("%.2f minutes left %d / %d"
                      % (minutes_left, counter.value, total.value))
    except Exception:
        # print_exc() writes the traceback (message included) itself and
        # returns None, so it must not be passed to print.
        traceback.print_exc()
        # NOTE(review): the sentinel hides which pair failed; it is kept
        # unchanged in case callers match on it.
        return ("F****d Up", None)
    return result
def get_adf(t1, t2, spread=False, portion=0):
    """Run R's ``adf.test`` on the regression spread between two tickers.

    The two price series are regressed through the origin in R
    (``lm(y ~ x + 0)``), the spread is computed with ``compute_spread`` using
    the fitted slope, and ``tseries::adf.test`` is run on that spread with
    ``alternative="stationary"`` and ``k=0``.

    Args:
        t1: Ticker whose series is the regression response.
        t2: Ticker whose series is the regressor.
        spread: When true, also return the spread and the slope.
        portion: Fraction of each series to drop from the start before
            fitting (``0`` keeps everything).

    Returns:
        The p-value of the test, or ``(p_value, sprd, beta)`` when ``spread``
        is true. ``None`` if R fails to build the data frame.
    """
    d1 = s.get_company_data(t1)
    d2 = s.get_company_data(t2)
    # NOTE(review): the key is spelled 'Adj Clos' here; confirm it matches
    # the stored field name rather than 'Adj Close'.
    l1 = [d['Adj Clos'] for d in d1]
    l2 = [d['Adj Clos'] for d in d2]
    if len(l1) != len(l2):
        l1, l2 = du.remap_data(l1, l2)
    l1 = l1[int(len(l1) * portion):]
    l2 = l2[int(len(l2) * portion):]
    r.assign('l1', ro.FloatVector(l1))
    r.assign('l2', ro.FloatVector(l2))
    # Fixed column names are used instead of pasting the ticker symbols into
    # R source: symbols such as 'BRK-B' or 'TRUE' are not valid R identifiers
    # and would either fail to parse or change the meaning of the formula.
    # The fitted slope does not depend on the column names.
    command = 'data.frame(y=l1, x=l2)'
    try:
        df = r(command)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still work.
        print('ErRROR: %s [%s/%s]' % (command, t1, t2))
        return None
    r.assign('df', df)
    r('m <- lm(y ~ x + 0, data=df)')
    beta = r('coef(m)[1]')[0]
    sprd = compute_spread(l1, l2, beta)
    r.assign('sprd', ro.FloatVector(sprd))
    importr('tseries')
    r('ht <- adf.test(sprd, alternative="stationary", k=0)')
    p = r('ht$p.value')
    # Hand the temporaries assigned in the R session above to the project's
    # cleanup helper, then collect on the Python side.
    garbage_collect(['ht', 'sprd', 'l1', 'm', 'l2', 'df'])
    gc.collect()
    if spread:
        return p[0], sprd, beta
    return p[0]
def get_adf(t1, t2, spread=False, portion=0):
    """Run R's ``adf.test`` on the regression spread between two tickers.

    The two price series are regressed through the origin in R
    (``lm(y ~ x + 0)``), the spread is computed with ``compute_spread`` using
    the fitted slope, and ``tseries::adf.test`` is run on that spread with
    ``alternative="stationary"`` and ``k=0``.

    Args:
        t1: Ticker whose series is the regression response.
        t2: Ticker whose series is the regressor.
        spread: When true, also return the spread and the slope.
        portion: Fraction of each series to drop from the start before
            fitting (``0`` keeps everything).

    Returns:
        The p-value of the test, or ``(p_value, sprd, beta)`` when ``spread``
        is true. ``None`` if R fails to build the data frame.
    """
    d1 = s.get_company_data(t1)
    d2 = s.get_company_data(t2)
    # NOTE(review): the key is spelled 'Adj Clos' here; confirm it matches
    # the stored field name rather than 'Adj Close'.
    l1 = [d['Adj Clos'] for d in d1]
    l2 = [d['Adj Clos'] for d in d2]
    if len(l1) != len(l2):
        l1, l2 = du.remap_data(l1, l2)
    l1 = l1[int(len(l1) * portion):]
    l2 = l2[int(len(l2) * portion):]
    r.assign('l1', ro.FloatVector(l1))
    r.assign('l2', ro.FloatVector(l2))
    # Fixed column names are used instead of pasting the ticker symbols into
    # R source: symbols such as 'BRK-B' or 'TRUE' are not valid R identifiers
    # and would either fail to parse or change the meaning of the formula.
    # The fitted slope does not depend on the column names.
    command = 'data.frame(y=l1, x=l2)'
    try:
        df = r(command)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still work.
        print('ErRROR: %s [%s/%s]' % (command, t1, t2))
        return None
    r.assign('df', df)
    r('m <- lm(y ~ x + 0, data=df)')
    beta = r('coef(m)[1]')[0]
    sprd = compute_spread(l1, l2, beta)
    r.assign('sprd', ro.FloatVector(sprd))
    importr('tseries')
    r('ht <- adf.test(sprd, alternative="stationary", k=0)')
    p = r('ht$p.value')
    # Hand the temporaries assigned in the R session above to the project's
    # cleanup helper, then collect on the Python side.
    garbage_collect(['ht', 'sprd', 'l1', 'm', 'l2', 'df'])
    gc.collect()
    if spread:
        return p[0], sprd, beta
    return p[0]
def get_correlation_wrap(pair):
    """Compute the correlation for one ``'T1/T2'`` pair string.

    Reads the module-level names ``global_ticker_data`` (a ticker -> data
    cache that is filled on a miss), ``lock``, ``counter``, ``total`` and
    ``start_time``. None of them is rebound here, so no ``global``
    declaration is needed.

    Args:
        pair: Two ticker symbols joined by ``'/'``.

    Returns:
        ``(pair, corr)`` on success, where ``corr`` is whatever
        ``get_correlation`` returns; ``(pair, None)`` when ``pair`` does not
        split into exactly two parts; ``("F****d Up", None)`` when anything
        raises while loading or correlating.
    """
    tickers = pair.split('/')
    if len(tickers) != 2:
        return (pair, None)
    try:
        series = []
        for ticker in tickers:
            if ticker in global_ticker_data:
                data = global_ticker_data[ticker]
            else:
                data = get_t_data(ticker)
                global_ticker_data[ticker] = data
            series.append(data)
        d1, d2 = du.remap_data(series[0], series[1])
        result = (pair, get_correlation(d1, d2))
        with lock:
            counter.value += 1
            # Every 100000 completed pairs: collect garbage on both the
            # Python and the R side, then report progress.
            if counter.value % 100000 == 0:
                gc.collect()
                r.gc()
                gc.collect()
                minutes_left = get_time_left(
                    start_time, counter.value, total.value
                ).total_seconds() / 60
                # Single-argument print parses the same on Python 2 and 3.
                print("%.2f minutes left %d / %d"
                      % (minutes_left, counter.value, total.value))
    except Exception:
        # print_exc() writes the traceback (message included) itself and
        # returns None, so it must not be passed to print.
        traceback.print_exc()
        # NOTE(review): the sentinel hides which pair failed; it is kept
        # unchanged in case callers match on it.
        return ("F****d Up", None)
    return result