Пример #1
0
def get_correlations_for_tickers(tickers, show_exception=False):
  """Correlate every ordered pair of distinct tickers.

  Data for each ticker is loaded with get_t_data. A ticker whose data
  cannot be loaded is skipped, both as the first and as the second member
  of a pair. Both 'A/B' and 'B/A' are produced.

  Args:
    tickers: sized collection of ticker symbols that can be iterated more
      than once.
    show_exception: when true, print the exception raised while loading a
      ticker's data instead of skipping it silently.

  Returns:
    A list of (ident, corr) tuples, where ident is 'TICKER/TICKER_2' and
    corr is element [0] of what get_correlation returns for the pair.
  """
  corrs = []
  start_time = datetime.datetime.now()
  # The loops below emit both orderings of every pair, so the amount of work
  # is n * (n - 1), not comb(n, 2). Passed as a float because the comb()
  # call used previously also handed get_time_left a float.
  ticker_count = len(tickers)
  total_pairs = float(ticker_count * (ticker_count - 1))
  for position, ticker in enumerate(tickers):
    if position:
      # No estimate on the first ticker: nothing has been measured yet.
      time_left = get_time_left(start_time, len(corrs), total_pairs)
      # total_seconds() keeps the days component that .seconds discards, and
      # 60.0 prevents integer truncation of the minutes under Python 2.
      print('Finding Correlations for %s. Time remaining: %f minutes'
            % (ticker, time_left.total_seconds() / 60.0))
    try:
      t_data = get_t_data(ticker)
    except Exception as e:
      if show_exception:
        print("throwing exception %s" % (e,))
      continue
    for ticker_2 in tickers:
      if ticker_2 == ticker:
        continue
      try:
        tdata_2 = get_t_data(ticker_2)
      except Exception as e:
        if show_exception:
          print("throwing exception %s %s" % (e, ticker_2))
        continue
      # Remap into pair-local names. Rebinding t_data here would make every
      # later pair for this ticker start from data already remapped against
      # an earlier partner.
      # NOTE(review): assumes du.remap_data returns new sequences rather than
      # modifying its arguments in place - confirm.
      left, right = t_data, tdata_2
      if len(left) != len(right):
        left, right = du.remap_data(left, right)
      corr = get_correlation(left, right)[0]
      corrs.append(('%s/%s' % (ticker, ticker_2), corr))
      # Free memory after every pair: gc() is run through `r` (presumably
      # the rpy2 R interface) and then Python's own collector is run.
      r('gc()')
      gc.collect()
    gc.collect()
  return corrs
Пример #2
0
def get_correlations_for_tickers(tickers, show_exception=False):
    """Correlate every ordered pair of distinct tickers.

    Data for each ticker is loaded with get_t_data. A ticker whose data
    cannot be loaded is skipped, both as the first and as the second member
    of a pair. Both 'A/B' and 'B/A' are produced.

    Args:
        tickers: sized collection of ticker symbols that can be iterated
            more than once.
        show_exception: when true, print the exception raised while loading
            a ticker's data instead of skipping it silently.

    Returns:
        A list of (ident, corr) tuples, where ident is 'TICKER/TICKER_2' and
        corr is element [0] of what get_correlation returns for the pair.
    """
    corrs = []
    start_time = datetime.datetime.now()
    # The loops below emit both orderings of every pair, so the amount of
    # work is n * (n - 1), not comb(n, 2). Passed as a float because the
    # comb() call used previously also handed get_time_left a float.
    ticker_count = len(tickers)
    total_pairs = float(ticker_count * (ticker_count - 1))
    for position, ticker in enumerate(tickers):
        if position:
            # No estimate on the first ticker: nothing has been measured yet.
            time_left = get_time_left(start_time, len(corrs), total_pairs)
            # total_seconds() keeps the days component that .seconds
            # discards, and 60.0 prevents integer truncation of the minutes
            # under Python 2.
            print('Finding Correlations for %s. Time remaining: %f minutes'
                  % (ticker, time_left.total_seconds() / 60.0))
        try:
            t_data = get_t_data(ticker)
        except Exception as e:
            if show_exception:
                print("throwing exception %s" % (e,))
            continue
        for ticker_2 in tickers:
            if ticker_2 == ticker:
                continue
            try:
                tdata_2 = get_t_data(ticker_2)
            except Exception as e:
                if show_exception:
                    print("throwing exception %s %s" % (e, ticker_2))
                continue
            # Remap into pair-local names. Rebinding t_data here would make
            # every later pair for this ticker start from data already
            # remapped against an earlier partner.
            # NOTE(review): assumes du.remap_data returns new sequences
            # rather than modifying its arguments in place - confirm.
            left, right = t_data, tdata_2
            if len(left) != len(right):
                left, right = du.remap_data(left, right)
            corr = get_correlation(left, right)[0]
            corrs.append(('%s/%s' % (ticker, ticker_2), corr))
            # Free memory after every pair: gc() is run through `r`
            # (presumably the rpy2 R interface) and then Python's own
            # collector is run.
            r('gc()')
            gc.collect()
        gc.collect()
    return corrs
Пример #3
0
def get_correlation_wrap(pair):
    """Compute the correlation for one 'TICKER_A/TICKER_B' pair string.

    Ticker data is taken from the module-level global_ticker_data mapping
    when present; otherwise it is loaded with get_t_data and stored there.
    The shared counter is incremented under `lock`, and every 100000th
    completed pair triggers garbage collection and a progress line.

    Args:
        pair: two ticker symbols joined by '/'.

    Returns:
        (pair, corr) on success, where corr is whatever get_correlation
        returns; (pair, None) when `pair` does not split into exactly two
        parts; ("F****d Up", None) when any exception is raised while
        processing the pair.
    """
    # counter, lock, total, start_time and global_ticker_data are module-level
    # names that are only read or mutated in place here, never rebound, so no
    # `global` declaration is required.
    tickers = pair.split('/')
    if len(tickers) != 2:
        return (pair, None)
    try:
        loaded = []
        for ticker in tickers:
            if ticker in global_ticker_data:
                data = global_ticker_data[ticker]
            else:
                data = get_t_data(ticker)
                global_ticker_data[ticker] = data
            loaded.append(data)

        d1, d2 = du.remap_data(loaded[0], loaded[1])

        corr = get_correlation(d1, d2)
        result = (pair, corr)

        with lock:
            counter.value += 1
            if counter.value % 100000 == 0:
                gc.collect()
                r.gc()
                gc.collect()
                minutes_left = get_time_left(
                    start_time, counter.value,
                    total.value).total_seconds() / 60
                print("%.2f minutes left %d / %d"
                      % (minutes_left, counter.value, total.value))
    except Exception as e:
        # traceback.print_exc() writes the traceback itself and returns None,
        # so it must not be passed to print (that printed a stray "None").
        print(e)
        traceback.print_exc()
        # NOTE(review): the failure tuple does not identify the pair; left
        # unchanged because callers may match on this exact value.
        return ("F****d Up", None)
    return result
Пример #4
0
def get_adf(t1, t2, spread=False, portion=0):
  """Run an ADF test on the spread between two tickers' 'Adj Clos' series.

  The two series are fetched with s.get_company_data, remapped with
  du.remap_data when their lengths differ, and trimmed from the front by
  `portion`. R then fits `t1 ~ t2 + 0` (a regression without intercept);
  its first coefficient is used as beta for compute_spread, and tseries'
  adf.test is run on the resulting spread with k=0.

  Args:
    t1: first ticker symbol; any '^' is removed before it is used as an R
      column name.
    t2: second ticker symbol, treated like t1.
    spread: when true, also return the spread and beta.
    portion: fraction of the series to drop from the start; 0 keeps it all.

  Returns:
    The first element of the test's p.value, or the tuple
    (p-value, sprd, beta) when `spread` is true. None when R fails to
    build the data.frame for the two tickers.
  """
  d1 = s.get_company_data(t1)
  d2 = s.get_company_data(t2)

  l1 = [d['Adj Clos'] for d in d1]
  l2 = [d['Adj Clos'] for d in d2]
  if len(l1) != len(l2):
    l1, l2 = du.remap_data(l1, l2)
  l1 = l1[int(len(l1) * portion):]
  l2 = l2[int(len(l2) * portion):]
  t1 = t1.replace('^', '')
  t2 = t2.replace('^', '')

  # Names assigned on the R side so far. They are handed to garbage_collect
  # on every exit path, so an early return or an R error does not leave them
  # behind.
  r_names = []
  try:
    r.assign('l1', ro.FloatVector(l1))
    r_names.append('l1')
    r.assign('l2', ro.FloatVector(l2))
    r_names.append('l2')
    command = 'data.frame(%s=l1, %s=l2)' % (t1, t2)
    try:
      df = r(command)
    except Exception:
      # The tickers are spliced into R source as column names.
      # NOTE(review): presumably this fails for tickers that are not valid
      # R identifiers - confirm.
      print('ErRROR: %s' % command)
      return None
    r.assign('df', df)
    r_names.append('df')
    r('m <- lm(%s ~ %s + 0, data=df)' % (t1, t2))
    r_names.append('m')
    beta = r('coef(m)[1]')[0]
    sprd = compute_spread(l1, l2, beta)
    r.assign('sprd', ro.FloatVector(sprd))
    r_names.append('sprd')
    importr('tseries')
    r('ht <- adf.test(sprd, alternative="stationary", k=0)')
    r_names.append('ht')
    p = r('ht$p.value')
    if spread:
      return p[0], sprd, beta
    return p[0]
  finally:
    if r_names:
      garbage_collect(r_names)
    gc.collect()
Пример #5
0
def get_adf(t1, t2, spread=False, portion=0):
    """Run an ADF test on the spread between two tickers' 'Adj Clos' series.

    The two series are fetched with s.get_company_data, remapped with
    du.remap_data when their lengths differ, and trimmed from the front by
    `portion`. R then fits `t1 ~ t2 + 0` (a regression without intercept);
    its first coefficient is used as beta for compute_spread, and tseries'
    adf.test is run on the resulting spread with k=0.

    Args:
        t1: first ticker symbol; any '^' is removed before it is used as an
            R column name.
        t2: second ticker symbol, treated like t1.
        spread: when true, also return the spread and beta.
        portion: fraction of the series to drop from the start; 0 keeps it
            all.

    Returns:
        The first element of the test's p.value, or the tuple
        (p-value, sprd, beta) when `spread` is true. None when R fails to
        build the data.frame for the two tickers.
    """
    d1 = s.get_company_data(t1)
    d2 = s.get_company_data(t2)

    l1 = [d['Adj Clos'] for d in d1]
    l2 = [d['Adj Clos'] for d in d2]
    if len(l1) != len(l2):
        l1, l2 = du.remap_data(l1, l2)
    l1 = l1[int(len(l1) * portion):]
    l2 = l2[int(len(l2) * portion):]
    t1 = t1.replace('^', '')
    t2 = t2.replace('^', '')

    # Names assigned on the R side so far. They are handed to
    # garbage_collect on every exit path, so an early return or an R error
    # does not leave them behind.
    r_names = []
    try:
        r.assign('l1', ro.FloatVector(l1))
        r_names.append('l1')
        r.assign('l2', ro.FloatVector(l2))
        r_names.append('l2')
        command = 'data.frame(%s=l1, %s=l2)' % (t1, t2)
        try:
            df = r(command)
        except Exception:
            # The tickers are spliced into R source as column names.
            # NOTE(review): presumably this fails for tickers that are not
            # valid R identifiers - confirm.
            print('ErRROR: %s' % command)
            return None
        r.assign('df', df)
        r_names.append('df')
        r('m <- lm(%s ~ %s + 0, data=df)' % (t1, t2))
        r_names.append('m')
        beta = r('coef(m)[1]')[0]
        sprd = compute_spread(l1, l2, beta)
        r.assign('sprd', ro.FloatVector(sprd))
        r_names.append('sprd')
        importr('tseries')
        r('ht <- adf.test(sprd, alternative="stationary", k=0)')
        r_names.append('ht')
        p = r('ht$p.value')
        if spread:
            return p[0], sprd, beta
        return p[0]
    finally:
        if r_names:
            garbage_collect(r_names)
        gc.collect()
Пример #6
0
def get_correlation_wrap(pair):
  """Compute the correlation for one 'TICKER_A/TICKER_B' pair string.

  Ticker data is taken from the module-level global_ticker_data mapping when
  present; otherwise it is loaded with get_t_data and stored there. The
  shared counter is incremented under `lock`, and every 100000th completed
  pair triggers garbage collection and a progress line.

  Args:
    pair: two ticker symbols joined by '/'.

  Returns:
    (pair, corr) on success, where corr is whatever get_correlation returns;
    (pair, None) when `pair` does not split into exactly two parts;
    ("F****d Up", None) when any exception is raised while processing the
    pair.
  """
  # counter, lock, total, start_time and global_ticker_data are module-level
  # names that are only read or mutated in place here, never rebound, so no
  # `global` declaration is required.
  tickers = pair.split('/')
  if len(tickers) != 2:
    return (pair, None)
  try:
    loaded = []
    for ticker in tickers:
      if ticker in global_ticker_data:
        data = global_ticker_data[ticker]
      else:
        data = get_t_data(ticker)
        global_ticker_data[ticker] = data
      loaded.append(data)

    d1, d2 = du.remap_data(loaded[0], loaded[1])

    corr = get_correlation(d1, d2)
    result = (pair, corr)

    with lock:
      counter.value += 1
      if counter.value % 100000 == 0:
        gc.collect()
        r.gc()
        gc.collect()
        minutes_left = get_time_left(
            start_time, counter.value, total.value).total_seconds() / 60
        print("%.2f minutes left %d / %d"
              % (minutes_left, counter.value, total.value))
  except Exception as e:
    # traceback.print_exc() writes the traceback itself and returns None, so
    # it must not be passed to print (that printed a stray "None").
    print(e)
    traceback.print_exc()
    # NOTE(review): the failure tuple does not identify the pair; left
    # unchanged because callers may match on this exact value.
    return ("F****d Up", None)
  return result