def correlation_matrix_from_path(path,
                                 nlags=100,
                                 signal=signals.prct_curr_tick_midprice_change,
                                 start_hour=4,
                                 end_hour=20):
    """Build a correlation matrix from every file matching a glob pattern.

    Each matching file is loaded as a ``dataset.Dataset``; the two hour
    bounds are converted to indices with ``dataset_helpers.hour_to_idx``
    and handed to ``signal`` as ``start_idx`` / ``end_idx``.

    Arguments:
        path: glob pattern passed to ``glob.glob``.
        nlags: forwarded unchanged as the second argument of
            ``correlation_matrix``.
        signal: callable invoked as
            ``signal(d, start_idx=..., end_idx=...)``; must return a
            non-empty sequence.
        start_hour, end_hour: hour bounds translated to indices per file.
            NOTE(review): presumably hours of the trading day -- confirm
            against ``hour_to_idx``.

    Returns:
        A pair ``(correlation_matrix(ys, nlags), files)`` where ``ys`` holds
        one signal per file, in the same order as ``files``.

    Raises:
        AssertionError: if the pattern matches no file or a signal is empty.
    """
    files = glob.glob(path)
    assert files

    def _signal_for(filename):
        # Load one file and evaluate the signal on the requested window.
        d = dataset.Dataset(filename)
        first = dataset_helpers.hour_to_idx(d.t, start_hour)
        last = dataset_helpers.hour_to_idx(d.t, end_hour)
        y = signal(d, start_idx=first, end_idx=last)
        assert len(y) > 0
        return y

    ys = [_signal_for(filename) for filename in files]
    return correlation_matrix(ys, nlags), files
def load_pairwise_tensor(path, fn, reciprocal_fn, diagonal, start_hour = 1, end_hour = 20, expect_clique = None): currencies = set([]) vectors = {} nticks = None all_files = glob.glob(path) assert len(all_files) > 0 for filename in all_files: print "===> ", filename d = Dataset(filename) start_idx = hour_to_idx(d.t, start_hour) end_idx = hour_to_idx(d.t, end_hour) nticks = end_idx - start_idx ccy_a, ccy_b = d.currency_pair currencies.add(ccy_a) currencies.add(ccy_b) vectors[ (ccy_a, ccy_b) ] = fn(d)[start_idx:end_idx] vectors[ (ccy_b, ccy_a) ] = reciprocal_fn(d)[start_idx:end_idx] clique = list(maximum_clique(currencies, vectors)) n = len(clique) result = np.zeros( [n,n, nticks], dtype='float') print 'tensor', result.shape for i in xrange(n): ccy_a = clique[i] for j in xrange(n): if i == j: result[i,j,:] = diagonal else: ccy_b = clique[j] result[i,j, :] = vectors[ (ccy_a, ccy_b) ] if expect_clique is not None: assert set(clique) == set(expect_clique) permuted_result = np.zeros_like(result) for i in xrange(n): ccy_a = clique[i] pi = expect_clique.index(ccy_a) for j in xrange(n): ccy_b = clique[j] pj = expect_clique.index(ccy_b) permuted_result[pi,pj,:] = result[i,j,:] result = permuted_result clique = expect_clique return clique, result, currencies, vectors