def _distance_to_named_ts(ts_stand, name, test=False):
    """Load the pickled timeseries `name` and return its distance to ts_stand.

    The file is read from ``../ts_data/<name>.p`` when `test` is true and from
    ``ts_data/<name>.p`` otherwise. The distance is
    ``2 * (1 - util.kernel_corr(ts_stand, v_stand))`` where ``v_stand`` is the
    loaded series standardised with its own mean and standard deviation.
    """
    directory = "../ts_data" if test else "ts_data"
    # Use a context manager so the file handle is closed deterministically.
    with open("{}/{}.p".format(directory, name), "rb") as fh:
        v = pickle.load(fh)
    v_stand = util.stand(v, v.mean(), v.std())
    return 2 * (1 - util.kernel_corr(ts_stand, v_stand))


def return_top(ts_stand, oglist, n, ret=False, file='', test=False):
    '''
    Prints or saves the n closest timeseries to ts_stand from a list of
    timeseries names (i.e. as string)

    Args:
    -----
    - ts_stand: standardised timeseries the candidates are compared against
    - oglist: iterable of timeseries names; each name is loaded from
      '<name>.p' inside the ts_data directory
    - n: number of closest timeseries wanted
    - ret: if false the result is printed, otherwise it is pickled to `file`
    - file: path of the output pickle (only used when `ret` is true)
    - test: if true, timeseries are read from '../ts_data' instead of 'ts_data'

    Output:
    -------
    - n > 1: a list of (name, distance) tuples sorted by increasing distance
    - otherwise: the single closest series; printed as one line, or pickled
      as [distance, name]
    Nothing is returned to the caller.
    '''
    if n > 1:
        topn = []
        for k in oglist:
            curr_distance = _distance_to_named_ts(ts_stand, k, test)
            if len(topn) < n:
                topn.append((k, curr_distance))
                continue
            # The buffer is full: overwrite the current worst entry unless
            # the new candidate is strictly farther away.
            val = [x[1] for x in topn]
            i = val.index(max(val))
            if val[i] < curr_distance:
                continue
            topn[i] = (k, curr_distance)
        topn.sort(key=lambda x: x[1])
        if not ret:
            print('Results: \n')
            for k, v in topn:
                print(k + ':', v)
        else:
            with open(file, "wb") as out:
                pickle.dump(topn, out)
    else:
        m = float("inf")
        best = None
        for k in oglist:
            curr_distance = _distance_to_named_ts(ts_stand, k, test)
            if m > curr_distance:
                m = curr_distance
                best = k
        if not ret:
            # str() keeps the output unchanged for string names and avoids a
            # TypeError when oglist is empty (best is still None).
            print('Closest: ' + str(best) + ', Distance: ' + str(m))
        else:
            with open(file, "wb") as out:
                pickle.dump([m, best], out)
def sanity(ts_stand, n, nts=1000):
    """Sanity check used in the testing file: brute force n closest.

    Loads the timeseries '../ts_data/ts_<i>.p' for every i in range(nts),
    computes its distance ``2 * (1 - util.kernel_corr(ts_stand, v_stand))``
    to `ts_stand`, and returns the `n` smallest.

    Args:
    -----
    - ts_stand: standardised timeseries to compare against
    - n: number of closest entries to return
    - nts: number of timeseries files to scan (defaults to 1000, the value
      that used to be hard-coded)

    Returns:
    --------
    A list of at most n tuples ('ts_data/ts_<i>.p', distance) sorted by
    increasing distance.
    """
    ds = []
    for i in range(nts):
        # Context manager so each file handle is closed right after loading.
        with open("../ts_data/ts_{}.p".format(i), "rb") as fh:
            v = pickle.load(fh)
        v_stand = util.stand(v, v.mean(), v.std())
        distance = 2 * (1 - util.kernel_corr(ts_stand, v_stand))
        ds.append(('ts_data/ts_' + str(i) + '.p', distance))
    return sorted(ds, key=lambda d: d[1])[:n]
def main(arguments):
    """Pick random vantage points and build one distance database per point.

    Steps:
    1. Recreate an empty 'dbs' directory.
    2. Parse `arguments` for '--n' (number of vantage points, default 20)
       and '--nts' (number of timeseries, default 1000).
    3. Sample n distinct indices from range(nts) and pickle them to
       'ts_data/vantage_points.p'.
    4. Load 'ts_data/ts_<i>.p' for every i in range(nts).
    5. For each vantage point i, create 'dbs/db_<i>.dbdb' and store, for every
       other timeseries j, the key 2 * (1 - kernel_corr(i, j)) with the value
       'ts_<j>'.

    Args:
    -----
    - arguments: list of command line argument strings given to argparse
    """
    # Clean the databases or create the directory. Only a missing directory
    # is expected here; any other failure should surface instead of being
    # hidden until os.mkdir fails.
    try:
        shutil.rmtree('dbs')
    except FileNotFoundError:
        pass
    os.mkdir('dbs')

    # Parse the number of vantage points and of timeseries wanted
    parser = argparse.ArgumentParser(
        description='Number of vantage points to generate')
    parser.add_argument('--n', help='Number of vantage points',
                        type=int, default=20)
    parser.add_argument('--nts', help='Number of timeseries',
                        type=int, default=1000)
    args = parser.parse_args(arguments)
    n = args.n
    nts = args.nts

    # Sample the vantage points without replacement
    vantage = random.sample(range(nts), n)
    with open("ts_data/vantage_points.p", "wb") as out:
        pickle.dump(vantage, out)
    print('Vantage Points are: ')
    for i in vantage:
        print('TS ' + str(i))

    # Load the ts
    tss = []
    print('Loading the TS')
    for i in range(nts):
        with open("ts_data/ts_{}.p".format(i), "rb") as fh:
            tss.append(pickle.load(fh))

    # Store the distance 2 * (1 - k_corr) from each vantage point to every
    # other timeseries in that vantage point's DB
    print('Creating the DBS')
    for ct, i in enumerate(vantage):
        print(str(ct + 1) + '/' + str(len(vantage)) + ' Done')
        db = binarytree.connect('dbs/db_' + str(i) + '.dbdb')
        try:
            ts_i_stand = stand(tss[i], tss[i].mean(), tss[i].std())
            for j in range(0, nts):
                if j == i:
                    continue
                kc = kernel_corr(
                    ts_i_stand,
                    stand(tss[j], tss[j].mean(), tss[j].std()))
                db.set(2 * (1 - kc), 'ts_' + str(j))
            db.commit()
        finally:
            # Always release the database, even if an insert failed.
            db.close()
def in_radius_from_vantage(ts_stand, v_str, test=False):
    '''
    Return the database entries lying within twice the distance between
    ts_stand and the given vantage point.

    The distance is d = 2 * (1 - kernel_corr(ts_stand, vantage_stand)) and the
    vantage point's database is queried with get_closer_than(2 * d).

    Args:
    -----
    - ts_stand: standardised ArrayTimeSeries
    - v_str: index of the vantage point as a string
    - test: if true, the timeseries and database are read from '../ts_data'
      and '../dbs' instead of 'ts_data' and 'dbs'

    Returns:
    --------
    A list holding the second element of every item returned by
    db.get_closer_than(2 * d) (presumably the timeseries names stored in the
    database -- confirm against the code that builds it).
    '''
    root = "../" if test else ""

    # Context manager so the pickle file is closed right after loading.
    with open(root + "ts_data/ts_{}.p".format(v_str), "rb") as fh:
        v = pickle.load(fh)
    v_stand = util.stand(v, v.mean(), v.std())
    d = 2 * (1 - util.kernel_corr(ts_stand, v_stand))

    db = binarytree.connect(root + 'dbs/db_{}.dbdb'.format(v_str))
    try:
        closest_in_vantage = [x[1] for x in db.get_closer_than(2 * d)]
    finally:
        # Release the database even if the query raises.
        db.close()
    return closest_in_vantage
def eval_closest_vantage(ts_stand, vantage, test=False):
    '''
    Find the vantage point closest to ts_stand.

    The distance to each vantage point is
    2 * (1 - kernel_corr(ts_stand, vantage_stand)), where vantage_stand is the
    vantage timeseries standardised with its own mean and standard deviation.

    Args:
    -----
    - ts_stand: standardised ArrayTimeSeries
    - vantage: list of indices of vantage points
    - test: if true, timeseries are read from '../ts_data' instead of
      'ts_data'

    Returns:
    --------
    A tuple (closest_vantage, closest_distance) where closest_vantage is
    (index, path of the pickle file of that vantage point). If `vantage` is
    empty the result is (None, inf).
    '''
    template = "../ts_data/ts_{}.p" if test else "ts_data/ts_{}.p"
    closest_vantage = None
    closest_distance = float("inf")
    for i in vantage:
        path = template.format(i)
        # Context manager so each file handle is closed right after loading.
        with open(path, "rb") as fh:
            v = pickle.load(fh)
        v_stand = util.stand(v, v.mean(), v.std())
        curr_distance = 2 * (1 - util.kernel_corr(ts_stand, v_stand))
        # Strict comparison: on ties the earliest vantage point is kept.
        if curr_distance < closest_distance:
            closest_vantage = (i, path)
            closest_distance = curr_distance
    return closest_vantage, closest_distance
def test_kc_smaller_1(self):
    """Check that the kernel correlation of two random timeseries is <= 1."""
    # NOTE(review): the meaning of the random_ts arguments (1 and 10) is not
    # visible from here -- confirm against util.random_ts.
    t1 = util.random_ts(1)
    t2 = util.random_ts(10)
    # assertLessEqual reports the offending value on failure, unlike
    # assertTrue(a <= b).
    self.assertLessEqual(util.kernel_corr(t1, t2), 1)
def test_kc_with_itself(self):
    """Check that the kernel correlation of a timeseries with itself is 1."""
    t1 = util.tsmaker(0.5, 0.1, 0.01)
    # An absolute tolerance of 1e-6 absorbs floating point error;
    # assertAlmostEqual reports both values and the difference on failure.
    self.assertAlmostEqual(util.kernel_corr(t1, t1), 1, delta=0.000001)