예제 #1
0
def _return_top_distances(ts_stand, oglist, test):
    '''
    Lazily yield (name, distance) for every timeseries name in oglist.

    Each timeseries is unpickled from "ts_data/<name>.p" ("../ts_data/<name>.p"
    when test is true), standardised with util.stand and compared to ts_stand
    with distance = 2 * (1 - util.kernel_corr(ts_stand, v_stand)).
    '''
    prefix = "../ts_data" if test else "ts_data"
    for name in oglist:
        # NOTE(security): pickle.load executes arbitrary code from the file;
        # only use it on timeseries files produced by this project.
        with open("{}/{}.p".format(prefix, name), "rb") as fh:
            v = pickle.load(fh)
        v_stand = util.stand(v, v.mean(), v.std())
        yield name, 2 * (1 - util.kernel_corr(ts_stand, v_stand))


def return_top(ts_stand, oglist, n, ret=False, file='', test=False):
    '''
    Prints or saves the n closest timeseries to ts_stand from a list of
    timeseries names (i.e. as string).

    Args:
    -----
    - ts_stand: standardised timeseries the candidates are compared to
    - oglist: iterable of timeseries names; each one is loaded from
      "ts_data/<name>.p" (or "../ts_data/<name>.p" when test is true)
    - n: number of results wanted. n > 1 produces a list of (name, distance)
      tuples sorted by increasing distance; any other value produces only the
      single closest timeseries
    - ret: when false the result is printed; when true it is pickled to `file`
    - file: output path, only used when ret is true
    - test: when true, data is read relative to the parent directory

    Returns:
    --------
    None. With ret true the pickled payload is the sorted list of
    (name, distance) tuples for n > 1, and [distance, name] otherwise.
    '''
    distances = _return_top_distances(ts_stand, oglist, test)

    if n > 1:
        topn = []
        for name, curr_distance in distances:
            if len(topn) < n:
                topn.append((name, curr_distance))
                continue
            # The list is full: the current worst entry is evicted unless the
            # candidate is strictly farther away (a tie replaces the entry).
            held = [pair[1] for pair in topn]
            worst = held.index(max(held))
            if not held[worst] < curr_distance:
                topn[worst] = (name, curr_distance)
        topn.sort(key=lambda pair: pair[1])

        if ret:
            with open(file, "wb") as fh:
                pickle.dump(topn, fh)
        else:
            print('Results: \n')
            for name, dist in topn:
                print(name + ':', dist)
        return

    m = float("inf")
    best = None
    for name, curr_distance in distances:
        if m > curr_distance:
            m = curr_distance
            best = name

    if ret:
        with open(file, "wb") as fh:
            pickle.dump([m, best], fh)
    else:
        # format() instead of '+' so an empty oglist (best is None) is
        # reported rather than raising a TypeError.
        print('Closest: {}, Distance: {}'.format(best, m))
예제 #2
0
def sanity(ts_stand, n, nts=1000):
    '''
    Sanity check used in the testing file: brute force n closest.

    Args:
    -----
    - ts_stand: standardised timeseries every stored timeseries is compared to
    - n: number of closest results to return
    - nts: number of stored timeseries to scan; files
      "../ts_data/ts_0.p" ... "../ts_data/ts_<nts-1>.p" are read (default 1000)

    Returns:
    --------
    The n (label, distance) tuples with the smallest distance, sorted by
    increasing distance, where distance = 2 * (1 - util.kernel_corr(...)).
    '''
    ds = []
    for i in range(nts):
        # NOTE(security): pickle.load must only be used on trusted files.
        with open("../ts_data/ts_{}.p".format(i), "rb") as fh:
            v = pickle.load(fh)
        v_stand = util.stand(v, v.mean(), v.std())
        # The label deliberately has no "../" prefix even though the file is
        # read from "../ts_data"; it is kept as-is because callers compare
        # against these exact strings.
        ds.append(('ts_data/ts_' + str(i) + '.p',
                   2 * (1 - util.kernel_corr(ts_stand, v_stand))))
    return sorted(ds, key=lambda d: d[1])[:n]
def main(arguments):
    '''
    Pick vantage points and build one distance database per vantage point.

    Args:
    -----
    - arguments: list of command line arguments; understands
      --n   (int, default 20):   number of vantage points
      --nts (int, default 1000): number of timeseries, read from
                                 "ts_data/ts_0.p" ... "ts_data/ts_<nts-1>.p"

    Side effects:
    -------------
    - the "dbs" directory is deleted (if present) and recreated
    - the sampled vantage indices are pickled to "ts_data/vantage_points.p"
    - for every vantage index i, "dbs/db_<i>.dbdb" is filled with one entry
      per other timeseries j: key 2 * (1 - kernel_corr(i, j)), value "ts_<j>"
    '''
    # Parse the arguments first, so that --help or an invalid argument exits
    # before the existing databases are destroyed.
    parser = argparse.ArgumentParser(
        description='Number of vantage points to generate')
    parser.add_argument('--n',
                        help='Number of vantage points',
                        type=int,
                        default=20)
    parser.add_argument('--nts',
                        help='Number of timeseries in the database',
                        type=int,
                        default=1000)
    args = parser.parse_args(arguments)
    n = args.n
    nts = args.nts

    # Sample the vantage points without replacement. Done before touching the
    # disk so that n > nts (ValueError) also leaves the old databases intact.
    vantage = random.sample(range(nts), n)

    # Clean the databases or create the directory
    try:
        shutil.rmtree('dbs')
    except FileNotFoundError:
        # Nothing to clean on a first run.
        pass
    os.mkdir('dbs')

    with open("ts_data/vantage_points.p", "wb") as fh:
        pickle.dump(vantage, fh)
    print('Vantage Points are: ')
    for i in vantage:
        print('TS ' + str(i))

    # Load the timeseries
    # NOTE(security): pickle.load must only be used on trusted files.
    print('Loading the TS')
    tss = []
    for i in range(nts):
        with open("ts_data/ts_{}.p".format(i), "rb") as fh:
            tss.append(pickle.load(fh))

    # Store the distance 2 * (1 - k_corr) from each vantage point to every
    # other timeseries in that vantage point's database
    print('Creating the DBS')
    for ct, i in enumerate(vantage):
        print(str(ct + 1) + '/' + str(len(vantage)) + ' Done')
        db = binarytree.connect('dbs/db_' + str(i) + '.dbdb')
        try:
            ts_i_stand = stand(tss[i], tss[i].mean(), tss[i].std())
            for j in range(0, nts):
                if j == i:
                    continue
                kc = kernel_corr(ts_i_stand,
                                 stand(tss[j], tss[j].mean(), tss[j].std()))
                db.set(2 * (1 - kc), 'ts_' + str(j))
            db.commit()
        finally:
            # Release the database handle even if a computation fails.
            db.close()
예제 #4
0
def in_radius_from_vantage(ts_stand, v_str, test=False):
    '''
    Return the entries of a vantage point's database that are closer than
    2*d to it, where d = 2 * (1 - k_corr(ts_stand, vantage_stand)) is the
    distance between ts_stand and the vantage point.
    Args:
    -----
    - ts_stand: standardised ArrayTimeSeries
    - v_str: index of the vantage point as a string
    - test: when true, "ts_data" and "dbs" are looked up in the parent
      directory instead of the current one
    Returns:
    --------
    List holding element [1] of every item yielded by
    db.get_closer_than(2 * d) (presumably the stored timeseries names;
    verify against binarytree).
    '''
    root = "../" if test else ""

    # NOTE(security): pickle.load must only be used on trusted files.
    with open("{}ts_data/ts_{}.p".format(root, v_str), "rb") as fh:
        v = pickle.load(fh)
    v_stand = util.stand(v, v.mean(), v.std())
    d = 2 * (1 - util.kernel_corr(ts_stand, v_stand))

    db = binarytree.connect('{}dbs/db_{}.dbdb'.format(root, v_str))
    try:
        # Materialise the result before the handle is closed.
        return [x[1] for x in db.get_closer_than(2 * d)]
    finally:
        db.close()
예제 #5
0
def eval_closest_vantage(ts_stand, vantage, test=False):
    '''
    Find the vantage point closest to ts_stand.
    Args:
    -----
    - ts_stand: standardised ArrayTimeSeries
    - vantage: list of indices of vantage points
    - test: when true, timeseries are read from "../ts_data" instead of
      "ts_data"
    Returns:
    --------
    Tuple (closest_vantage, closest_distance) where closest_vantage is
    (index, path of the timeseries file) and closest_distance is
    2 * (1 - kernel_corr). For an empty vantage list this is
    (None, float("inf")).
    '''
    prefix = "../ts_data" if test else "ts_data"
    closest_vantage = None
    closest_distance = float("inf")
    for i in vantage:
        path = "{}/ts_{}.p".format(prefix, i)
        # NOTE(security): pickle.load must only be used on trusted files.
        with open(path, "rb") as fh:
            v = pickle.load(fh)
        v_stand = util.stand(v, v.mean(), v.std())
        curr_distance = 2 * (1 - util.kernel_corr(ts_stand, v_stand))
        # Strict comparison: on a tie the earlier vantage point is kept.
        if curr_distance < closest_distance:
            closest_vantage = (i, path)
            closest_distance = curr_distance
    return closest_vantage, closest_distance
 def test_kc_smaller_1(self):
     # Checks that the kernel correlation of two timeseries produced by
     # util.random_ts is at most 1.
     t1 = util.random_ts(1)
     t2 = util.random_ts(10)
     # assertLessEqual reports the offending value when the check fails.
     self.assertLessEqual(util.kernel_corr(t1, t2), 1)
 def test_kc_with_itself(self):
     # Checks that the kernel correlation of a timeseries with itself is
     # equal to 1, up to a tolerance of 1e-6.
     t1 = util.tsmaker(0.5, 0.1, 0.01)
     # assertLess reports the actual deviation when the check fails.
     self.assertLess(abs(util.kernel_corr(t1, t1) - 1), 0.000001)