def test_db_1():
    """Exercise basic set/get/commit semantics of an unbalanced-tree database."""
    db_fname = TEMP_DIR + "test1.dbdb"
    db = unbalancedDB.connect(db_fname)
    # idiom fix: assert the truthy value directly instead of `== True`
    assert os.path.isfile(db_fname)  # connect() must create the file on disk
    db.close()

    # Populate with strictly decreasing keys -> degenerate (unbalanced) tree.
    db = unbalancedDB.connect(db_fname)
    db.set(16, "big")
    db.set(15, "med")
    db.set(14, "sml")
    db.commit()
    db.close()

    db = unbalancedDB.connect(db_fname)
    assert db.get(16) == 'big'              # test get()
    assert db.get_min() == 'sml'            # test get_min()
    assert db.get_left(16) == (15, u'med')  # test get_left()
    assert db.get_left(15) == (14, u'sml')  # so the tree is indeed unbalanced
    # test chop is robust to whether the tree is balanced or not
    assert db.chop(15.5) == [(15, u'med'), (14, u'sml')]
    db.close()

    # Modify without committing: the change must be discarded on close.
    db = unbalancedDB.connect(db_fname)
    db.set(16, "really big")
    db.close()

    db = unbalancedDB.connect(db_fname)
    assert db.get(16) == 'big'  # test commit required for changes to be finalized
    db.close()
def test_db_2():
    """A more complicated balanced example (matches the Wikipedia BST figure)."""
    db_fname = TEMP_DIR + "test2.dbdb"
    db = unbalancedDB.connect(db_fname)
    # idiom fix: assert the truthy value directly instead of `== True`
    assert os.path.isfile(db_fname)  # connect() must create the file on disk
    db.close()

    db = unbalancedDB.connect(db_fname)
    input_data = [
        (8, "eight"),
        (3, "three"),
        (10, "ten"),
        (1, "one"),
        (6, "six"),
        (14, "fourteen"),
        (4, "four"),
        (7, "seven"),
        (13, "thirteen"),
    ]
    for key, val in input_data:
        db.set(key, val)
    db.commit()
    db.close()

    # Testing: verify the expected tree structure node by node.
    db = unbalancedDB.connect(db_fname)
    assert db.get_left(8) == (3, "three")
    assert db.get_right(8) == (10, "ten")
    assert db.get_left(3) == (1, "one")
    assert db.get_right(3) == (6, "six")
    assert db.get_left(6) == (4, "four")
    assert db.get_right(6) == (7, "seven")
    assert db.get_right(10) == (14, "fourteen")
    assert db.get_left(14) == (13, "thirteen")  # ensure that we do match wikipedia
    # test chop on key in database
    assert db.chop(6) == [(3, u'three'), (1, u'one'), (6, u'six'), (4, u'four')]
    # test chop on key out of database
    assert db.chop(6.1) == [(3, u'three'), (1, u'one'), (6, u'six'), (4, u'four')]
    db.close()
    clear_dir(TEMP_DIR, recreate=False)
def save_vp_dbs(vp, timeseries_dict):
    """Create an unbalanced binary-tree database for one vantage point and save it to disk.

    Args:
        vp: vantage-point time-series filename (e.g. "ts-13.txt").
        timeseries_dict: mapping of time-series filenames to their data,
            passed through to calc_distances.
    """
    # Distances from every time series to this vantage point.
    distances = calc_distances(vp, timeseries_dict)
    # Derive the db path from the vp filename: ts-13.txt -> vp_dbs/ts-13.dbdb
    db = connect(DB_DIR + vp[:-4] + ".dbdb")
    for distance, series_fname in distances:
        db.set(distance, series_fname)
    db.commit()
    db.close()
def search_vpdb(vp_t, ts):
    """Search for the most similar light curve based on pre-computed distances in a vpdb.

    Args:
        vp_t: tuple containing vantage point filename and distance of time
            series to vantage point.
        ts: time series to search on.

    Returns:
        Tuple: distance to closest light curve, filename of closest light
        curve, time-series object for the closest light curve.
    """
    vp_fn, dist_to_vp = vp_t
    db_path = DB_DIR + vp_fn[:-4] + ".dbdb"
    # Fix: reuse the already-computed db_path instead of rebuilding the
    # identical path expression a second time.
    db = connect(db_path)
    s_ts = standardize(ts)
    # Identify light curves in the selected vantage db that are up to 2x the
    # distance that the time series is from the vantage point.
    lc_candidates = db.chop(2 * dist_to_vp)
    db.close()
    # The vantage point itself is the ts to beat as we scan the candidates.
    min_dist = dist_to_vp
    closest_ts_fn = vp_fn
    closest_ts = load_ts(vp_fn)
    for d_to_vp, ts_fn in lc_candidates:
        candidate_ts = load_ts(ts_fn)
        dist_to_ts = kernel_dist(standardize(candidate_ts), s_ts)
        if dist_to_ts < min_dist:
            min_dist = dist_to_ts
            closest_ts_fn = ts_fn
            closest_ts = candidate_ts
    return (min_dist, closest_ts_fn, closest_ts)