Exemplo n.º 1
def test_n_closest():
    """
    Get ts 100; run search by id on it, confirm we get back at most 3 close ts
    and that distances in the returned dict match actual distances.
    """

    # Attempt to get non-existent time series
    with raises(ValueError):
        simsearch_by_id(500, 3)

    with raises(ValueError):
        get_by_id(500)

    # Get ts 100
    ats_100 = get_by_id(100)

    n_closest = simsearch_by_id(100, 3)
    assert len(n_closest) <= 3

    # Confirm that distance measures are accurate.
    # The returned mapping is keyed by distance, with ts ids as values.
    s_ats_100 = standardize(ats_100)
    for dist in n_closest:
        tsid = n_closest[dist]
        other_ts = get_by_id(tsid)
        actual_dist = kernel_dist(s_ats_100, standardize(other_ts))
        # abs() must wrap only the difference; wrapping the whole comparison
        # would take abs() of a boolean and let any dist <= actual_dist pass.
        assert abs(dist - actual_dist) < .0001
Exemplo n.º 2
def calc_distances(vp_k, timeseries_dict):
    """
    Compute the kernel distance from one vantage point to every other series.

    vp_k: key of the vantage point inside timeseries_dict.
    timeseries_dict: mapping of key -> time series.

    Returns a list of (distance, key) tuples, one per key other than vp_k.
    """
    reference = standardize(timeseries_dict[vp_k])
    return [
        (kernel_dist(reference, standardize(timeseries_dict[key])), key)
        for key in timeseries_dict
        if key != vp_k
    ]
Exemplo n.º 3
def find_closest_vp(vps_dict, ts):
    """
    Calculates distances from time series to all vantage points.

    vps_dict: mapping of vantage point filename -> vantage point time series.
    ts: time series to compare; it is standardized here before comparison.

    Returns tuple with filename of closest vantage point and distance to
    that vantage point. Raises IndexError if vps_dict is empty.
    """
    s_ts = standardize(ts)
    # Sort (distance, filename) pairs so the closest vantage point is first;
    # ties on distance fall back to filename order.
    vp_distances = sorted(
        (kernel_dist(s_ts, standardize(vps_dict[vp])), vp) for vp in vps_dict)
    dist_to_vp, vp_fn = vp_distances[0]
    return (vp_fn, dist_to_vp)
Exemplo n.º 4
def plot_two_ts(ts1, ts1_name, ts2, ts2_name, stand=True):
    """
    Draw two time series on the current matplotlib axes.

    ts1, ts2: the series to plot.
    ts1_name, ts2_name: labels attached to the respective lines.
    stand: when truthy, both series are standardized before plotting.
    """
    import matplotlib.pyplot as plt

    labelled = [(ts1, ts1_name), (ts2, ts2_name)]
    if stand:
        labelled = [(standardize(series), name) for series, name in labelled]

    for series, name in labelled:
        plt.plot(series, label=name)
Exemplo n.º 5
def test_add_ts():
    """Create a ts, add to db, retrieve it, assert that it's the same ts"""
    new_ts = standardize(tsmaker(0.5, 0.1, random.uniform(0, 10)))

    new_tsid = add_ts(new_ts)
    ts_as_saved = get_by_id(new_tsid)
    # A round-tripped series should be at (near) zero kernel distance from
    # the original; .00001 is the tolerance used by the other round-trip tests.
    assert kernel_dist(standardize(ts_as_saved), standardize(new_ts)) < .00001

    # Confirm that we get the same id back when we attempt to add it a second time
    assert add_ts(new_ts) == new_tsid
Exemplo n.º 6
def test_save_ts_to_db_two():
    """
    Save a short, unevenly sampled ts, fetch it back by id, and confirm the
    result matches the original interpolated onto a TS_LENGTH-point grid
    over [0, 1).
    """
    new_ts = ArrayTimeSeries(values=[0, 1, 2, 3, 10],
                             times=[0., .2, .3, .5, 1])

    new_tsid = s_client.save_ts_to_db(new_ts)
    echo_ts = s_client.get_ts_with_id(new_tsid)
    # NOTE(review): presumably the server stores series resampled to
    # TS_LENGTH points, hence the interpolation before comparing -- confirm.
    interpolated_ats = new_ts.interpolate(
        np.arange(0.0, 1.0, (1.0 / TS_LENGTH)))
    assert kernel_dist(standardize(echo_ts),
                       standardize(interpolated_ats)) < .00001
Exemplo n.º 7
def test_crosscorr():
    """
    Check kernel_corr / kernel_dist on identical and on different series.

    A series compared with itself should give correlation 1 and distance 0;
    different series should give a positive distance and correlation below 1.
    """
    # Tolerance for comparing computed floats against their ideal values;
    # exact equality is fragile under floating-point round-off.
    tol = 1e-9

    t1 = standardize(tsmaker(0.5, 0.1, random.uniform(0, 10)))

    # First confirm that the kernel correlation and distance methods
    # return 1 and 0 when comparing a ts with itself
    assert abs(kernel_corr(t1, t1) - 1) < tol
    assert abs(kernel_dist(t1, t1)) < tol

    t2 = standardize(tsmaker(0.5, 0.1, random.uniform(0, 10)))
    t3 = standardize(random_ts(0.5))

    # Now let's do the opposite -- ensure that we see some distance for different curves
    assert kernel_dist(t1, t2) > 0
    assert kernel_dist(t1, t3) > 0
    assert kernel_corr(t1, t2) < 1
    assert kernel_corr(t1, t3) < 1
Exemplo n.º 8
def test_simsearch_by_ts():
    """
    Search by time series, once with a series already in the db and once
    with a freshly generated one, and check the reported id / is_new flag.
    """
    # A series fetched from the db should be recognised as existing and give
    # the same neighbours as searching by its id.
    ats_75 = get_by_id(75)
    n_closest_dict, tsid, is_new = simsearch_by_ts(ats_75, 5)
    assert tsid == 75
    assert not is_new
    assert n_closest_dict == simsearch_by_id(75, 5)

    # A newly generated series should be flagged as new and given a fresh id.
    new_ts = standardize(tsmaker(0.5, 0.1, random.uniform(0, 10)))
    n_closest_dict, tsid, is_new = simsearch_by_ts(new_ts, 5)
    assert is_new
    # NOTE(review): 250 is presumably the highest id in the seeded db -- confirm.
    assert tsid > 250
    assert len(n_closest_dict) == 5
Exemplo n.º 9
def add_ts_to_vpdb(data_tuple):
    """
    Worker function called by add_ts_to_vpdbs.
    This process is repeated on each vantage point.

    data_tuple: (file, fsm, s_ts, ts_fn, db_dir) where
        file:   vantage point db filename
        fsm:    storage manager handed to load_ts
        s_ts:   the new time series, already standardized
        ts_fn:  filename of the new time series (stored as the db value)
        db_dir: directory prefix of the db file; joined to `file` by plain
                string concatenation
    """
    file, fsm, s_ts, ts_fn, db_dir = data_tuple
    # Drop the last 5 characters of the db filename to get the name of the
    # vantage point's own time series (presumably the ".dbdb" suffix).
    vp_ts = load_ts(file[:-5], fsm)
    dist_to_vp = kernel_dist(standardize(vp_ts), s_ts)
    db = connect(db_dir + file)
    # Index the new series under its distance to this vantage point.
    # NOTE(review): no commit/close is visible here -- confirm whether the
    # db returned by connect() needs one for the write to persist.
    db.set(dist_to_vp, ts_fn)
Exemplo n.º 10
def test_crosscorr_errors():
    """Test that we have checks for various error conditions"""

    t1 = standardize(tsmaker(0.5, 0.1, random.uniform(0, 10)))
    # NOTE(review): presumably the second argument makes t4 a different
    # length from the tsmaker default -- confirm.
    t4 = standardize(random_ts(0.5, 200))

    # Confirm that we raise value error if we attempt to compare time series
    # that are not the same length
    with raises(ValueError):
        ccor(t1, t4)

    with raises(ValueError):
        kernel_dist(t1, t4)

    with raises(ValueError):
        kernel_corr(t1, t4)

    # Confirm that we raise value error if we attempt to compare time series
    # that have not been standardized first
    # NOTE(review): t4 and t5 may also differ in length, so this check might
    # pass on the length error alone; comparing t1 with t5 would isolate the
    # standardization check -- confirm before changing.
    t5 = tsmaker(0.5, 0.1, random.uniform(0, 10))
    with raises(ValueError):
        kernel_dist(t4, t5)
Exemplo n.º 11
def search_vpdb_for_n(vp_t, ts, db_dir, lc_dir, n):
    """
    Searches for n most similar light curves based on pre-computed distances in vpdb

    Args:
        vp_t: tuple containing vantage point filename and distance of time series to vantage point
        ts: time series to search on.
        db_dir: passed through to find_lc_candidates.
        lc_dir: passed through to find_lc_candidates.
        n: number of closest time series to return.

    Returns:
        Tuple of (dict, existing_ts_id).
        Dict: A dict of up to n closest time series ids, with distances as the keys and ts ids as the values
        existing_ts_id: id of a stored time series at (near) zero distance from ts, or -1 if there is none.

    Uses a process pool to calculate distances in parallel, and a heap queue to minimize time
    for sorting the final distance list to the n smallest distances.
    """
    # 1. Setup data to be processed in parallel
    vp_fn, dist_to_vp = vp_t
    lc_candidates, fsm = find_lc_candidates(vp_t, db_dir, lc_dir)
    # The vantage point itself is also a candidate match.
    lc_candidates.append((dist_to_vp, vp_fn))
    existing_ts_id = -1
    s_ts = standardize(ts)

    lc_candidate_data = [(ts_fn, fsm, s_ts) for _, ts_fn in lc_candidates]

    # 2. Calculate distances in parallel
    with ProcessPoolExecutor() as pool:
        dist_list = pool.map(calc_distance, lc_candidate_data)

    # 3. Sort distances for n+1 smallest (one extra in case ts itself is in the db)
    n_smallest = heapq.nsmallest(n + 1, dist_list)

    # 4. Look through sublist of closest time series to see if any have a distance of zero.
    # If so, mark it as an existing time series and drop it from the results.
    # Otherwise, trim the list back down to n entries.
    for dist_to_ts, tsid in n_smallest:
        if dist_to_ts < .00001:
            existing_ts_id = tsid

    if existing_ts_id == -1:
        # Slice to n rather than dropping the last item, so a short candidate
        # list (fewer than n+1 entries) does not lose a real result.
        n_smallest = n_smallest[:n]
    else:
        n_smallest = [(dist, tsid) for dist, tsid in n_smallest
                      if tsid != existing_ts_id]

    # 5. Return n_smallest dict, and existing id (or -1 if not in db)
    return (dict(n_smallest), existing_ts_id)
Exemplo n.º 12
def add_ts_to_vpdbs(ts, ts_fn, db_dir, lc_dir):
    """
    Based on names of vantage point db files, adds single new time series to vp indexes
    (Does not re-pick vantage points)

    Uses ProcessPoolExecutor to run processes in parallel.

    ts: the new time series; it is standardized here before being indexed.
    ts_fn: filename of the new time series.
    db_dir: directory scanned for vantage point db files
            ("ts_datafile_*.dbdb").
    lc_dir: directory handed to FileStorageManager.
    """
    fsm = FileStorageManager(lc_dir)
    s_ts = standardize(ts)

    # Setup data for process pool execution
    vp_fns = [
        file for file in os.listdir(db_dir)
        if file.startswith("ts_datafile_") and file.endswith(".dbdb")
    ]
    vp_tuples = [(vp_fn, fsm, s_ts, ts_fn, db_dir) for vp_fn in vp_fns]

    # Create processes
    with ProcessPoolExecutor() as pool:
        # Consume the result iterator: Executor.map only re-raises a worker's
        # exception when its result is retrieved, so discarding the iterator
        # would silently hide failed index updates.
        list(pool.map(add_ts_to_vpdb, vp_tuples))
Exemplo n.º 13
def test_save_ts_to_db():
    """Save a ts through the client, fetch it back by id, compare to original."""
    original_ts = tsmaker(0.5, 0.1, random.uniform(0, 10))
    saved_id = s_client.save_ts_to_db(original_ts)
    fetched_ts = s_client.get_ts_with_id(saved_id)

    # The round-tripped series should sit at (near) zero kernel distance
    # from the one that was saved.
    round_trip_dist = kernel_dist(standardize(fetched_ts),
                                  standardize(original_ts))
    assert round_trip_dist < .00001
Exemplo n.º 14
def calc_distance(lc_candidate_data):
    """
    Worker function called by search_vpdb_for_n.

    lc_candidate_data: (ts_fn, fsm, s_ts) -- candidate filename, storage
    manager handed to load_ts, and the already-standardized query series.

    Returns a (distance, ts id) tuple for the candidate.
    """
    ts_fn, fsm, s_ts = lc_candidate_data
    s_candidate = standardize(load_ts(ts_fn, fsm))
    return kernel_dist(s_candidate, s_ts), tsfn_to_id(ts_fn)