Пример #1
0
def test_n_closest():
    """
    Look up ts 100, run a similarity search by id, and validate the result.

    Checks that:
      * asking for id 500 raises ValueError from both simsearch_by_id
        and get_by_id,
      * a search for 3 neighbours of ts 100 returns at most 3 entries,
      * every distance used as a key in the returned dict matches the kernel
        distance recomputed from the stored time series.
    """

    # Attempt to get non-existent time series
    with raises(ValueError):
        simsearch_by_id(500, 3)

    with raises(ValueError):
        get_by_id(500)

    # Get ts 100
    ats_100 = get_by_id(100)

    n_closest = simsearch_by_id(100, 3)
    assert len(n_closest) <= 3

    # Confirm that distance measures are accurate.
    # The result maps distance -> ts id.
    s_ats_100 = standardize(ats_100)
    for dist, tsid in n_closest.items():
        other_ts = get_by_id(tsid)
        actual_dist = kernel_dist(s_ats_100, standardize(other_ts))
        # The tolerance comparison has to sit outside abs(); inside it, the
        # assert would test abs(dist - <bool>) and pass for nearly any value.
        assert abs(dist - actual_dist) < .0001
Пример #2
0
def calc_distances(vp_k, timeseries_dict):
    """
    Compute the kernel distance from one vantage point to every other entry.

    Args:
        vp_k: key of the vantage point inside timeseries_dict.
        timeseries_dict: mapping of key -> time series.

    Returns:
        List of (distance, key) tuples, one per key other than vp_k, in the
        iteration order of timeseries_dict.
    """
    s_vp = standardize(timeseries_dict[vp_k])
    return [
        (kernel_dist(s_vp, standardize(timeseries_dict[key])), key)
        for key in timeseries_dict
        if key != vp_k
    ]
Пример #3
0
def find_closest_vp(vps_dict, ts):
    """
    Find the vantage point nearest to a time series.

    Args:
        vps_dict: mapping of vantage point key (described by the project as
            the vantage point filename) -> vantage point time series.
        ts: time series to compare against every vantage point.

    Returns:
        Tuple (key of the closest vantage point, distance to it).

    Raises:
        IndexError: if vps_dict is empty.
    """
    s_ts = standardize(ts)

    ranked = []
    for vp_key in vps_dict:
        vp_dist = kernel_dist(s_ts, standardize(vps_dict[vp_key]))
        ranked.append((vp_dist, vp_key))

    # Tuples sort by distance first; equal distances fall back to key order
    ranked.sort()
    nearest_dist, nearest_key = ranked[0]
    return (nearest_key, nearest_dist)
Пример #4
0
def plot_two_ts(ts1, ts1_name, ts2, ts2_name, stand=True):
    """
    Draw two time series on one matplotlib figure and display it.

    Args:
        ts1, ts2: the time series to plot.
        ts1_name, ts2_name: legend labels for ts1 and ts2.
        stand: when true, both series are passed through standardize()
            before plotting.
    """
    # Imported here so matplotlib is only needed when plotting is requested
    import matplotlib.pyplot as plt

    if stand:
        first, second = standardize(ts1), standardize(ts2)
    else:
        first, second = ts1, ts2

    for series, label in ((first, ts1_name), (second, ts2_name)):
        plt.plot(series, label=label)

    plt.legend()
    plt.show()
Пример #5
0
def test_add_ts():
    """Create a ts, add it to the db, read it back, and check it is the same ts."""
    original = standardize(tsmaker(0.5, 0.1, random.uniform(0, 10)))

    assigned_id = add_ts(original)
    stored = get_by_id(assigned_id)
    round_trip_dist = kernel_dist(standardize(stored), standardize(original))
    assert round_trip_dist < .00001

    # Adding the identical series a second time must hand back the first id
    repeat_id = add_ts(original)
    assert repeat_id == assigned_id
Пример #6
0
def test_save_ts_to_db_two():
    """
    Save a 5-point series with unevenly spaced times and read it back by id.

    The returned series is compared against the original interpolated onto
    an even grid over [0, 1) with step 1/TS_LENGTH; the kernel distance
    between the two (after standardizing) must be below 1e-5.
    """
    uneven_ts = ArrayTimeSeries(values=[0, 1, 2, 3, 10],
                                times=[0., .2, .3, .5, 1])

    saved_id = s_client.save_ts_to_db(uneven_ts)
    returned_ts = s_client.get_ts_with_id(saved_id)

    # presumably the stored copy is resampled to this grid -- confirm
    grid = np.arange(0.0, 1.0, (1.0 / TS_LENGTH))
    expected_ts = uneven_ts.interpolate(grid)

    round_trip_dist = kernel_dist(standardize(returned_ts),
                                  standardize(expected_ts))
    assert round_trip_dist < .00001
Пример #7
0
def test_crosscorr():
    """Check kernel_corr and kernel_dist on identical and on different series."""
    base = standardize(tsmaker(0.5, 0.1, random.uniform(0, 10)))

    # A series compared with itself must give correlation 1 and distance 0
    assert kernel_corr(base, base) == 1
    assert kernel_dist(base, base) == 0

    others = (
        standardize(tsmaker(0.5, 0.1, random.uniform(0, 10))),
        standardize(random_ts(0.5)),
    )

    # Different curves must show some distance ...
    for other in others:
        assert kernel_dist(base, other) > 0

    # ... and a correlation strictly below 1
    for other in others:
        assert kernel_corr(base, other) < 1
Пример #8
0
def test_simsearch_by_ts():
    """
    Search by time series, once with a stored series and once with a new one.

    For ts 75 the search must report id 75, flag it as not new, and return
    the same neighbours as a search by id. For a freshly generated series
    the search must flag it as new, report an id above 250, and return
    exactly 5 neighbours.
    """
    # Case 1: series fetched from the db
    stored_ts = get_by_id(75)
    neighbours, found_id, created = simsearch_by_ts(stored_ts, 5)
    assert found_id == 75
    assert created == False  # explicit comparison kept as in the original
    assert neighbours == simsearch_by_id(75, 5)

    # Case 2: freshly generated series
    fresh_ts = standardize(tsmaker(0.5, 0.1, random.uniform(0, 10)))
    neighbours, found_id, created = simsearch_by_ts(fresh_ts, 5)
    assert created == True  # explicit comparison kept as in the original
    assert found_id > 250
    assert len(neighbours) == 5
Пример #9
0
def add_ts_to_vpdb(data_tuple):
    """
    Worker function called by add_ts_to_vpdbs.
    This process is repeated on each vantage point.

    Args:
        data_tuple: tuple (file, fsm, s_ts, ts_fn, db_dir) where
            file is the vantage point db filename inside db_dir,
            fsm is the storage manager handed to load_ts,
            s_ts is the already standardized time series being added,
            ts_fn is the value stored in the db for the new time series,
            db_dir is the directory prefix that is concatenated with file.

    The distance between the vantage point and s_ts is stored in the db
    with the distance as key and ts_fn as value.
    """
    file, fsm, s_ts, ts_fn, db_dir = data_tuple

    # Drop the last 5 characters of the db filename (the ".dbdb" suffix that
    # add_ts_to_vpdbs filters on) to get the name passed to load_ts
    vp_ts = load_ts(file[:-5], fsm)
    dist_to_vp = kernel_dist(standardize(vp_ts), s_ts)

    db = connect(db_dir + file)
    try:
        db.set(dist_to_vp, ts_fn)
        db.commit()
    finally:
        # Always release the db handle, even if set/commit fails
        db.close()
Пример #10
0
def test_crosscorr_errors():
    """Test that we have checks for various error conditions"""

    t1 = standardize(tsmaker(0.5, 0.1, random.uniform(0, 10)))
    t4 = standardize(random_ts(0.5, 200))

    # Confirm that we raise value error if we attempt to compare time series
    # that are not the same length
    with raises(ValueError):
        ccor(t1, t4)

    with raises(ValueError):
        kernel_dist(t1, t4)

    with raises(ValueError):
        kernel_corr(t1, t4)

    # Confirm that we raise value error if we attempt to compare time series
    # that have not been standardized first.
    # t5 is generated the same way as t1 but left unstandardized, so pairing
    # it with t1 isolates the standardization check. Pairing it with t4
    # would presumably raise for the length mismatch exercised above instead.
    t5 = tsmaker(0.5, 0.1, random.uniform(0, 10))
    with raises(ValueError):
        kernel_dist(t1, t5)
Пример #11
0
def search_vpdb_for_n(vp_t, ts, db_dir, lc_dir, n):
    """
    Find the n most similar light curves among the candidates of a vantage point.

    Args:
        vp_t: tuple (vantage point filename, distance from ts to that
            vantage point).
        ts: time series to search on.
        db_dir: passed through to find_lc_candidates.
        lc_dir: passed through to find_lc_candidates.
        n: number of neighbours wanted.

    Returns:
        Tuple (neighbours, existing_ts_id). neighbours is a dict with
        distances as keys and ts ids as values. existing_ts_id is the id of
        a candidate whose distance to ts is below 1e-5, or -1 when there is
        no such candidate.

    Note:
        Distances are computed in a process pool, and heapq.nsmallest picks
        the n + 1 smallest so the full distance list is never sorted.
    """

    # 1. Gather the candidates and build one work item per candidate
    vp_fn, dist_to_vp = vp_t
    lc_candidates, fsm = find_lc_candidates(vp_t, db_dir, lc_dir)
    # The vantage point itself is compared as well
    lc_candidates.append((dist_to_vp, vp_fn))
    s_ts = standardize(ts)
    work_items = [(candidate_fn, fsm, s_ts)
                  for _, candidate_fn in lc_candidates]

    # 2. Compute the distances in worker processes
    with ProcessPoolExecutor() as pool:
        distances = pool.map(calc_distance, work_items)

    # 3. Keep one more than requested: one slot is dropped in step 4
    closest = heapq.nsmallest(n + 1, distances)

    # 4. A (near) zero distance means ts is already stored. The last such
    # entry in ascending order wins.
    existing_ts_id = -1
    for dist_to_ts, tsid in closest:
        if dist_to_ts < .00001:
            existing_ts_id = tsid

    if existing_ts_id != -1:
        # Remove the stored copy of ts from its own neighbour list
        closest = [(dist, cand_id) for dist, cand_id in closest
                   if cand_id != existing_ts_id]
    else:
        # Nothing to remove, so drop the extra (farthest) entry
        closest = closest[:-1]

    # 5. Return the neighbour dict and the existing id (or -1 if not in db)
    return (dict(closest), existing_ts_id)
Пример #12
0
def add_ts_to_vpdbs(ts, ts_fn, db_dir, lc_dir):
    """
    Add one new time series to every existing vantage point index.

    The vantage point db files are discovered by name inside db_dir
    (prefix "ts_datafile_", suffix ".dbdb"); vantage points are not re-picked.
    One add_ts_to_vpdb task per db file is submitted to a process pool.

    Args:
        ts: the new time series; it is standardized once before dispatch.
        ts_fn: name under which the new time series is recorded.
        db_dir: directory that holds the vantage point db files.
        lc_dir: directory handed to FileStorageManager.
    """

    fsm = FileStorageManager(lc_dir)
    s_ts = standardize(ts)

    # One work item per vantage point db file found in db_dir
    work_items = []
    for entry in os.listdir(db_dir):
        if entry.startswith("ts_datafile_") and entry.endswith(".dbdb"):
            work_items.append((entry, fsm, s_ts, ts_fn, db_dir))

    # Leaving the with-block waits for all submitted tasks to finish.
    # NOTE(review): the iterator returned by map() is never consumed, so an
    # exception raised inside a worker is not re-raised here -- confirm this
    # is intended.
    with ProcessPoolExecutor() as pool:
        pool.map(add_ts_to_vpdb, work_items)
Пример #13
0
def test_save_ts_to_db():
    """Save a ts through the client, request it by id, and compare to the original."""
    original = tsmaker(0.5, 0.1, random.uniform(0, 10))

    saved_id = s_client.save_ts_to_db(original)
    returned = s_client.get_ts_with_id(saved_id)

    round_trip_dist = kernel_dist(standardize(returned), standardize(original))
    assert round_trip_dist < .00001
Пример #14
0
def calc_distance(lc_candidate_data):
    """
    Worker function called by search_vpdb_for_n.

    Args:
        lc_candidate_data: tuple (ts_fn, fsm, s_ts) holding the candidate's
            filename, the storage manager used to load it, and the already
            standardized time series being searched for.

    Returns:
        Tuple (distance from the candidate to s_ts, id derived from ts_fn).
    """
    ts_fn, fsm, s_ts = lc_candidate_data
    s_candidate = standardize(load_ts(ts_fn, fsm))
    return (kernel_dist(s_candidate, s_ts), tsfn_to_id(ts_fn))