def test_input_data_size():
    """Regression test for #6288.

    Previously, a metric requiring a particular input dimension would fail.
    The callable below asserts that it receives vectors of length 3, so the
    test only passes when the "pyfunc" metric hands full rows to the
    user-supplied function.
    """
    def custom_metric(x, y):
        # Each argument must be one complete row of X (3 features).
        assert x.shape[0] == 3
        return np.sum((x - y) ** 2)

    rng = check_random_state(0)
    X = rng.rand(10, 3)

    # Use the dimension-checking callable defined above; it takes no extra
    # keyword arguments.
    pyfunc = DistanceMetric.get_metric("pyfunc", func=custom_metric)
    eucl = DistanceMetric.get_metric("euclidean")

    # custom_metric returns the sum of squared differences (no square root),
    # so the reference is the squared euclidean distance matrix.
    assert_array_almost_equal(pyfunc.pairwise(X), eucl.pairwise(X) ** 2)
def test_pyfunc_metric():
    """Check that a "pyfunc" metric with p=2 matches the "euclidean" metric.

    The callable computes ``sum((x1 - x2) ** p) ** (1 / p)``; both metrics
    are evaluated pairwise on the same random 10x3 sample.
    """
    def p_norm_distance(x1, x2, p):
        powered = (x1 - x2) ** p
        return np.sum(powered) ** (1. / p)

    X = np.random.random((10, 3))

    reference_metric = DistanceMetric.get_metric("euclidean")
    callable_metric = DistanceMetric.get_metric(
        "pyfunc", func=p_norm_distance, p=2)

    expected = reference_metric.pairwise(X)
    actual = callable_metric.pairwise(X)

    assert_allclose(expected, actual)
def check_pdist_bool(metric, D_true):
    """Compare pairwise distances on the module-level ``X1_bool`` to D_true.

    Parameters
    ----------
    metric : name passed to ``DistanceMetric.get_metric``.
    D_true : expected distance matrix. Modified in place (NaN entries set
        to 0) when the metric is 'jaccard' and scipy is older than 1.2.0.
    """
    computed = DistanceMetric.get_metric(metric).pairwise(X1_bool)

    # Based on https://github.com/scipy/scipy/pull/7373
    # When comparing two all-zero vectors, scipy>=1.2.0 jaccard metric
    # was changed to return 0, instead of nan.
    needs_nan_patch = (metric == 'jaccard'
                       and LooseVersion(scipy_version) < '1.2.0')
    if needs_nan_patch:
        nan_mask = np.isnan(D_true)
        D_true[nan_mask] = 0

    assert_array_almost_equal(computed, D_true)
def test_pyfunc_metric():
    """Check "pyfunc" against "euclidean", before and after pickling.

    Both a predefined metric and a callable-based metric (built from
    ``dist_func``, which is defined outside this function) are
    round-tripped through pickle; the distance matrices must agree in both
    the original and the unpickled pair.
    """
    X = np.random.random((10, 3))

    metrics = {
        "euclidean": DistanceMetric.get_metric("euclidean"),
        "pyfunc": DistanceMetric.get_metric("pyfunc", func=dist_func, p=2),
    }

    # Check if both callable metric and predefined metric initialized
    # DistanceMetric object is picklable
    restored = {
        "euclidean": pickle.loads(pickle.dumps(metrics["euclidean"])),
        "pyfunc": pickle.loads(pickle.dumps(metrics["pyfunc"])),
    }

    D1 = metrics["euclidean"].pairwise(X)
    D2 = metrics["pyfunc"].pairwise(X)
    D1_pkl = restored["euclidean"].pairwise(X)
    D2_pkl = restored["pyfunc"].pairwise(X)

    assert_array_almost_equal(D1, D2)
    assert_array_almost_equal(D1_pkl, D2_pkl)
def test_kd_tree_two_point(dualtree):
    """Check KDTree.two_point_correlation against a brute-force count.

    For each radius, the expected count is the number of (Y, X) pairs whose
    euclidean distance is at most that radius.
    """
    sample_shape = (100, 3)
    rng = check_random_state(0)
    # X must be drawn before Y to keep the random stream identical.
    X = rng.random_sample(sample_shape)
    Y = rng.random_sample(sample_shape)
    radii = np.linspace(0, 1, 10)

    tree = KDTree(X, leaf_size=10)

    euclidean = DistanceMetric.get_metric("euclidean")
    all_distances = euclidean.pairwise(Y, X)
    expected_counts = [(all_distances <= radius).sum() for radius in radii]

    observed_counts = tree.two_point_correlation(
        Y, r=radii, dualtree=dualtree)
    assert_array_almost_equal(observed_counts, expected_counts)
def test_ball_tree_two_point(n_samples=100, n_features=3):
    """Yield two-point-correlation checks for BallTree (dualtree on and off).

    This is a generator: each yielded item is ``(check, r, dualtree)``.
    NOTE(review): yield-based tests are a nose-style convention; confirm the
    test runner in use still collects them.
    """
    np.random.seed(0)
    sample_shape = (n_samples, n_features)
    # X must be drawn before Y to keep the random stream identical.
    X = np.random.random(sample_shape)
    Y = np.random.random(sample_shape)
    radii = np.linspace(0, 1, 10)

    tree = BallTree(X, leaf_size=10)

    all_distances = DistanceMetric.get_metric("euclidean").pairwise(Y, X)
    counts_true = [(all_distances <= radius).sum() for radius in radii]

    def check_two_point(r, dualtree):
        observed = tree.two_point_correlation(Y, r=r, dualtree=dualtree)
        assert_array_almost_equal(observed, counts_true)

    for use_dualtree in (True, False):
        yield check_two_point, radii, use_dualtree
def test_haversine_metric():
    """Check the "haversine" metric against a plain-Python reference.

    Also checks that ``dist_to_rdist`` maps a distance ``d`` to
    ``sin(d / 2) ** 2``.
    """
    def haversine_slow(x1, x2):
        first_term = np.sin(0.5 * (x1[0] - x2[0])) ** 2
        second_term = (np.cos(x1[0]) * np.cos(x2[0])
                       * np.sin(0.5 * (x1[1] - x2[1])) ** 2)
        return 2 * np.arcsin(np.sqrt(first_term + second_term))

    X = np.random.random((10, 2))

    haversine = DistanceMetric.get_metric("haversine")
    computed = haversine.pairwise(X)

    # Fill the reference matrix one pair at a time.
    reference = np.zeros_like(computed)
    for row, point_a in enumerate(X):
        for col, point_b in enumerate(X):
            reference[row, col] = haversine_slow(point_a, point_b)

    assert_array_almost_equal(computed, reference)
    assert_array_almost_equal(haversine.dist_to_rdist(computed),
                              np.sin(0.5 * reference) ** 2)
def check_pickle(metric, kwargs):
    """Check that a metric gives the same distances after a pickle round trip.

    Distances are computed pairwise on the module-level ``X1``; ``kwargs``
    is forwarded to ``DistanceMetric.get_metric``.
    """
    original = DistanceMetric.get_metric(metric, **kwargs)
    before = original.pairwise(X1)

    serialized = pickle.dumps(original)
    restored = pickle.loads(serialized)
    after = restored.pairwise(X1)

    assert_array_almost_equal(before, after)
def test_pickle_bool_metrics(metric):
    """Check that a metric on ``X1_bool`` is unchanged by a pickle round trip.

    ``X1_bool`` is a module-level fixture defined outside this function.
    """
    original = DistanceMetric.get_metric(metric)
    before = original.pairwise(X1_bool)

    serialized = pickle.dumps(original)
    restored = pickle.loads(serialized)
    after = restored.pairwise(X1_bool)

    assert_array_almost_equal(before, after)
def check_pdist(self, metric, kwargs, D_true):
    """Compare pairwise distances on ``self.X1`` to D_true (assert_allclose).

    Raises SkipTest for the 'canberra' metric when ``cmp_version`` reports
    the installed scipy as not newer than 0.9.
    """
    if metric == 'canberra':
        # The version is only inspected for the canberra metric.
        if cmp_version(scipy.__version__, '0.9') <= 0:
            raise SkipTest("Canberra distance incorrect in scipy < 0.9")

    distance_metric = DistanceMetric.get_metric(metric, **kwargs)
    computed = distance_metric.pairwise(self.X1)
    assert_allclose(computed, D_true)
def check_cdist(self, metric, kwargs, D_true):
    """Compare distances between ``self.X1`` and ``self.X2`` to D_true.

    ``kwargs`` is forwarded to ``DistanceMetric.get_metric``.
    """
    distance_metric = DistanceMetric.get_metric(metric, **kwargs)
    computed = distance_metric.pairwise(self.X1, self.X2)
    assert_array_almost_equal(computed, D_true)
def check_pickle_bool(self, metric):
    """Check that a metric on ``self.X1_bool`` survives a pickle round trip."""
    original = DistanceMetric.get_metric(metric)
    before = original.pairwise(self.X1_bool)

    serialized = pickle.dumps(original)
    restored = pickle.loads(serialized)
    after = restored.pairwise(self.X1_bool)

    assert_array_almost_equal(before, after)
def check_cdist(self, metric, kwargs, D_true):
    """Compare distances between ``self.X1`` and ``self.X2`` to D_true.

    Raises SkipTest for the "canberra" metric when ``cmp_version`` reports
    the installed scipy as not newer than 0.9.
    """
    if metric == "canberra":
        # The version is only inspected for the canberra metric.
        if cmp_version(scipy.__version__, "0.9") <= 0:
            raise SkipTest("Canberra distance incorrect in scipy < 0.9")

    distance_metric = DistanceMetric.get_metric(metric, **kwargs)
    computed = distance_metric.pairwise(self.X1, self.X2)
    assert_array_almost_equal(computed, D_true)
def compute_distances(altitude=1150, min_elev=40, orbits=32,
                      sat_per_orbit=50, inclination=53,
                      gst_file="data/raw/ixp_geolocation.csv",
                      src_file="data/raw/"
                               "WUP2018-F22-Cities_Over_300K_Annual.csv",
                      time=15000, max_src_latitude=56):
    """Compute source-to-source latencies via satellites, routed two ways.

    Called without arguments, this reproduces the original hard-coded
    scenario; every parameter default is the value that used to be a local
    constant.

    Parameters
    ----------
    altitude, orbits, sat_per_orbit, inclination :
        Constellation description, forwarded to ``load_locations``;
        ``altitude`` is also passed to ``compute_sat_sat_distance`` and
        ``compute_gst_sat_distance``.
        NOTE(review): units are not visible here — confirm against helpers.
    min_elev :
        Forwarded to ``compute_gst_sat_distance``.
    gst_file, src_file :
        Paths of the ground-station (IXP) and source (city) CSV files.
    time :
        Forwarded unchanged as the ``time`` argument of ``load_locations``.
    max_src_latitude :
        Sources whose first column (latitude) exceeds this value are dropped.

    Returns
    -------
    (src_gst_latency, src_src_latency, src_pos)
        ``src_gst_latency`` is the result of ``compute_src_dst_latency`` for
        sources routed through their nearest ground station;
        ``src_src_latency`` is the source-to-source satellite distance
        divided by ``LIGHT_IN_VACUUM``; ``src_pos`` is the sorted and
        filtered source position array.
    """
    # Load geo information
    sat_pos, gst_pos, src_pos = load_locations(altitude, orbits,
                                               sat_per_orbit, inclination,
                                               gst_file, src_file, time=time)

    # Sort sources by their second column (longitude).
    lon_sort_idx_src = np.argsort(src_pos[:, 1])
    src_pos = src_pos[lon_sort_idx_src]

    # Remove SRCs that are too high in latitude
    higher = np.where(src_pos[:, 0] > max_src_latitude)[0]
    src_pos = np.delete(src_pos, higher, axis=0)

    # Sort ground stations by longitude as well.
    lon_sort_idx_gst = np.argsort(gst_pos[:, 1])
    gst_pos = gst_pos[lon_sort_idx_gst]

    # %%
    sat_sat_dist = compute_sat_sat_distance(sat_pos, altitude, orbits,
                                            sat_per_orbit)

    # Compute the BallTree for the satellites. This gives nn to satellites.
    sat_tree = BallTree(np.deg2rad(sat_pos),
                        metric=DistanceMetric.get_metric("haversine"))

    # Get the satellites that are in reach for the ground stations
    # and their distance. Here every source city acts as a ground station.
    sat_gst_ind_city, sat_gst_dist_city = compute_gst_sat_distance(
        altitude, min_elev, src_pos, sat_tree)
    src_src_satellite = gsts_optimization(sat_gst_ind_city,
                                          sat_gst_dist_city,
                                          sat_sat_dist,
                                          n_gsts=src_pos.shape[0])
    src_src_latency = src_src_satellite / LIGHT_IN_VACUUM

    # %%
    # Same computation, but for the ground stations read from gst_file.
    sat_gst_ind_ixp, sat_gst_dist_ixp = compute_gst_sat_distance(
        altitude, min_elev, gst_pos, sat_tree)
    gst_gst_satellite = gsts_optimization(sat_gst_ind_ixp,
                                          sat_gst_dist_ixp,
                                          sat_sat_dist,
                                          n_gsts=gst_pos.shape[0])

    src_gst_ind, src_gst_dist = src_nearest_gst_distance(src_pos, gst_pos)
    n_src = src_pos.shape[0]
    src_gst_latency = compute_src_dst_latency(n_src, [], src_gst_ind,
                                              src_gst_dist, [], [],
                                              gst_gst_satellite)

    return src_gst_latency, src_src_latency, src_pos
def check_pdist_bool(self, metric, D_true):
    """Compare pairwise distances on ``self.X1_bool`` to D_true."""
    distance_metric = DistanceMetric.get_metric(metric)
    computed = distance_metric.pairwise(self.X1_bool)
    assert_allclose(computed, D_true)
def check_pdist(metric, kwargs, D_true):
    """Compare pairwise distances on the module-level ``X1`` to D_true.

    ``kwargs`` is forwarded to ``DistanceMetric.get_metric``.
    """
    distance_metric = DistanceMetric.get_metric(metric, **kwargs)
    computed = distance_metric.pairwise(X1)
    assert_array_almost_equal(computed, D_true)
def plot_absolute(src_gst_latency, src_src_latency, src_pos):
    """Plot average latency against great-circle distance and save a PNG.

    Only the strict upper triangle of the latency matrices is used. Two
    averaged curves are drawn (from ``src_src_latency`` and from
    ``src_gst_latency``) together with three reference lines derived from
    ``LIGHT_IN_FIBER``, ``LIGHT_IN_VACUUM`` and ``FIBER_PATH_STRETCH``. The
    figure is written to ``figures/latency-distance.png``.

    Parameters
    ----------
    src_gst_latency, src_src_latency :
        Square matrices indexed by source.
    src_pos :
        Source positions in degrees; converted to radians for the haversine
        metric.
    """
    triu = np.triu_indices(src_gst_latency.shape[0], 1)
    ixp_routed = np.around(src_gst_latency[triu], 6)
    city_gst = np.around(src_src_latency[triu], 6)
    SCALING = 1e3

    plt.figure(figsize=(8, 6))

    # Call pairwise() on the metric instance rather than through the
    # DistanceMetric class: the factory class does not expose pairwise()
    # in every scikit-learn release, the instance always does.
    src_rad = np.deg2rad(src_pos)
    haversine = DistanceMetric.get_metric("haversine")
    pairwise_src = haversine.pairwise(src_rad, src_rad) * EARTH_RADIUS
    pairwise = pairwise_src[triu]

    # Only the per-bin average is plotted; the other statistics are unused.
    vals, avg_c, _, _, _ = vector_map_statistics(pairwise, city_gst, 10)
    avg_c = np.asarray(avg_c) * SCALING
    plt.plot(vals, avg_c, label="Average city-city", linewidth=3)

    vals, avg_g, _, _, _ = vector_map_statistics(pairwise, ixp_routed, 10)
    avg_g = np.asarray(avg_g) * SCALING
    plt.plot(vals, avg_g, label="Average IXP-city", linewidth=3)

    # Reference lines over the same distance bins.
    plt.plot(vals, vals / LIGHT_IN_FIBER * SCALING, ':', linewidth=3,
             label="Great-circle in fiber")
    plt.plot(vals, vals / LIGHT_IN_VACUUM * SCALING, '--',
             label="Great-circle in vacuum", linewidth=3)
    plt.plot(vals, vals * FIBER_PATH_STRETCH / LIGHT_IN_FIBER * SCALING,
             '-.', label="Path-stretch in fiber", linewidth=3)

    plt.ylim(0, 150)
    plt.xlim(0, np.max(vals))
    plt.xlabel("SRC-DST great-circle distance (km)")
    # NOTE(review): plotted values are multiplied by SCALING (1e3) while the
    # label says seconds — confirm the intended unit.
    plt.ylabel("One-way latency (s)")
    plt.legend(loc=9, ncol=2, mode="expand")

    # Save figures
    # plt.savefig("figures/latency-distance.pdf")
    plt.savefig("figures/latency-distance.png")
def brute_force_neighbors(X, Y, k, metric, **kwargs):
    """Find, for every row of Y, its k nearest rows of X by full search.

    Parameters
    ----------
    X : candidate points.
    Y : query points (must expose ``.shape``).
    k : number of neighbors to keep per query.
    metric, **kwargs : forwarded to ``DistanceMetric.get_metric``.

    Returns
    -------
    (dist, ind) : distances to and indices of the k nearest rows of X for
        each row of Y, ordered by increasing distance.
    """
    distance_metric = DistanceMetric.get_metric(metric, **kwargs)
    all_distances = distance_metric.pairwise(Y, X)

    # Rank every candidate per query, then keep the first k columns.
    ranking = np.argsort(all_distances, axis=1)
    ind = ranking[:, :k]

    # Column vector of row numbers so the lookup broadcasts against ind.
    row_ids = np.arange(Y.shape[0])[:, None]
    dist = all_distances[row_ids, ind]
    return dist, ind
# Attribute-based variant kept for reference:
# def dist_func(a, b):
#     alpha = 1
#     return np.sqrt((a.x - b.x)**2 +
#                    (a.y - b.y)**2 +
#                    alpha*(a.theta - b.theta)**2)
def dist_func(a, b):
    """Return the weighted euclidean distance between two 3-component states.

    The third component's squared difference is weighted by ``alpha``
    (currently 1).
    """
    alpha = 1
    return np.sqrt((a[0] - b[0])**2 +
                   (a[1] - b[1])**2 +
                   alpha * (a[2] - b[2])**2)


# NOTE: this metric object is built but the tree below uses "euclidean".
pyfunc = DistanceMetric.get_metric("pyfunc", func=dist_func)

model_states, X = make_states()
for i in range(X.shape[0]):
    print(X[i, :])

print("TREE TIME")
tree = KDTree(X, leaf_size=4, metric="euclidean")

pts = np.array([(0, 0, 0)])
dist, ind = tree.query(pts, k=1)
for i in ind:
    print(X[i])
    # ndarray.item() replaces np.asscalar, which newer NumPy no longer has.
    print(i.item())
    print(model_states[i.item()])
# print(dist)
# print(KDTree.valid_metrics)
# a = np.empty((5, 5, 3))
def plot_relative(src_gst_latency, src_src_latency, src_pos):
    """Plot the relative latency difference against distance and save a PNG.

    The plotted quantity is ``(ixp_routed - city_gst) / city_gst * 100``
    over the strict upper triangle of the latency matrices, binned by
    great-circle distance. Two stacked axes show the same statistics, the
    upper one on a log scale. The figure is written to
    ``figures/percent-ixp-loss.png``.

    Parameters
    ----------
    src_gst_latency, src_src_latency :
        Square matrices indexed by source.
    src_pos :
        Source positions in degrees; converted to radians for the haversine
        metric.
    """
    triu = np.triu_indices(src_gst_latency.shape[0], 1)
    ixp_routed = np.around(src_gst_latency[triu], 6)
    city_gst = np.around(src_src_latency[triu], 6)

    # Call pairwise() on the metric instance rather than through the
    # DistanceMetric class: the factory class does not expose pairwise()
    # in every scikit-learn release, the instance always does.
    src_rad = np.deg2rad(src_pos)
    haversine = DistanceMetric.get_metric("haversine")
    pairwise_src = haversine.pairwise(src_rad, src_rad) * EARTH_RADIUS

    percent = (ixp_routed - city_gst) / city_gst * 100
    pairwise = pairwise_src[triu]
    # The last return value is indexed by the requested percentiles (25, 75).
    vals, avg_c, min_c, max_c, percentiles = vector_map_statistics(
        pairwise, percent, 100, [25, 75])

    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
    cur_color = colors[0]

    plt.figure(figsize=(8, 6))
    gs1 = gridspec.GridSpec(2, 1)
    gs1.update(wspace=0.01, hspace=0.11)  # set the spacing between axes.
    axes = [plt.subplot(gs1[0]), plt.subplot(gs1[1])]

    # Upper panel: log scale.
    axes[0].axhline(0, c='grey', linewidth=0.5)
    axes[0].semilogy(vals, avg_c, label="Average", c=cur_color, linewidth=3)
    axes[0].semilogy(vals, percentiles[25], "--", label="Quartiles",
                     c=cur_color, linewidth=3)
    axes[0].semilogy(vals, percentiles[75], "--", c=cur_color, linewidth=3)
    axes[0].plot(vals, max_c, ":", linewidth=3, label="Min-max variability",
                 c=cur_color)
    axes[0].set_ylim(90, 1000)
    axes[0].set_ylabel("log-scale")
    axes[0].set_xticks([])
    axes[0].legend()

    # Lower panel: linear scale.
    axes[1].axhline(0, c='grey', linewidth=0.5)
    axes[1].plot(vals, avg_c, label="Average city-city", c=cur_color,
                 linewidth=3)
    axes[1].plot(vals, percentiles[25], "--", label="Quartiles",
                 c=cur_color, linewidth=3)
    axes[1].plot(vals, percentiles[75], "--", c=cur_color, linewidth=3)
    axes[1].plot(vals, max_c, ":", linewidth=3, label="Min-max variability",
                 c=cur_color)
    axes[1].plot(vals, min_c, ":", linewidth=3, label="Min-max variability",
                 c=cur_color)
    axes[1].set_xlabel("SRC-DST great-circle distance (km)")
    axes[1].set_ylabel("Loss IXP deployment (%)")
    # axes[1].set_ylabel("Latency increase IXP deployment (%)")
    axes[1].set_ylim(-50, 90)

    plt.savefig("figures/percent-ixp-loss.png")
def check_cdist_bool(metric, D_true):
    """Compare distances between ``X1_bool`` and ``X2_bool`` to D_true.

    Both arrays are module-level fixtures defined outside this function.
    """
    distance_metric = DistanceMetric.get_metric(metric)
    computed = distance_metric.pairwise(X1_bool, X2_bool)
    assert_array_almost_equal(computed, D_true)
def check_pdist(self, metric, kwargs, D_true):
    """Compare pairwise distances on ``self.X1`` to D_true.

    Raises SkipTest for the 'canberra' metric when ``cmp_version`` reports
    the installed scipy as not newer than 0.9.
    """
    if metric == 'canberra':
        # The version is only inspected for the canberra metric.
        if cmp_version(scipy.__version__, '0.9') <= 0:
            raise SkipTest("Canberra distance incorrect in scipy < 0.9")

    distance_metric = DistanceMetric.get_metric(metric, **kwargs)
    computed = distance_metric.pairwise(self.X1)
    assert_array_almost_equal(computed, D_true)
""" Retrieve the row of a condensed matrix from the x index of its corresponding square matrix. """ row = np.empty(shape=(n, ), dtype=cndsd_matrix.dtype) for y in range(n): if y == x: row[y] = 0 continue row[y] = square_idx_to_condensed(cndsd_matrix, x, y, n) return row def square_rows_idx_to_condensed_rows(cndsd_matrix, indexes, n): """ Retrieve the rows of a condensed matrix from the indexes x indexes of its corresponding square matrix. """ rows = np.empty(shape=(n, len(indexes)), dtype=cndsd_matrix.dtype) for i, x in enumerate(indexes): row = square_row_idx_to_condensed_row(cndsd_matrix, x, n) rows[:, i] = row return rows if __name__ == "__main__": data = load_iris().data[:5, :] distance_matrix = DistanceMetric.get_metric("euclidean").pairwise(data) distance_matrix_condensed = scipy.spatial.distance.pdist(data, "euclidean") x, y = 2, None print(distance_matrix[x, :]) print( square_row_idx_to_condensed_row(distance_matrix_condensed, x, n=distance_matrix.shape[0]))