def test_local_similarities():
    """End-to-end check of the weight / local-similarity pipeline on small
    random features: weights have a unit diagonal, normalized weights sum
    to 1 along both axes, and local similarities are k-sparse.
    """
    random_state = np.random.RandomState(0)
    n, p = 5, 4
    x = random_state.uniform(size=n * p).reshape((n, p))
    mu = 0.5
    k_neighbors = 3
    distances = calculate_distances(x)
    neighbor_indices, neighbor_distances = \
        calculate_neighborhoods(distances, k_neighbors)
    epsilon = calculate_epsilon(distances, neighbor_distances)
    weights = calculate_weights(distances, epsilon, mu)
    # Self-similarity is maximal: W(i, i) == 1 for every node.
    assert np.allclose(np.diag(weights), 1)
    normalized_weights = calculate_normalized_weights(weights)
    # BUG FIX: `assert np.sum(..., axis=0) == 1` asserted on a boolean
    # *array*, which raises ValueError (ambiguous truth value) for n > 1.
    # Compare element-wise and reduce instead.
    assert np.allclose(np.sum(normalized_weights, axis=0), 1)
    assert np.allclose(np.sum(normalized_weights, axis=1), 1)
    # to do: assert that diagonal values are 0.5
    local_similarities = calculate_local_similarities(weights, neighbor_indices)
    # BUG FIX: both sparsity checks used axis=0 (copy/paste); rows count
    # non-zeros along axis=1, columns along axis=0. Asserting on the bare
    # boolean arrays also raised ValueError; reduce with .all().
    rows_are_sparse = np.sum(local_similarities != 0, axis=1) == k_neighbors
    assert rows_are_sparse.all()
    columns_are_sparse = np.sum(local_similarities != 0, axis=0) == k_neighbors
    assert columns_are_sparse.all()
    # to do: assert that local similarities are not symmetric
    return normalized_weights, local_similarities
def test_calculates_the_right_distance_n_2():
    """Two 1-D points at 3 and 5 are |3 - 5| = 2 apart, 0 from themselves."""
    points = np.array([[3], [5]])
    expected = np.array([
        [0, 2],
        [2, 0],
    ])
    assert np.allclose(calculate_distances(points), expected)
def from_features(cls, data, k, mu):
    """Alternate constructor: build the network straight from a feature
    matrix by running the full distance -> weight -> similarity pipeline.
    """
    distance_matrix = calculate_distances(data)
    indices, knn_distances = \
        calculate_neighborhoods(distance_matrix, k)
    eps = calculate_epsilon(distance_matrix, knn_distances)
    weight_matrix = calculate_weights(distance_matrix, eps, mu)
    local_similarities = calculate_local_similarities(weight_matrix, indices)
    return cls(weight_matrix, local_similarities, k, mu)
def test_calculates_the_right_distance_n_4():
    """Four 1-D points, including a duplicate pair, give the expected
    symmetric distance matrix with a zero diagonal."""
    points = np.array([[3], [3], [4], [6]])
    expected = np.array([
        [0, 0, 1, 3],
        [0, 0, 1, 3],
        [1, 1, 0, 2],
        [3, 3, 2, 0],
    ])
    assert np.allclose(calculate_distances(points), expected)
def test_is_shaped_correctly():
    """The distance matrix of an (n, p) feature matrix is n x n,
    symmetric, and zero only on the diagonal."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 100, 40
    features = rng.uniform(size=n_samples * n_features)
    features = features.reshape((n_samples, n_features))
    distances = calculate_distances(features)
    n = features.shape[0]
    assert distances.shape[0] == n
    assert distances.shape[1] == n
    assert distances.shape[0] == distances.shape[1], "must be square"
    assert np.allclose(distances, distances.T), "must be symmetric"
    assert nonzero_except_diagonal(distances)
def test_similarity_network_fusion():
    """Run one SNF message-passing step over three random datasets and
    check every intermediate matrix keeps the expected shape."""
    rs = np.random.RandomState(0)
    n_datasets = 3
    feature_matrices = [
        make_fake_data(n=20, m=3, random_state=rs)
        for _ in range(n_datasets)
    ]
    mu = 0.5
    k = 5
    # Per-dataset pipeline: distances -> k-NN -> epsilon -> weights.
    distance_graphs = [calculate_distances(f) for f in feature_matrices]
    neighborhoods = [calculate_neighborhoods(d, k) for d in distance_graphs]
    epsilons = [
        calculate_epsilon(dist, hood[1])
        for dist, hood in zip(distance_graphs, neighborhoods)
    ]
    weight_graphs = [
        calculate_weights(dist, eps, mu)
        for dist, eps in zip(distance_graphs, epsilons)
    ]
    normalized_weight_graphs = [
        calculate_normalized_weights(w) for w in weight_graphs
    ]
    local_similarity_graphs = [
        calculate_local_similarities(norm_w, hood[0])
        for norm_w, hood in zip(normalized_weight_graphs, neighborhoods)
    ]
    T_iterations = 1
    weights_by_iteration = [normalized_weight_graphs]
    for t in range(T_iterations):
        next_weights = [None] * n_datasets
        for v, s_v in enumerate(local_similarity_graphs):
            # Average the other datasets' weights from the previous step,
            # then diffuse through this dataset's local similarity graph.
            others = list(weights_by_iteration[t])
            others.pop(v)
            mean_others = np.mean(others, axis=0)
            assert mean_others.shape == s_v.shape
            next_weights[v] = s_v @ mean_others @ s_v.T
            assert next_weights[v].shape == weight_graphs[v].shape
        weights_by_iteration.append(next_weights)