def test_local_similarities():
    """Exercise the full local-similarity pipeline on random features.

    Checks that the raw weight matrix has a unit diagonal, that the
    normalized weights sum to 1 along each axis, and that the local
    similarity matrix keeps exactly ``k_neighbors`` nonzero entries
    per row and per column.

    Returns the (normalized_weights, local_similarities) pair so other
    tests/fixtures can reuse the computed matrices.
    """
    random_state = np.random.RandomState(0)
    n, p = 5, 4
    x = random_state.uniform(size=n * p).reshape((n, p))
    mu = 0.5
    k_neighbors = 3

    distances = calculate_distances(x)
    neighbor_indices, neighbor_distances = \
        calculate_neighborhoods(distances, k_neighbors)
    epsilon = calculate_epsilon(distances, neighbor_distances)
    weights = calculate_weights(distances, epsilon, mu)

    # Self-similarity must be maximal: exp(0) == 1 on the diagonal.
    diagonals_equal_1 = all(weights[i, i] == 1
                            for i in range(weights.shape[0]))
    assert diagonals_equal_1

    normalized_weights = calculate_normalized_weights(weights)
    # Bug fix: asserting a numpy array directly raises
    # "truth value of an array is ambiguous" — compare with allclose.
    # NOTE(review): assumes the normalization makes both row and column
    # sums equal 1 — confirm against calculate_normalized_weights.
    assert np.allclose(np.sum(normalized_weights, axis=0), 1)
    assert np.allclose(np.sum(normalized_weights, axis=1), 1)
    # to do: assert that diagonal values are 0.5

    local_similarities = calculate_local_similarities(weights,
                                                      neighbor_indices)
    # Bug fix: counting nonzeros per ROW sums over axis=1; the original
    # used axis=0 for both checks (copy-paste error) and asserted the
    # resulting arrays directly (ValueError).
    rows_are_sparse = np.sum(local_similarities != 0,
                             axis=1) == k_neighbors
    assert rows_are_sparse.all()
    columns_are_sparse = np.sum(local_similarities != 0,
                                axis=0) == k_neighbors
    assert columns_are_sparse.all()
    # to do: assert that local similarities are not symmetric
    return normalized_weights, local_similarities
def from_features(cls, data, k, mu):
    """Alternate constructor: build the network from a raw feature matrix.

    Runs the standard pipeline — pairwise distances, k-nearest
    neighborhoods, epsilon scaling, weights, and local similarities —
    then delegates to the regular constructor.
    """
    pairwise = calculate_distances(data)
    nbr_idx, nbr_dist = calculate_neighborhoods(pairwise, k)
    eps = calculate_epsilon(pairwise, nbr_dist)
    weight_matrix = calculate_weights(pairwise, eps, mu)
    local_sims = calculate_local_similarities(weight_matrix, nbr_idx)
    return cls(weight_matrix, local_sims, k, mu)
def test_is_correct_value_bigger():
    """Hand-check two weight entries on a tiny 4x4 distance matrix.

    With k=2 the expected epsilon for a pair (i, j) is the mean of the
    two neighborhood means and the direct distance; the weight is then
    exp(-d**2 / (mu * epsilon)).
    """
    mu = 0.5
    k = 2
    d = np.array([
        [0, 5, 9, 3],
        [5, 0, 7, 2],
        [9, 7, 0, 1],
        [3, 2, 1, 0],
    ])
    n_indices, n_distances = calculate_neighborhoods(d, k)
    e = calculate_epsilon(d, n_distances)
    w = calculate_weights(d, e, mu)

    # Diagonal entries are self-similarities and must equal 1.
    for i in range(d.shape[0]):
        assert w[i, i] == 1
    # Note: -9**2 is -(9**2) — the exponent is intentionally negative.
    assert w[0, 2] == np.exp(-9**2 / (mu * ((4 + 4 + 9) / 3)))
    assert w[3, 2] == np.exp(-1**2 / (mu * ((1.5 + 4 + 1) / 3)))
def test_mu_changes_weights():
    """Changing mu must change the weight matrix (shapes stay fixed).

    Bug fixes: the original referenced the undefined names ``weights``
    and ``n_samples`` (NameError), duplicated that assert line, and
    never actually asserted that the two weight matrices differ.
    """
    rs = np.random.RandomState(0)
    distances = rs.uniform(size=100).reshape((10, 10))
    mu_1, mu_2 = 0.4, 0.6
    k = 1
    # NOTE(review): neighbor_distances has 3 entries while distances is
    # 10x10 — confirm calculate_epsilon broadcasts/accepts this shape.
    neighbor_distances = np.array([2, 2, 3])
    epsilon = calculate_epsilon(distances, neighbor_distances)
    weights_1 = calculate_weights(distances, epsilon, mu_1)
    weights_2 = calculate_weights(distances, epsilon, mu_2)

    n_samples = distances.shape[0]
    assert weights_1.shape == (n_samples, n_samples)
    assert weights_2.shape == (n_samples, n_samples)
    # The point of the test: a different mu produces different weights.
    assert not np.array_equal(weights_1, weights_2)
    # NOTE(review): exp of a positive exponent looks suspicious — the
    # weight kernel elsewhere is exp(-d**2 / (mu * eps)); confirm.
    assert weights_1[0, 1] == np.exp(2 / mu_1 / 1)
def test_similarity_network_fusion():
    """Run one fusion iteration across three synthetic datasets.

    Builds the per-dataset weight and local-similarity graphs, then
    performs the SNF update  W_v(t+1) = S_v @ mean(other W(t)) @ S_v.T
    for each dataset v, asserting shape consistency at each step.
    """
    rs = np.random.RandomState(0)
    n_datasets = 3
    # One feature matrix per dataset, all drawn from the same generator.
    feature_matrices = [
        make_fake_data(n=20, m=3, random_state=rs) for _ in range(n_datasets)
    ]
    mu = 0.5
    k = 5
    # Per-dataset pipeline: distances -> (indices, distances) of k-NN
    # -> epsilon -> weights -> normalized weights -> local similarities.
    distance_graphs = [calculate_distances(f) for f in feature_matrices]
    neighborhoods = [calculate_neighborhoods(d, k) for d in distance_graphs]
    # n[1] is the neighbor-distance component of each neighborhood pair.
    epsilons = [
        calculate_epsilon(d, n[1]) for d, n in zip(distance_graphs,
                                                   neighborhoods)
    ]
    weight_graphs = [
        calculate_weights(d, e, mu) for d, e in zip(distance_graphs,
                                                    epsilons)
    ]
    normalized_weight_graphs = [
        calculate_normalized_weights(w) for w in weight_graphs
    ]
    # n[0] is the neighbor-index component of each neighborhood pair.
    local_similarity_graphs = [
        calculate_local_similarities(nw, n[0])
        for nw, n in zip(normalized_weight_graphs, neighborhoods)
    ]
    T_iterations = 1
    # weights_by_iteration[t] holds the list of per-dataset weight
    # matrices at iteration t; t=0 is the normalized initial weights.
    weights_by_iteration = [normalized_weight_graphs]
    for t in range(T_iterations):
        weights_at_iteration_t_plus_one = list(None for _ in range(n_datasets))
        for v, s_v in enumerate(local_similarity_graphs):
            # Average the weight matrices of every dataset EXCEPT v.
            other_normalized_weights = list(weights_by_iteration[t])
            other_normalized_weights.pop(v)
            mean_other_normalized_weights = \
                np.mean(other_normalized_weights, axis=0)
            assert mean_other_normalized_weights.shape == s_v.shape
            # Core SNF update: diffuse the averaged weights through the
            # sparse local-similarity graph of dataset v.
            weights_at_iteration_t_plus_one[v] = \
                s_v @ mean_other_normalized_weights @ s_v.T
            assert weights_at_iteration_t_plus_one[v].shape \
                == weight_graphs[v].shape
        weights_by_iteration.append(weights_at_iteration_t_plus_one)
def test_calculates_the_right_value_n_2():
    """Check epsilon on the smallest non-trivial case (n=2).

    Both points are each other's only neighbor at distance 3, so the
    diagonal is (3 + 3 + 0) / 3 = 2 and the off-diagonal is
    (3 + 3 + 3) / 3 = 3.
    """
    distances = np.array([[0, 3], [3, 0]])
    neighbor_distances = np.array([[3], [3]])
    epsilon = calculate_epsilon(distances, neighbor_distances)
    expected = np.array([[2.0, 3.0], [3.0, 2.0]])
    assert np.allclose(epsilon, expected), epsilon