def test_shortest_paths_retain_fraction(device):
    pytest.skip("This test is flaky.")

    # cycle graph on 10 nodes; its shortest-path graph is complete,
    # with 10 * 9 / 2 = 45 edges
    n = 10
    edges = torch.tensor([[i, i + 1] for i in range(n - 1)] + [[n - 1, 0]],
                         device=device)
    graph = preprocess.Graph.from_edges(edges)

    # 0.2 * 45 = 9, so approximately 9 edges are expected per trial
    n_edges = 0
    nnz = 0
    n_trials = 100
    for _ in range(n_trials):
        shortest_path_graph = preprocess.graph.shortest_paths(
            graph, retain_fraction=0.2)
        n_edges += shortest_path_graph.n_edges
        nnz += shortest_path_graph.A.nnz
    mean_n_edges = n_edges / n_trials
    mean_nnz = nnz / n_trials

    assert shortest_path_graph.n_items == n
    testing.assert_allclose(mean_n_edges, 9.0, atol=1.5)
    testing.assert_allclose(mean_nnz, 18.0, atol=1.5)
    # the adjacency matrix should be symmetric
    assert not (shortest_path_graph.A !=
                shortest_path_graph.A.T).todense().any()

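# A standalone sketch (not pymde API; the name is illustrative) of where the
# constants above come from: all-pairs shortest paths on a 10-cycle yield the
# complete graph, with n * (n - 1) / 2 = 45 edges, so retaining 20% of them
# keeps about 9 edges in expectation, and a symmetric adjacency matrix stores
# each edge twice, hence a mean nnz near 18.
def _cycle_shortest_path_edge_count(n=10):
    import numpy as np
    import scipy.sparse
    import scipy.sparse.csgraph

    rows = np.arange(n)
    cols = (rows + 1) % n
    data = np.ones(n)
    # symmetric adjacency matrix of the n-cycle
    adjacency = scipy.sparse.coo_matrix(
        (np.concatenate([data, data]),
         (np.concatenate([rows, cols]), np.concatenate([cols, rows]))),
        shape=(n, n))
    dist = scipy.sparse.csgraph.shortest_path(adjacency)
    # the cycle is connected, so every off-diagonal distance is finite
    # and the shortest-path graph is complete: n * (n - 1) / 2 edges
    return int(np.isfinite(dist[np.triu_indices(n, 1)]).sum())
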
def test_fit_spectral(device):
    # TODO: deflake this test
    pytest.skip("This test is flaky on macOS.")

    np.random.seed(0)
    torch.random.manual_seed(0)
    n = 200
    m = 3
    max_iter = 1000
    edges = util.all_edges(n)
    weights = torch.ones(edges.shape[0])
    f = penalties.Quadratic(weights)
    mde = problem.MDE(
        n,
        m,
        edges=edges,
        distortion_function=f,
        constraint=Standardized(),
        device=device,
    )
    X = mde.embed(max_iter=max_iter, eps=1e-10, memory_size=10)
    assert id(X) == id(mde.X)

    X_spectral = quadratic.spectral(
        n, m, edges=edges, weights=weights, device=device)
    testing.assert_allclose(
        mde.average_distortion(X).detach().cpu().numpy(),
        mde.average_distortion(X_spectral).detach().cpu().numpy(),
        atol=1e-4,
    )

def test_differences(device):
    torch.random.manual_seed(0)
    edges = np.array([(0, 1), (0, 2), (1, 2)])
    X = torch.randn((3, 3), dtype=torch.float32, device=device)
    mde = problem.MDE(
        3,
        3,
        edges,
        penalties.Quadratic(torch.ones(3)),
        constraint=Standardized(),
        device=device,
    )
    diff = mde.differences(X)
    testing.assert_allclose(X[edges[:, 0]] - X[edges[:, 1]], diff)

def test_k_nearest_neighbors():
    data_matrix = np.array([[0.0], [1.0], [1.5], [1.75]])
    graph = preprocess.data_matrix.k_nearest_neighbors(data_matrix, k=2)
    edges = set(tuple(e) for e in graph.edges.cpu().numpy().tolist())
    # [2, 1], [3, 2], and [3, 1] are omitted because they duplicate
    # [1, 2], [2, 3], and [1, 3]
    expected = set(
        tuple(e)
        for e in np.array([[0, 1], [0, 2], [1, 2], [1, 3], [2, 3]]).tolist())
    assert edges == expected

    edges = graph.edges.cpu().numpy()
    # weight is 2 if i and j are neighbors of each other,
    # 1 if i is a neighbor of j but not vice versa
    testing.assert_allclose(np.array([1., 1., 2., 2., 2.]), graph.weights)

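# A minimal reference (the name is illustrative, not pymde API) for the
# weight scheme the assertion above relies on: each item contributes its k
# nearest neighbors, and an undirected edge ends up with weight 2 when the
# relationship is mutual, 1 when it is one-directional.
def _knn_edge_weights_reference(data_matrix, k):
    import numpy as np

    n = data_matrix.shape[0]
    dists = np.linalg.norm(
        data_matrix[:, None, :] - data_matrix[None, :, :], axis=2)
    np.fill_diagonal(dists, np.inf)  # an item is not its own neighbor
    neighbors = np.argsort(dists, axis=1)[:, :k]
    counts = {}
    for i in range(n):
        for j in neighbors[i]:
            edge = (int(min(i, j)), int(max(i, j)))
            counts[edge] = counts.get(edge, 0) + 1
    # maps each edge (i, j) with i < j to weight 1 or 2
    return counts
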
def test_norm_grad_zero(device):
    torch.random.manual_seed(0)
    edges = np.array([(0, 1)])
    mde = problem.MDE(
        3,
        3,
        edges,
        penalties.Quadratic(torch.ones(3)),
        constraint=Standardized(),
        device=device,
    )
    # identical rows, so the (single) embedding distance is zero;
    # its gradient should be zero
    X = torch.ones((3, 3), requires_grad=True, device=device)
    norms = mde.distances(X)
    norms.backward()
    testing.assert_allclose(X.grad, 0.0)

def test_some_distances_numpy(device):
    del device
    np.random.seed(0)
    max_distances = 50
    # 500 items yield 500 * 499 / 2 candidate pairs
    retain_fraction = max_distances / int(500 * 499 / 2)
    data_matrix = np.random.randn(500, 2)
    graph = preprocess.data_matrix.distances(data_matrix,
                                             retain_fraction=retain_fraction)
    assert graph.n_items == data_matrix.shape[0]
    assert graph.n_edges == max_distances

    for e, d in zip(graph.edges, graph.distances):
        e = e.cpu().numpy()
        d = d.item()
        true_distance = np.linalg.norm(data_matrix[e[0]] - data_matrix[e[1]])
        testing.assert_allclose(true_distance, d)

def test_all_distances_numpy(device):
    del device
    np.random.seed(0)
    data_matrix = np.random.randn(4, 2)
    graph = preprocess.data_matrix.distances(data_matrix)
    assert graph.n_items == data_matrix.shape[0]
    assert graph.n_edges == 6
    testing.assert_all_equal(
        graph.edges,
        torch.tensor([[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]),
    )

    for e, d in zip(graph.edges, graph.distances):
        e = e.cpu().numpy()
        d = d.item()
        true_distance = np.linalg.norm(data_matrix[e[0]] - data_matrix[e[1]])
        testing.assert_allclose(true_distance, d)

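# The edge order asserted above matches scipy's condensed distance
# convention: pdist enumerates pairs (i, j) with i < j in row-major order
# over the upper triangle. A quick cross-check (the helper name is
# illustrative), assuming scipy is available:
def _condensed_distances_match(data_matrix):
    import numpy as np
    import scipy.spatial.distance

    n = data_matrix.shape[0]
    edges = np.column_stack(np.triu_indices(n, 1))
    condensed = scipy.spatial.distance.pdist(data_matrix)
    manual = np.linalg.norm(
        data_matrix[edges[:, 0]] - data_matrix[edges[:, 1]], axis=1)
    return np.allclose(condensed, manual)
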
def test_all_distances_torch(device):
    np.random.seed(0)
    data_matrix = torch.tensor(np.random.randn(4, 2),
                               dtype=torch.float,
                               device=device)
    graph = preprocess.data_matrix.distances(data_matrix)
    assert graph.n_items == data_matrix.shape[0]
    assert graph.n_edges == 6
    testing.assert_all_equal(
        graph.edges,
        torch.tensor([[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]),
    )

    for e, d in zip(graph.edges, graph.distances):
        true_distance = (data_matrix[e[0]] - data_matrix[e[1]]).norm()
        testing.assert_allclose(true_distance, d)

def test_average_distortion(device):
    torch.random.manual_seed(0)
    edges = np.array([(0, 1), (0, 2), (1, 2)])
    mde = problem.MDE(
        3,
        2,
        edges,
        penalties.Quadratic(torch.tensor([1.0, 2.0, 3.0])),
        constraint=Standardized(),
        device=device,
    )
    X = torch.tensor(
        [[0.0, 0.0], [1.0, 1.0], [3.0, 3.0]],
        dtype=torch.float32,
        device=device,
    )
    average_distortion = mde.average_distortion(X)
    # squared distances are 2, 18, and 8, so the weighted average is
    # (1*2 + 2*18 + 3*8)/3 = (2 + 36 + 24)/3 = 62/3
    testing.assert_allclose(average_distortion.detach().cpu().numpy(),
                            62.0 / 3)

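# A hand-rolled reference (the name is illustrative, not pymde API) for the
# quantity checked above: with quadratic penalties, the average distortion is
# the mean over edges of w_k * ||x_i - x_j||^2, which for the X above is
# (1*2 + 2*18 + 3*8) / 3 = 62 / 3.
def _quadratic_average_distortion_reference(X, edges, weights):
    import numpy as np

    diffs = X[edges[:, 0]] - X[edges[:, 1]]
    sq_distances = (diffs ** 2).sum(axis=1)
    return float(np.mean(weights * sq_distances))
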
def test_average_distortion_grad(device):
    torch.random.manual_seed(0)
    edges = np.array([(0, 1), (0, 2), (1, 2)])
    f = penalties.Quadratic(torch.tensor([1.0, 2.0, 3.0], device=device))
    mde = problem.MDE(3, 2, edges, f, Standardized(), device=device)
    X = torch.randn(
        (3, 2),
        requires_grad=True,
        dtype=torch.float32,
        device=device,
    )
    average_distortion = mde.average_distortion(X)
    average_distortion.backward()

    # signed incidence matrix of the graph: column k has +1 at
    # edges[k][0] and -1 at edges[k][1]
    A = torch.tensor(
        [[1, 1, 0], [-1, 0, 1], [0, -1, -1]],
        device=device,
    ).float()
    auto_grad = X.grad
    X.grad = None
    util._distortion(X, f, A, mde._lhs, mde._rhs).backward()
    manual_grad = X.grad
    testing.assert_allclose(auto_grad, manual_grad)

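# A small sanity sketch, independent of pymde internals (the name is
# illustrative): the matrix A above is a node-by-edge incidence matrix, so
# A.T @ X stacks the edge differences X[i] - X[j], the quantity the manual
# distortion expression is built from.
def _incidence_differences(X, edges, n):
    import numpy as np

    A = np.zeros((n, len(edges)))
    for k, (i, j) in enumerate(edges):
        A[i, k] = 1.0
        A[j, k] = -1.0
    return A.T @ X  # row k equals X[edges[k][0]] - X[edges[k][1]]
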
def test_spectral():
    np.random.seed(0)
    torch.random.manual_seed(0)
    n = 5
    m = 3

    # build a graph Laplacian with random positive weights
    L = -np.abs(np.random.randn(n, n).astype(np.float32))
    L += L.T
    np.fill_diagonal(L, 0.0)
    np.fill_diagonal(L, -L.sum(axis=1))

    offdiag = np.triu_indices(n, 1)
    edges = np.column_stack(offdiag)
    weights = -L[offdiag]

    X = quadratic.spectral(n, m, edges, torch.tensor(weights))
    testing.assert_allclose(1.0 / n * X.T @ X, np.eye(m))

    X *= 1.0 / np.sqrt(n)
    eigenvalues, eigenvectors = scipy.sparse.linalg.eigsh(
        L, k=m + 1, which="SM", return_eigenvectors=True)
    # discard the constant eigenvector associated with eigenvalue zero
    eigenvectors = eigenvectors[:, 1:]
    for col in range(m):
        testing.assert_allclose(eigenvectors[:, col],
                                X[:, col],
                                up_to_sign=True)

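# Context for the comparison above: with quadratic penalties and the
# standardization constraint, the optimal embedding spans the Laplacian
# eigenvectors with the smallest nonzero eigenvalues (the constant
# eigenvector is excluded). A sketch (illustrative helper, not pymde API) of
# building that Laplacian from an edge list with positive weights:
def _laplacian_from_edges(n, edges, weights):
    import numpy as np

    L = np.zeros((n, n))
    for (i, j), w in zip(edges, weights):
        L[i, j] -= w  # off-diagonal entries are minus the edge weights
        L[j, i] -= w
        L[i, i] += w  # diagonal entries are the weighted degrees
        L[j, j] += w
    return L
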
def test_initialization(device):
    torch.random.manual_seed(0)
    constraint = Standardized()
    X = constraint.initialization(5, 3, device=device)
    testing.assert_allclose(1.0 / 5 * X.T @ X, np.eye(3))

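# One standard way to produce a matrix satisfying (1/n) X^T X = I, shown as
# an illustration (not necessarily what Standardized.initialization does):
# orthonormalize a Gaussian sample and rescale by sqrt(n).
def _random_standardized(n, m):
    import numpy as np

    Q, _ = np.linalg.qr(np.random.randn(n, m))  # Q has orthonormal columns
    return np.sqrt(n) * Q  # (1/n) X^T X == I_m up to float error
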
def test_proj_standardized(device):
    X = torch.eye(2, dtype=torch.float32, device=device)
    proj = util.proj_standardized(X)
    testing.assert_allclose(1 / 2.0 * proj.T @ proj, np.eye(2))

    n = 10
    m = 3
    X = torch.randn((n, m), dtype=torch.float32, device=device)
    proj = util.proj_standardized(X)
    testing.assert_allclose(1.0 / n * proj.T @ proj, np.eye(m))

    n = 100
    m = 3
    X = torch.randn((n, m), dtype=torch.float32, device=device)
    proj = util.proj_standardized(X)
    testing.assert_allclose(1.0 / n * proj.T @ proj, np.eye(m))

    # projecting an already demeaned matrix preserves the zero mean
    n = 100
    m = 3
    X = torch.randn((n, m), dtype=torch.float32, device=device)
    X -= X.mean(axis=0)
    proj = util.proj_standardized(X)
    testing.assert_allclose(1.0 / n * proj.T @ proj, np.eye(m))
    testing.assert_allclose(proj.mean(axis=0), np.zeros(m))

    # demean=True should demean and standardize in one call
    n = 100
    m = 3
    X = torch.randn((n, m), dtype=torch.float32, device=device)
    proj = util.proj_standardized(X, demean=True)
    testing.assert_allclose(1.0 / n * proj.T @ proj, np.eye(m))
    testing.assert_allclose(proj.mean(axis=0), np.zeros(m))

    n = 1000
    m = 2
    X = torch.randn((n, m), dtype=torch.float32, device=device)
    proj = util.proj_standardized(X, demean=True)
    testing.assert_allclose(1.0 / n * proj.T @ proj, np.eye(m))
    testing.assert_allclose(proj.mean(axis=0), np.zeros(m))

    n = 1000
    m = 3
    X = torch.randn((n, m), dtype=torch.float32, device=device)
    proj = util.proj_standardized(X, demean=True)
    testing.assert_allclose(1.0 / n * proj.T @ proj, np.eye(m))
    testing.assert_allclose(proj.mean(axis=0), np.zeros(m))

    n = 1000
    m = 250
    X = torch.randn((n, m), dtype=torch.float32, device=device)
    proj = util.proj_standardized(X, demean=True)
    testing.assert_allclose(1.0 / n * proj.T @ proj, np.eye(m))
    testing.assert_allclose(proj.mean(axis=0), np.zeros(m))

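# A reference construction (a sketch, not necessarily how
# util.proj_standardized works): for full-rank X with thin SVD X = U S V^T,
# the nearest matrix in Frobenius norm satisfying (1/n) X^T X = I is
# sqrt(n) times the polar factor U V^T. Demeaning first also zeroes the
# column means of the result, since 1^T X = 0 implies 1^T U V^T = 0.
def _proj_standardized_reference(X, demean=False):
    import numpy as np

    n = X.shape[0]
    if demean:
        X = X - X.mean(axis=0)
    U, _, Vt = np.linalg.svd(X, full_matrices=False)
    return np.sqrt(n) * U @ Vt
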
def test_pca():
    torch.random.manual_seed(0)

    # square data matrix
    n = 5
    Y = np.random.randn(n, n).astype(np.float32)
    Y -= Y.mean(axis=0)
    for m in range(1, 5):
        X = quadratic.pca(Y, m)
        testing.assert_allclose(1.0 / n * X.T @ X, np.eye(m))
        U, _, _ = np.linalg.svd(Y)
        X_unscaled = 1.0 / np.sqrt(n) * X
        U = U[:, :m]
        U = util.align(source=U, target=X_unscaled)
        for col in range(m):
            testing.assert_allclose(X_unscaled[:, col],
                                    U[:, col],
                                    up_to_sign=True)

    # tall data matrix
    n = 5
    k = 4
    Y = np.random.randn(n, k).astype(np.float32)
    Y -= Y.mean(axis=0)
    for m in range(1, k):
        X = quadratic.pca(Y, m)
        testing.assert_allclose(1.0 / n * X.T @ X, np.eye(m))
        U, _, _ = np.linalg.svd(Y)
        X_unscaled = 1.0 / np.sqrt(n) * X
        U = U[:, :m]
        U = util.align(source=U, target=X_unscaled)
        for col in range(m):
            testing.assert_allclose(X_unscaled[:, col],
                                    U[:, col],
                                    up_to_sign=True)
    with pytest.raises(ValueError,
                       match=r"Embedding dimension must be at most.*"):
        X = quadratic.pca(Y, k + 1)

    # wide data matrix
    n = 4
    k = 5
    Y = np.random.randn(n, k).astype(np.float32)
    Y -= Y.mean(axis=0)
    for m in range(1, n):
        X = quadratic.pca(Y, m)
        testing.assert_allclose(1.0 / n * X.T @ X, np.eye(m))
        U, _, _ = np.linalg.svd(Y)
        X_unscaled = 1.0 / np.sqrt(n) * X
        U = U[:, :m]
        for col in range(m):
            testing.assert_allclose(X_unscaled[:, col],
                                    U[:, col],
                                    up_to_sign=True)
    with pytest.raises(ValueError,
                       match=r"Embedding dimension must be at most.*"):
        X = quadratic.pca(Y, n + 1)

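# A reference PCA under the conventions the test checks (a sketch, not
# necessarily the quadratic.pca implementation): demean, take the leading
# left singular vectors, and rescale so that (1/n) X^T X = I. The dimension
# bound min(n, k) is consistent with the ValueError cases above.
def _pca_reference(Y, m):
    import numpy as np

    n, k = Y.shape
    if m > min(n, k):
        raise ValueError("Embedding dimension must be at most min(n, k).")
    Y = Y - Y.mean(axis=0)
    U, _, _ = np.linalg.svd(Y, full_matrices=False)
    return np.sqrt(n) * U[:, :m]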