from typing import Optional, Tuple

import torch

import heat as ht
from heat.core.dndarray import DNDarray


def lanczos(
    A: DNDarray,
    m: int,
    v0: Optional[DNDarray] = None,
    V_out: Optional[DNDarray] = None,
    T_out: Optional[DNDarray] = None,
) -> Tuple[DNDarray, DNDarray]:
    r"""
    The Lanczos algorithm is an iterative approximation of the solution to the eigenvalue problem,
    an adaptation of power methods to find the :math:`m` "most useful" (tending towards extreme
    highest/lowest) eigenvalues and eigenvectors of an :math:`n \times n` Hermitian matrix, where
    often :math:`m \ll n`. It returns two matrices :math:`V` and :math:`T`, where:

        - :math:`V` is a matrix of size :math:`n \times m` whose orthonormal columns span the Krylov subspace
        - :math:`T` is a tridiagonal matrix of size :math:`m \times m` with coefficients
          :math:`\alpha_1, ..., \alpha_m` on the diagonal and coefficients
          :math:`\beta_1, ..., \beta_{m-1}` on the side-diagonals

    Parameters
    ----------
    A : DNDarray
        2D symmetric, positive-definite matrix
    m : int
        Number of Lanczos iterations
    v0 : DNDarray, optional
        1D starting vector of Euclidean norm 1. If not provided, a random vector will be used to
        start the algorithm
    V_out : DNDarray, optional
        Output matrix for the Krylov vectors, shape = (n, m)
    T_out : DNDarray, optional
        Output matrix for the tridiagonal matrix, shape = (m, m)

    Returns
    -------
    V, T : Tuple[DNDarray, DNDarray]
        The matrix :math:`V` of Krylov vectors and the tridiagonal matrix :math:`T`
    """
    if not isinstance(A, DNDarray):
        raise TypeError("A needs to be of type ht.DNDarray, but was {}".format(type(A)))
    if not (A.ndim == 2):
        raise RuntimeError("A needs to be a 2D matrix")
    if not isinstance(m, (int, float)):
        raise TypeError("m must be either int or float, but was {}".format(type(m)))
    m = int(m)

    n, column = A.shape
    if n != column:
        raise RuntimeError("Input matrix A needs to be square (and symmetric).")

    T = ht.zeros((m, m))
    if A.split == 0:
        # split V along axis 0 as well, for better memory access in the
        # reorthogonalization (Gram-Schmidt) loop below
        V = ht.ones((n, m), split=0, dtype=A.dtype, device=A.device)
    else:
        V = ht.ones((n, m), split=None, dtype=A.dtype, device=A.device)

    if v0 is None:
        vr = ht.random.rand(n, split=V.split)
        v0 = vr / ht.norm(vr)
    elif v0.split != V.split:
        v0.resplit_(axis=V.split)

    # 0th iteration: vector v0 has Euclidean norm 1
    w = ht.matmul(A, v0)
    alpha = ht.dot(w, v0)
    w = w - alpha * v0
    T[0, 0] = alpha
    V[:, 0] = v0

    for i in range(1, m):
        beta = ht.norm(w)
        if ht.abs(beta) < 1e-10:
            # print("Lanczos breakdown in iteration {}".format(i))
            # Lanczos breakdown; pick a random vector to continue
            vr = ht.random.rand(n, dtype=A.dtype, split=V.split)
            # orthogonalize v_r with respect to all previous vectors v[j]
            for j in range(i):
                vi_loc = V.larray[:, j]
                a = torch.dot(vr.larray, vi_loc)
                b = torch.dot(vi_loc, vi_loc)
                A.comm.Allreduce(ht.communication.MPI.IN_PLACE, a, ht.communication.MPI.SUM)
                A.comm.Allreduce(ht.communication.MPI.IN_PLACE, b, ht.communication.MPI.SUM)
                vr.larray = vr.larray - a / b * vi_loc
        else:
            vr = w
            # Reorthogonalization
            # TODO: rethink this; mask torch calls, see issue #494
            # This is the fast solution; item access on the ht.DNDarray level is way slower
            for j in range(i):
                vi_loc = V.larray[:, j]
                a = torch.dot(vr.larray, vi_loc)
                b = torch.dot(vi_loc, vi_loc)
                A.comm.Allreduce(ht.communication.MPI.IN_PLACE, a, ht.communication.MPI.SUM)
                A.comm.Allreduce(ht.communication.MPI.IN_PLACE, b, ht.communication.MPI.SUM)
                vr.larray = vr.larray - a / b * vi_loc
        # normalize v_r to Euclidean norm 1 and use it as the i-th Krylov vector
        vi = vr / ht.norm(vr)

        w = ht.matmul(A, vi)
        alpha = ht.dot(w, vi)
        w = w - alpha * vi - beta * V[:, i - 1]

        T[i - 1, i] = beta
        T[i, i - 1] = beta
        T[i, i] = alpha
        V[:, i] = vi

    if V.split is not None:
        V.resplit_(axis=None)

    # note: when output buffers are passed in, fresh copies are bound to the
    # local names and returned; the caller's arrays are not modified in place
    if T_out is not None:
        T_out = T.copy()
        if V_out is not None:
            V_out = V.copy()
            return V_out, T_out
        return V, T_out
    elif V_out is not None:
        V_out = V.copy()
        return V_out, T

    return V, T
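
# A minimal usage sketch (illustrative only, not part of the library): it builds
# a random symmetric positive-definite matrix and runs lanczos() on it. If the
# algorithm works, V has orthonormal columns and V^T A V is approximately the
# tridiagonal matrix T. The helper name `_lanczos_example` and the sizes
# n=100, m=10 are assumptions made up for this example.
def _lanczos_example():
    n, m = 100, 10
    # B @ B^T is symmetric positive semi-definite; adding n * I makes it definite
    B = ht.random.randn(n, n, split=0)
    A = ht.matmul(B, B.T) + n * ht.eye(n, split=0)
    V, T = lanczos(A, m)
    # project A onto the Krylov basis; the residual should be close to zero
    residual = ht.matmul(V.T, ht.matmul(A, V)) - T
    return V, T, residual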
def test_dot(self):
    # ONLY TESTING CORRECTNESS! ALL CALLS IN DOT ARE PREVIOUSLY TESTED
    # cases to test:
    data2d = np.ones((10, 10))
    data3d = np.ones((10, 10, 10))
    data1d = np.arange(10)

    # 2 1D arrays, all split combinations
    a1d = ht.array(data1d, dtype=ht.float32, split=0)
    b1d = ht.array(data1d, dtype=ht.float32, split=0)
    self.assertEqual(ht.dot(a1d, b1d), np.dot(data1d, data1d))
    ret = []
    self.assertEqual(ht.dot(a1d, b1d, out=ret), np.dot(data1d, data1d))

    a1d = ht.array(data1d, dtype=ht.float32, split=None)
    b1d = ht.array(data1d, dtype=ht.float32, split=0)
    self.assertEqual(ht.dot(a1d, b1d), np.dot(data1d, data1d))

    a1d = ht.array(data1d, dtype=ht.float32, split=None)
    b1d = ht.array(data1d, dtype=ht.float32, split=None)
    self.assertEqual(ht.dot(a1d, b1d), np.dot(data1d, data1d))

    a1d = ht.array(data1d, dtype=ht.float32, split=0)
    b1d = ht.array(data1d, dtype=ht.float32, split=0)
    self.assertEqual(ht.dot(a1d, b1d), np.dot(data1d, data1d))

    # 2 2D arrays
    a2d = ht.array(data2d, split=1)
    b2d = ht.array(data2d, split=1)
    res = ht.dot(a2d, b2d) - ht.array(np.dot(data2d, data2d))
    self.assertEqual(ht.equal(res, ht.zeros(res.shape)), 1)

    ret = ht.array(data2d, split=1)
    ht.dot(a2d, b2d, out=ret)
    res = ret - ht.array(np.dot(data2d, data2d))
    self.assertEqual(ht.equal(res, ht.zeros(res.shape)), 1)

    const1 = 5
    const2 = 6

    # a is const
    res = ht.dot(const1, b2d) - ht.array(np.dot(const1, data2d))
    ret = 0
    ht.dot(const1, b2d, out=ret)
    self.assertEqual(ht.equal(res, ht.zeros(res.shape)), 1)

    # b is const
    res = ht.dot(a2d, const2) - ht.array(np.dot(data2d, const2))
    self.assertEqual(ht.equal(res, ht.zeros(res.shape)), 1)

    # a and b are both const
    self.assertEqual(ht.dot(const2, const1), 5 * 6)

    # anything higher than 2D is not supported
    with self.assertRaises(NotImplementedError):
        ht.dot(ht.array(data3d), ht.array(data1d))
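
# For reference, a sketch of the dispatch rules the cases above exercise. This
# mirrors np.dot semantics and is an assumption about the behavior under test,
# not heat's actual implementation; `naive_dot` is a hypothetical helper.
def naive_dot(a, b):
    import numpy as np  # imported at module level in the real test file

    if np.isscalar(a) or np.isscalar(b):
        # scalar operands degenerate to plain multiplication
        return a * b
    if a.ndim == 1 and b.ndim == 1:
        # two 1D vectors: inner product
        return (a * b).sum()
    if a.ndim == 2 and b.ndim == 2:
        # two 2D matrices: matrix multiplication
        return a @ b
    # anything higher-dimensional is not supported
    raise NotImplementedError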