def lstsq(a, b):
    """
    Fast least-squares solver, from https://gist.github.com/aldro61/5889795

    Returns the least-squares solution to a linear matrix equation.
    Solves the equation `a x = b` by computing a vector `x` that minimizes
    the Euclidean 2-norm `|| b - a x ||^2`. The equation may be under-,
    well-, or over-determined (i.e., the number of linearly independent
    rows of `a` can be less than, equal to, or greater than its number of
    linearly independent columns). If `a` is square and of full rank, then
    `x` (but for round-off error) is the "exact" solution of the equation.

    Parameters
    ----------
    a : (M, N) array_like
        "Coefficient" matrix.
    b : (M,) array_like
        Ordinate or "dependent variable" values.

    Returns
    -------
    x : (N,) ndarray
        Least-squares solution. The shape of `x` depends on the shape of `b`.
    residuals : (M,) ndarray
        Residuals: ``a*x - b``.
    """
    a = np.asarray(a, order='c')
    i = dgemm(alpha=1.0, a=a.T, b=a.T, trans_b=True)
    x = np.linalg.solve(i, dgemm(alpha=1.0, a=a.T, b=b)).flatten()
    return x, np.dot(a, x) - b
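# A minimal usage sketch (not part of the original snippet): on a full-rank,
# well-conditioned system the normal-equations solver above should agree with
# np.linalg.lstsq. The helper name below is only for illustration.
import numpy as np


def _demo_lstsq():
    rng = np.random.RandomState(0)
    a = rng.rand(100, 5)
    b = rng.rand(100)
    x_fast, res = lstsq(a, b)                     # solver defined above
    x_ref = np.linalg.lstsq(a, b, rcond=None)[0]  # reference solution
    assert np.allclose(x_fast, x_ref)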
def Transient_EIEW(x, alpha_start, alpha, Sn, Sn_inv, omega, wis):
    """
    Evaluates the cost function given all parameters.
    Here we use the FORTRAN dgemm/dgemv functions instead of @ for efficient
    matrix multiplication.
    """
    N = x.shape[0]
    m = alpha.shape[1]

    P_alpha_F = alpha_start
    cost = omega * np.sum(x)

    # cost of clients already entered (only waiting time)
    for i in range(1, wis + 1):
        cost += (omega - 1) * np.sum(
            dgemm(1, P_alpha_F, Sn_inv[0:i * m, 0:i * m]))
        F = 1 - np.sum(P_alpha_F)
        P_alpha_F = np.hstack((np.matrix(P_alpha_F), alpha * F))

    # cost of clients to be scheduled
    for i in range(wis + 1, N + wis + 1):
        exp_Si = expm(Sn[0:i * m, 0:i * m] * x[i - wis - 1])
        cost += float(
            dgemv(1, dgemm(1, P_alpha_F, Sn_inv[0:i * m, 0:i * m]),
                  np.sum(omega * np.eye(i * m) - exp_Si, 1)))
        P = dgemm(1, P_alpha_F, exp_Si)
        F = 1 - np.sum(P)
        P_alpha_F = np.hstack((np.matrix(P), alpha * F))

    return cost
def lls_with_blas(x, y, residuals=False):
    """
    https://gist.github.com/aldro61/5889795
    """
    i = dgemm(alpha=1.0, a=x.T, b=x.T, trans_b=True)
    beta = np.linalg.solve(i, dgemm(alpha=1.0, a=x.T, b=y)).flatten()
    return beta
def project_whitened_vector(self, vector_instance):
    """
    Returns a sheared (non-orthogonal) reconstruction of ``vector_instance``.

    Parameters
    ----------
    vector_instance : (n_features,) ndarray
        A novel vector.

    Returns
    -------
    sheared_reconstruction : (n_features,) ndarray
        A sheared (non-orthogonal) reconstruction of ``vector_instance``
    """
    whitened_components = self.whitened_components
    weights = dgemm(alpha=1.0, a=vector_instance.T, b=whitened_components.T,
                    trans_a=True)
    return dgemm(alpha=1.0, a=weights.T, b=whitened_components.T,
                 trans_a=True, trans_b=True)
def _llsq_solver_fast(A, y):
    """
    Return the least-squares solution to a linear matrix equation.

    Solves the equation `A x = y` by computing a vector `x` that minimizes
    the Euclidean 2-norm `|| y - A x ||^2`. The equation may be under-,
    well-, or over-determined (i.e., the number of linearly independent
    rows of `A` can be less than, equal to, or greater than its number of
    linearly independent columns). If `A` is square and of full rank, then
    `x` (but for round-off error) is the "exact" solution of the equation.
    However, if `A` is rank-deficient, this solver may fail. In that case,
    use :func:`_llsq_solver_pinv`.

    :param A: (m, d) array_like
    :param y: (m,) array_like
    :returns x: (d,) ndarray, least-squares solution
    """
    if type(A) != np.ndarray or not A.flags['C_CONTIGUOUS']:
        warn("Matrix A is not a C-contiguous numpy array. "
             "The solver will create a copy, which will result "
             "in increased memory usage.")
        A = np.asarray(A, order='c')

    i = dgemm(alpha=1.0, a=A.T, b=A.T, trans_b=True)
    x = np.linalg.solve(i, dgemm(alpha=1.0, a=A.T, b=y)).flatten()
    return x
def project_out_vectors(self, vectors):
    """
    Returns a version of `vectors` where all the basis of the model have
    been projected out.

    Parameters
    ----------
    vectors : (n_vectors, n_features) ndarray
        A matrix of novel vectors.

    Returns
    -------
    projected_out : (n_vectors, n_features) ndarray
        A copy of `vectors` with all basis of the model projected out.
    """
    weights = dgemm(alpha=1.0, a=vectors.T, b=self.components.T,
                    trans_a=True)
    return (vectors - dgemm(alpha=1.0, a=weights.T, b=self.components.T,
                            trans_a=True, trans_b=True))
def _solve_matrix(a, b, residuals=False):
    i = dgemm(alpha=1.0, a=a.T, b=a.T, trans_b=True)
    x = np.linalg.solve(i, dgemm(alpha=1.0, a=a.T, b=b)).flatten()
    if residuals:
        return x, np.linalg.norm(np.dot(a, x) - b)
    else:
        return x
def lsq(a, b, residuals=False):
    if type(a) != np.ndarray or not a.flags['C_CONTIGUOUS']:
        warn('Matrix a is not a C-contiguous numpy array. The solver will '
             'create a copy, which will result in increased memory usage.')
        a = np.asarray(a, order='c')
    i = dgemm(alpha=1.0, a=a.T, b=a.T, trans_b=True)
    x = np.linalg.solve(i, dgemm(alpha=1.0, a=a.T, b=b)).flatten()
    if residuals:
        return x, np.linalg.norm(np.dot(a, x) - b)
    else:
        return x
def g09_calculate_fock(c, s, nbf, e):
    # Calculate the Fock matrix
    sc_temp = blas.dgemm(1.0, np.array(c), np.array(s), 1.0)
    sc = np.asmatrix(sc_temp)
    sce = np.matlib.zeros((nbf, nbf))
    for i in range(nbf):
        sce[i, :] = e[0, i] * sc[i, :]
    c_lu, ipiv, info = lapack.dgetrf(c)
    ic_lu, info = lapack.dgetri(c_lu, ipiv)
    f_temp = blas.dgemm(1.0, np.array(ic_lu), np.array(sce), 1.0)
    f = np.asmatrix(f_temp)
    return f
def forward(self, X):
    # forward propagation through our network
    # dot product of X (input) and first set of 3x2 weights
    # self.z = np.dot(X, self.W1)
    self.z = FD.dgemm(alpha=1.0, a=X.T, b=self.W1.T,
                      trans_a=True, trans_b=True)  # MKL fast dot product
    self.z2 = NN.Elu((self.z))  # ACTIVATION FUNCTION
    # dot product of hidden layer (z2) and second set of 3x1 weights
    # self.z3 = np.dot(self.z2, self.W2)
    self.z3 = FD.dgemm(alpha=1.0, a=self.z2.T, b=self.W2.T,
                       trans_a=True, trans_b=True)
    o = expit((self.z3))  # final activation function
    return o
def dgemm(a, b, c=None, alpha=1.0, beta=0.0):
    # Fortran memory layout so we reorder memory without copying, and then
    # form c^T, finally converting to C layout without copying.
    if not _is_contiguous(a) or not _is_contiguous(b):
        print('WARNING: DGEMM called on non-contiguous data')

    m, k = a.shape
    n = b.shape[1]
    assert k == b.shape[0]

    a, ta = _reorder_fortran(a)
    b, tb = _reorder_fortran(b)

    if c is None:
        c = np.zeros((m, n), dtype=np.float64, order='C')

    if m == 0 or n == 0 or k == 0:
        return c

    c, tc = _reorder_fortran(c)

    c = blas.dgemm(alpha=alpha, a=b, b=a, c=c, beta=beta,
                   trans_a=not tb, trans_b=not ta)

    c, tc = _reorder_c(c)

    return c
def polynomial_kernel_matrix(P, Q, c, degree):
    """
    Calculate kernel matrix using polynomial kernel.

        k(p,q) = (p^{T}q + c)^d

    Parameters:
    -----------
    P : `numpy.ndarray`
        (nDataP, nDim) matrix of data. Each row corresponds to a data point.
    Q : `numpy.ndarray`
        (nDataQ, nDim) matrix of data. Each row corresponds to a data point.
    c : `float`
        Bias term, c >= 0.
    degree : `int`
        Degree of the polynomial kernel function.

    Returns:
    --------
    K : `numpy.ndarray`
        (nDataP, nDataQ) matrix, the polynomial kernel matrix of the P and Q
        data matrix.
    """
    return ne.evaluate('(c + A)**d', {
        'A': dgemm(alpha=1.0, a=P, b=Q, trans_b=True),
        'c': c,
        'd': degree
    })
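# Illustrative check (not from the original source): the kernel matrix built
# above is simply (P @ Q.T + c) ** degree, so a plain NumPy version should
# produce the same result. Assumes numexpr (ne) and scipy.linalg.blas.dgemm
# are importable as used by the function above.
import numpy as np


def _demo_polynomial_kernel():
    rng = np.random.RandomState(1)
    P = rng.rand(6, 3)
    Q = rng.rand(4, 3)
    K_blas = polynomial_kernel_matrix(P, Q, c=1.0, degree=3)
    K_ref = (P @ Q.T + 1.0) ** 3
    assert np.allclose(K_blas, K_ref)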
def xc_scalar_ni(me, sp1, R1, sp2, R2, xc_code, deriv, **kw):
    from pyscf.dft.libxc import eval_xc
    """
    Computes overlap for an atom pair. The atom pair is given by a pair of
    species indices and the coordinates of the atoms.
    Args:
        sp1, sp2 : specie indices, and R1, R2 : respective coordinates in
        Bohr, atomic units
    Result:
        matrix of orbital overlaps
    The procedure uses the numerical integration in coordinate space.
    """
    grids = build_3dgrid(me, sp1, R1, sp2, R2, **kw)

    rho = dens_libnao(grids.coords, me.sv.nspin)
    exc, vxc, fxc, kxc = libxc.eval_xc(xc_code, rho.T, spin=me.sv.nspin - 1,
                                       deriv=deriv)
    ao1 = ao_eval(me.ao1, R1, sp1, grids.coords)
    if deriv == 1:
        # print(' vxc[0].shape ', vxc[0].shape)
        ao1 = ao1 * grids.weights * vxc[0]
    elif deriv == 2:
        ao1 = ao1 * grids.weights * fxc[0]
    else:
        print(' deriv ', deriv)
        raise RuntimeError('!deriv!')

    ao2 = ao_eval(me.ao2, R2, sp2, grids.coords)

    overlaps = blas.dgemm(1.0, ao1, ao2.T)

    return overlaps
def matrixMult(A, B):
    global initializedMatrix
    global useNumpy  # module based variables

    mprint("A", A)
    mprint("B", B)

    if not initializedMatrix:
        matrix_initialize()

    if cuda.useCUDA:
        return cuda.dot(A, B)

    if useNumpy:
        return np.dot(A, B)

    # If the matrices are in Fortran order then the computations will be
    # faster when using dgemm. Otherwise, the function will copy the matrix
    # and that takes time.
    if not A.flags['F_CONTIGUOUS']:
        AA = A.T
        transA = True
    else:
        AA = A
        transA = False

    if not B.flags['F_CONTIGUOUS']:
        BB = B.T
        transB = True
    else:
        BB = B
        transB = False

    return dgemm(alpha=1., a=AA, b=BB, trans_a=transA, trans_b=transB)
def gemm(alpha, A, B, C, beta, MKLProc):
    from scipy.linalg.blas import dgemm
    from numpy import transpose
    import os
    os.environ['MKL_NUM_THREADS'] = str(MKLProc)
    B = transpose(B)
    C = dgemm(alpha, A, B, c=C, beta=beta)
    return C
def project_onto_null_space(a, n, nt, lb, ub, n_steps_projection, reactions, tol=1e-4, verbose=0):
    # Do matrix multiplication
    a = dgemm(1.0, n, dgemm(1.0, nt, a)).transpose()[0]

    i, accept = 0, True
    while any(a < lb - tol) or any(a > ub + tol):
        print '[WARNING] Bounds reset: %d times' % (i + 1)
        print '\t lower bound: ', reactions[a < lb - tol]
        print '\t upper bound: ', reactions[a > ub + tol]

        # Fix bounds violations
        lb_violations = a < lb - tol
        if any(lb_violations):
            a[lb_violations] = lb[lb_violations]

        ub_violations = a > ub
        if any(ub_violations):
            a[ub_violations] = ub[ub_violations]

        # Do matrix multiplication
        a = dgemm(1.0, n, dgemm(1.0, nt, a)).transpose()[0]

        # Verbose
        if i > 10:
            accept, n_steps_projection = False, max(25, n_steps_projection - 100)

            # Verbose
            if verbose > 1:
                print '[WARNING] Point discarded! lb: %r, ub: %r' % (any(lb_violations), any(ub_violations))

            break

        i += 1

    if i == 0:
        n_steps_projection = min(25, n_steps_projection + 25)

    return accept, n_steps_projection, a.copy()
def main():
    lock = int(sys.argv[1])

    size_list = [2**i for i in range(8, 16)]
    no_runs = 10

    columns = ["My function (Python)", "My function (32 Cores Python)",
               "matmul (NumPy Python)", "dgemm (Python)", "sgemm (Python)"]
    time_df = pd.DataFrame(columns=columns)

    for mat_size in size_list:
        print(f"Mat size: {mat_size}")
        for i in range(no_runs):
            print(f"i: {i}")
            m1 = np.random.rand(mat_size, mat_size).astype(np.float32)
            m2 = np.random.rand(mat_size, mat_size).astype(np.float32)

            new_times = []
            time.sleep(10)

            if mat_size <= 2048:
                my_func_start = time.perf_counter()
                m_myfunc = matrix_mult(m1, m2)
                my_func_finish = time.perf_counter()
                new_times.append(round(my_func_finish - my_func_start, 8))

                my_func_32cores_start = time.perf_counter()
                time_taken, m_myfunc32 = gen_time_results(mat_size, 32, m1, m2)
                my_func_32cores_finish = time.perf_counter()
                new_times.append(round(my_func_32cores_finish - my_func_32cores_start, 8))
            else:
                new_times.append(None)
                new_times.append(None)

            numpy_start = time.perf_counter()
            mn = np.matmul(m1, m2)
            numpy_finish = time.perf_counter()
            new_times.append(round(numpy_finish - numpy_start, 8))

            dgemm_start = time.perf_counter()
            md = FB.dgemm(alpha=1.0, a=m1, b=m2)
            dgemm_finish = time.perf_counter()
            new_times.append(round(dgemm_finish - dgemm_start, 8))

            sgemm_start = time.perf_counter()
            ms = FB.sgemm(alpha=1.0, a=m1, b=m2)
            sgemm_finish = time.perf_counter()
            new_times.append(round(sgemm_finish - sgemm_start, 8))

            print(new_times)
            time_df = time_df.append(
                pd.DataFrame([new_times], columns=columns, index=[mat_size]))

    if lock:
        time_df.to_pickle("time_df_libraries_lock.pkl")
    else:
        time_df.to_pickle("time_df_libraries_no_lock.pkl")
def seidel(adj):
    """
    Implements the Seidel algorithm for APSP on unweighted, undirected graphs.

    For a given adjacency matrix, returns a like-sized matrix holding the
    distances of the shortest paths between nodes.
    """
    n = adj.shape[0]

    # Prepare our base case to compare against, an n x n matrix that is 0 on
    # the diagonal and 1 everywhere else.
    base_case = np.ones_like(adj)
    np.fill_diagonal(base_case, 0)

    # Let Z = A . A
    # Using DGEMM directly here instead of np.dot because the latter is
    # 10x slower.
    z = dgemm(1, adj, adj)

    # Let B be an n x n binary matrix
    # where b_ij =
    #   1 if it isn't on the diagonal and a_ij = 1 or z_ij > 0
    #   0 otherwise
    b = np.where(
        np.logical_and(
            # Not on the diagonal
            np.logical_not(np.eye(n)),
            np.logical_or(adj == 1, z > 0)),
        1, 0)

    # If all b_ij not on the diagonal are 1
    if (b == base_case).all():
        return 2 * b - adj

    t = seidel(b)

    # Let X = T . A
    x = dgemm(1, t, adj)

    # Return n x n matrix D
    # where d_ij =
    #   2*t_ij     if x_ij >= t_ij * degree(j)
    #   2*t_ij - 1 if x_ij <  t_ij * degree(j)
    degrees = np.repeat(np.sum(adj, axis=1)[None, :], adj.shape[0], axis=0)
    d = np.where(x >= np.multiply(t, degrees), 2 * t, 2 * t - 1)

    return d.astype('float32')
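# Small usage sketch (illustrative only, not from the original source): for
# the path graph 0-1-2 the recursion above should return the usual
# shortest-path distances. Assumes scipy.linalg.blas.dgemm is in scope as
# used by seidel().
import numpy as np


def _demo_seidel():
    adj = np.array([[0, 1, 0],
                    [1, 0, 1],
                    [0, 1, 0]], dtype=np.float64)
    d = seidel(adj)
    expected = np.array([[0, 1, 2],
                         [1, 0, 1],
                         [2, 1, 0]])
    assert np.array_equal(d, expected)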
def calc_XVX(X, VX):
    XVX = np.zeros((VX.shape[1], X.shape[0], VX.shape[2]))
    for i in range(XVX.shape[2]):
        XVX[:, :, i] = blas.dgemm(1.0, VX[:, :, i], X, trans_a=1, trans_b=1)
    return XVX
def dgemm(a, b, c=None, alpha=1.0, beta=0.0):
    ''' Performs dgemm in Fortran memory alignment without copying.
        Input matrix should be contiguous (either F or C).

    Parameters
    ----------
    a : array
        input matrix a
    b : array
        input matrix b
    c : array, optional
        output matrix c, if None then it is allocated inside the
        function, default None
    alpha : float, optional
        scalar factor for matrix a
    beta : float, optional
        scalar factor for matrix c

    Returns
    -------
    c : ndarray
        output matrix
    '''
    # FIXME: this needs testing better - currently this should only be
    #        used where it has been tested against np.dot specifically for
    #        that use case!

    if (not _is_contiguous(a)) or (not _is_contiguous(b)):
        log.warn('DGEMM called on non-contiguous data')

    m, k = a.shape
    n = b.shape[1]
    assert k == b.shape[0]

    a, ta = _reorder_fortran(a)
    b, tb = _reorder_fortran(b)

    if c is None:
        c = np.zeros((m, n), dtype=np.float64, order='C')

    if m == 0 or n == 0 or k == 0:
        return c

    c, tc = _reorder_fortran(c)

    c = blas.dgemm(alpha=alpha, a=b, b=a, c=c, beta=beta,
                   trans_a=not tb, trans_b=not ta)

    c, tc = _reorder_c(c)

    return c
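# The wrapper above relies on helpers (_is_contiguous, _reorder_fortran,
# _reorder_c) that are not shown here. The underlying trick can be sketched
# directly with scipy.linalg.blas.dgemm: a C-contiguous array is the transpose
# of a Fortran-contiguous one, so for C-ordered A and B one can compute
# C = A @ B as (B^T @ A^T)^T without copying either operand. Illustrative
# only, under those assumptions.
import numpy as np
from scipy.linalg import blas


def _dot_c_ordered(a, b):
    # a, b are C-contiguous; a.T and b.T are Fortran-contiguous views (no copy)
    c_t = blas.dgemm(alpha=1.0, a=b.T, b=a.T)  # computes (A @ B)^T in F order
    return c_t.T                               # transpose view back to C order


def _demo_dot_c_ordered():
    a = np.random.rand(50, 30)
    b = np.random.rand(30, 20)
    assert np.allclose(_dot_c_ordered(a, b), a @ b)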
def linear_least_squares(a, b, residuals=False):
    """
    Return the least-squares solution to a linear matrix equation.

    Solves the equation `a x = b` by computing a vector `x` that minimizes
    the Euclidean 2-norm `|| b - a x ||^2`. The equation may be under-,
    well-, or over-determined (i.e., the number of linearly independent
    rows of `a` can be less than, equal to, or greater than its number of
    linearly independent columns). If `a` is square and of full rank, then
    `x` (but for round-off error) is the "exact" solution of the equation.

    Parameters
    ----------
    a : (M, N) array_like
        "Coefficient" matrix.
    b : (M,) array_like
        Ordinate or "dependent variable" values.
    residuals : bool
        Compute the residuals associated with the least-squares solution

    Returns
    -------
    x : (N,) ndarray
        Least-squares solution. The shape of `x` depends on the shape of `b`.
    residuals : float (Optional)
        Euclidean 2-norm of ``b - a*x``.
    """
    if type(a) != np.ndarray or not a.flags['C_CONTIGUOUS']:
        warn('Matrix a is not a C-contiguous numpy array. The solver will '
             'create a copy, which will result in increased memory usage.')
        a = np.asarray(a, order='c')

    i = dgemm(alpha=1.0, a=a.T, b=a.T, trans_b=True)
    x = np.linalg.solve(i, dgemm(alpha=1.0, a=a.T, b=b)).flatten()

    if residuals:
        return x, np.linalg.norm(np.dot(a, x) - b)
    else:
        return x
def applyBND(mat, **kwargs):
    """Return a BND refinement of a symmetric matrix. If the matrix is not
    symmetric, the calculation is performed on mat + mat.T.
    It has comparable speed with the original MATLAB code.

    [SUP15] Sun, Hai-Ping, et al. "Improving accuracy of protein contact
    prediction using balanced network deconvolution." Proteins: Structure,
    Function, and Bioinformatics (2015)."""

    try:
        ndim, shape = mat.ndim, mat.shape
    except AttributeError:
        raise TypeError('Matrix must be a 2D square array')

    from numpy import fill_diagonal
    from ..Application.math import eigh
    from scipy.linalg.blas import dgemm

    if ndim != 2 or shape[0] != shape[1]:
        raise ValueError('Matrix must be a 2D square array')

    if mat.min() != mat.max():
        mat = (mat - mat.min()) / (mat.max() - mat.min())
    else:
        from ..IO.output import printInfo
        printInfo("The input matrix is a constant matrix.")
        return mat

    n = mat.shape[0]
    fill_diagonal(mat, 0.)
    mat_th = (mat + mat.T) / 2.

    # Double symmetric eigenvector, relatively robust representation (RRR)
    d, u = eigh(mat_th)
    for i in xrange(n):
        if d[i] != 0:
            d[i] = (-1. + (1 + 4 * d[i] * d[i]) ** .5) / 2. / d[i]

    # Double general matrix multiply
    mat_new1 = dgemm(alpha=1.0, a=(u * d), b=u, trans_b=True)
    # mat_new1 = (u*d).dot(u.T)  # old numpy dot, slower

    ind_edges = (mat_th > 0) * 1.0
    ind_nonedges = (mat_th == 0) * 1.0
    m1 = (mat * ind_nonedges).max()
    m2 = mat_new1.min()
    mat_new2 = (mat_new1 + max(m1 - m2, 0)) * ind_edges + (mat * ind_nonedges)

    m1 = mat_new2.min()
    m2 = mat_new2.max()
    if m1 != m2:
        mat_bnd = (mat_new2 - m1) / (m2 - m1)
    else:
        mat_bnd = mat_new2

    return mat_bnd
def compute_metric_gmm(direction, criterion, variables, model_cv, samples, labels, idx):
    """ Function that computes the accuracy of the model_cv using the
        variables : idx +/- one of variables
        Inputs:
            direction: 'backward' or 'forward' or 'SFFS'
            criterion: criterion function use to discriminate variables
            variables: the variable to add or delete to idx
            model_cv: the model build with all the variables
            samples, labels: the samples/label for testing
            idx: the pool of retained variables
        Output:
            metric: the estimated metric

        Used in GMM.forward_selection(), GMM.backward_selection()
    """
    # Initialization
    metric = sp.zeros(variables.size)
    confMatrix = ConfusionMatrix()

    # Compute inv of covariance matrix
    if len(idx) == 0:
        logdet = None
        Qs = None
        scores_t = None
    else:
        logdet = sp.empty((model_cv.C))
        Qs = sp.empty((model_cv.C, len(idx), len(idx)))
        scores_t = sp.empty((model_cv.C, samples.shape[0]))

        for c in xrange(model_cv.C):
            # Here -> store Qs and scores_t
            vp, Q, rcond = model_cv.decomposition(model_cv.cov[c, idx, :][:, idx])
            Qs[c, :, :] = Q / sp.sqrt(vp)
            logdet[c] = sp.sum(sp.log(vp))

            # Pre compute score
            testSamples_c = samples[:, idx] - model_cv.mean[c, idx]
            temp = dgemm(1., Qs[c, :, :].T, testSamples_c, trans_b=True)
            scores_t[c, :] = sp.sum(temp ** 2, axis=0)

    for i, var in enumerate(variables):
        predLabels = model_cv.predict_gmm_update(direction, samples, logdet,
                                                 (i, var), Qs, scores_t,
                                                 featIdx=idx)[0]

        confMatrix.compute_confusion_matrix(predLabels, labels)
        if criterion == 'accuracy':
            metric[i] = confMatrix.get_OA()
        elif criterion == 'F1Mean':
            metric[i] = confMatrix.get_F1Mean()
        elif criterion == 'kappa':
            metric[i] = confMatrix.get_kappa()

    return metric
def kronvec_prod(self, x):
    """
    Computes K*x where K is a kronecker product matrix and x is a column
    vector which is a numpy array.

    Inputs:
        self : (N,M) KronMatrix
        x : (M,1) matrix

    Outputs:
        y : (N,1) matrix

    This routine ensures all matrices are in Fortran-contiguous format
    before using blas routines. It also takes advantage of matrix symmetry
    (if the sym flag is set).
    """
    if x.shape != (self.shape[1], 1):
        raise ValueError('x is the wrong shape, must be (%d,1), not %s'
                         % (self.shape[1], repr(x.shape)))
    y = x
    for i, Ki in reversed(list(enumerate(self.K))):
        y = np.reshape(y, (self.sshape[i, 1], -1), order='F')
        if isinstance(Ki, np.ndarray):  # use optimized blas routines
            if np.isfortran(Ki):
                a = Ki
            else:
                a = Ki.T
            if self.sym:
                if np.isfortran(y):
                    y = blas.dsymm(alpha=1, a=a, b=y, side=0).T
                else:  # just switch the order
                    y = blas.dsymm(alpha=1, a=a, b=y.T, side=1)
            else:
                if np.isfortran(y):
                    b = y
                else:
                    b = y.T
                y = blas.dgemm(alpha=1, a=a, b=b,
                               trans_a=(not np.isfortran(Ki)),
                               trans_b=(not np.isfortran(y))).T
        else:  # use __mul__ routine
            y = (Ki * y).T
    y = y.reshape((-1, 1), order='F')  # reshape to a column vector
    return y
def project_vectors(self, vectors):
    """
    Projects each of the `vectors` onto the model, retrieving the optimal
    linear reconstruction weights for each instance.

    Parameters
    ----------
    vectors : (n_samples, n_features) ndarray

    Returns
    -------
    weights : (n_samples, n_components) ndarray
        The matrix of optimal linear weights
    """
    return dgemm(alpha=1.0, a=vectors.T, b=self.components.T, trans_a=True)
def gen_time_results(mat_size, no_runs):
    mat_A = np.random.rand(mat_size, mat_size)
    mat_B = np.random.rand(mat_size, mat_size)
    time_list_d = []
    time_list_s = []
    for _ in range(no_runs):
        start = time.perf_counter()
        result = FB.dgemm(alpha=1, a=mat_A, b=mat_B)
        finish = time.perf_counter()
        time_taken_d = round(finish - start, 10)
        time_list_d.append(time_taken_d)

        start = time.perf_counter()
        result = FB.sgemm(alpha=1, a=mat_A, b=mat_B)
        finish = time.perf_counter()
        time_taken_s = round(finish - start, 10)
        time_list_s.append(time_taken_s)
    return time_list_d, time_list_s
def mytdot(a, b):
    if a.flags.c_contiguous:
        a_T = False
        a = a.T
    elif a.flags.f_contiguous:
        a_T = True
    else:
        a_T = True
        a = np.asfortranarray(a)
    if b.flags.c_contiguous:
        b_T = False
        b = b.T
    elif b.flags.f_contiguous:
        b_T = True
    else:
        b_T = True
        b = np.asfortranarray(b)
    return bl.dgemm(alpha=1.0, a=b, b=a, trans_a=b_T, trans_b=a_T).T
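# Quick illustrative check (not part of the original code): mytdot should
# reproduce np.dot for both C- and Fortran-ordered operands. Assumes
# scipy.linalg.blas is imported as `bl`, as used by the function above.
import numpy as np


def _demo_mytdot():
    a = np.random.rand(40, 30)                      # C-contiguous
    b = np.asfortranarray(np.random.rand(30, 20))   # F-contiguous
    assert np.allclose(mytdot(a, b), a @ b)
    assert np.allclose(mytdot(np.asfortranarray(a), b), a @ b)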
def single_propagation(self, query, src_net, corr_function=None):
    network = self.graphdata.networks[src_net].matrix
    names = self.graphdata.networks[src_net].node_names
    query_vector = self.generate_query_vector(query, src_net)
    initial_score = RWR(query_vector, network)
    vectors = initial_score.tolist()
    corr_method = self._get_correlation_method(corr_function)
    d_vectors = dgemm(alpha=1., a=vectors, b=vectors, trans_a=True)[0][0]
    n_paths = 1
    corr_score = self.compute_correlation_scores(network, vectors, d_vectors,
                                                 src_net, n_paths, corr_method)
    return corr_score
def gaussian_kernel_matrix(P, Q, c):
    """
    Calculate kernel matrix using gaussian kernel.

        ||p-q||^2 = ||p||^2 + ||q||^2 - 2 * p^T * q
        k(p,q) = exp(-c*||p-q||^2)
               = exp(-c*[||p||^2 + ||q||^2 - 2 * p^T * q])

    C: dgemm(alpha=1.0, a=P, b=Q, trans_b=True)  double precision
    C: sgemm(alpha=1.0, a=P, b=Q, trans_b=True)  single precision
    C: np.dot(P,Q.T)                             single precision
        P = P.astype(np.float32)
        Q = Q.astype(np.float32)

    Parameters:
    -----------
    P : `numpy.ndarray`
        (nDataP, nDim) matrix of data. Each row corresponds to a data point.
    Q : `numpy.ndarray`
        (nDataQ, nDim) matrix of data. Each row corresponds to a data point.
    c : `int`
        Width of the gaussian kernel function.

    Returns:
    --------
    K : `numpy.ndarray`
        (nDataP, nDataQ) matrix, the gaussian kernel matrix of the P and Q
        data matrix.
    """
    # Calculate norm
    P_norm = np.einsum('ij,ij->i', P, P)
    Q_norm = np.einsum('ij,ij->i', Q, Q)
    return ne.evaluate(
        'exp(-gamma * (A + B - 2*C))', {
            'A': P_norm[:, None],
            'B': Q_norm[None, :],
            'C': dgemm(alpha=1.0, a=P, b=Q, trans_b=True),
            'gamma': c
        })
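# Illustrative check (not from the original source): the expansion
# ||p - q||^2 = ||p||^2 + ||q||^2 - 2 p^T q used above should reproduce a
# direct computation of exp(-c * ||p - q||^2). Assumes numexpr (ne) and
# scipy.linalg.blas.dgemm are importable as used by the function above.
import numpy as np


def _demo_gaussian_kernel():
    rng = np.random.RandomState(2)
    P = rng.rand(5, 4)
    Q = rng.rand(3, 4)
    c = 0.5
    K = gaussian_kernel_matrix(P, Q, c)
    diff = P[:, None, :] - Q[None, :, :]
    K_ref = np.exp(-c * np.sum(diff ** 2, axis=2))
    assert np.allclose(K, K_ref)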
        finish = time.perf_counter()
        time_taken = round(finish - start, 8)
        total_time_Numpy += time_taken
        #assert mn.all() == ans.all()
    print(total_time_Numpy / no_runs)
print("\n")

print("----LAPACK DGEMM----")
for mat_size in mat_sizes:
    print(f"Mat size: {mat_size}")
    total_time_DGEMM = 0
    for _ in range(no_runs):
        m1 = np.random.rand(mat_size, mat_size)
        m2 = np.random.rand(mat_size, mat_size)
        start = time.perf_counter()
        md = FB.dgemm(alpha=1, a=m1, b=m2)
        finish = time.perf_counter()
        time_taken = round(finish - start, 8)
        total_time_DGEMM += time_taken
        #assert md.all() == ans.all()
    print(total_time_DGEMM / no_runs)
print("\n")

print("---- LAPACK SGEMM----")
for mat_size in mat_sizes:
    print(f"Mat size: {mat_size}")
    total_time_SGEMM = 0
    for _ in range(no_runs):
        m1 = np.random.rand(mat_size, mat_size)
        m2 = np.random.rand(mat_size, mat_size)
        start = time.perf_counter()
def _instance_vectors_for_full_weights(self, full_weights):
    return dgemm(alpha=1.0, a=full_weights.T, b=self.components.T,
                 trans_a=True, trans_b=True)
def principal_component_decomposition(X, whiten=False, center=True,
                                      bias=False, inplace=False):
    r"""
    Apply PCA on the data matrix X.

    In the case where the data matrix is very large, it is advisable to set
    `inplace=True`. However, note that this destructively edits the data
    matrix by subtracting the mean inplace.

    Parameters
    ----------
    x : (n_samples, n_features) ndarray
        Training data
    whiten : bool, optional
        Normalise the eigenvectors to have unit magnitude

        Default: `False`
    center : bool, optional
        Whether to center the data matrix. If `False`, zero will be
        subtracted.

        Default: `True`
    bias : bool, optional
        Whether to use a biased estimate of the number of samples. If
        `False`, subtracts `1` from the number of samples.

        Default: `False`
    inplace : bool, optional
        Whether to do the mean subtracting inplace or not. This is crucial
        if the data matrix is greater than half the available memory size.

        Default: `False`

    Returns
    -------
    eigenvectors : (n_components, n_features) ndarray
        The eigenvectors of the data matrix
    eigenvalues : (n_components,) ndarray
        The positive eigenvalues from the data matrix
    mean_vector : (n_features,) ndarray
        The mean that was subtracted from the dataset
    """
    n_samples, n_features = X.shape

    if bias:
        N = n_samples
    else:
        N = n_samples - 1.0

    if center:
        # center data
        mean_vector = np.mean(X, axis=0)
    else:
        mean_vector = np.zeros(n_features)

    # This is required if the data matrix is very large!
    if inplace:
        X -= mean_vector
    else:
        X = X - mean_vector

    if n_features < n_samples:
        # compute covariance matrix
        # S: n_features x n_features
        S = dgemm(alpha=1.0, a=X.T, b=X.T, trans_b=True) / N
        # S should be perfectly symmetrical, but numerical error can creep
        # in. Enforce symmetry here to avoid creating complex eigenvectors
        # from eigendecomposition
        S = (S + S.T) / 2.0

        # perform eigenvalue decomposition
        # eigenvectors: n_features x n_features
        # eigenvalues:  n_features
        eigenvectors, eigenvalues = eigenvalue_decomposition(S)

        if whiten:
            # whiten eigenvectors
            eigenvectors *= np.sqrt(1.0 / eigenvalues)
    else:
        # n_features > n_samples
        # compute covariance matrix
        # S: n_samples x n_samples
        S = dgemm(alpha=1.0, a=X.T, b=X.T, trans_a=True) / N
        # S should be perfectly symmetrical, but numerical error can creep
        # in. Enforce symmetry here to avoid creating complex eigenvectors
        # from eigendecomposition
        S = (S + S.T) / 2.0

        # perform eigenvalue decomposition
        # eigenvectors: n_samples x n_samples
        # eigenvalues:  n_samples
        eigenvectors, eigenvalues = eigenvalue_decomposition(S)

        # compute final eigenvectors
        # eigenvectors: n_samples x n_features
        if whiten:
            w = (N * eigenvalues) ** -1.0
        else:
            w = np.sqrt(1.0 / (N * eigenvalues))
        eigenvectors = w * dgemm(alpha=1.0, a=X.T, b=eigenvectors.T,
                                 trans_b=True)

    # transpose eigenvectors
    # eigenvectors: n_components x n_features
    eigenvectors = eigenvectors.T

    return eigenvectors, eigenvalues, mean_vector
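# Illustrative sanity check (not part of the original code): when
# n_features < n_samples and eigenvalue_decomposition retains all components,
# reconstructing the centred data from every principal component should
# recover the original data. Assumes eigenvalue_decomposition and dgemm are
# available as used by the function above; the names below are only for
# illustration.
import numpy as np


def _demo_pca_reconstruction():
    rng = np.random.RandomState(3)
    X = rng.rand(50, 5)
    evecs, evals, mean = principal_component_decomposition(X.copy())
    Xc = X - mean
    weights = Xc @ evecs.T           # project onto the components
    X_rec = weights @ evecs + mean   # reconstruct from all components
    assert np.allclose(X_rec, X)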
def mean_cov(X):
    n, p = X.shape
    m = X.mean(axis=0)
    cx = X - m
    S = dgemm(1. / (n - 1), cx.T, cx.T, trans_a=0, trans_b=1)
    return cx, m, S.T
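# Illustrative check (not in the original source): the covariance computed
# above should match np.cov on the same data, since np.cov also uses the
# unbiased 1/(n-1) normalisation.
import numpy as np


def _demo_mean_cov():
    X = np.random.rand(100, 4)
    cx, m, S = mean_cov(X)
    assert np.allclose(m, X.mean(axis=0))
    assert np.allclose(S, np.cov(X, rowvar=False))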
def predict_gmm_update(self, direction, testSamples, logdet, newFeat, Qs, scores_t, featIdx):
    """ Function that predicts the label for testSamples using the learned
        model (with an update method of the inverse of covariance matrix)
        Inputs:
            direction: 'backward' or 'forward'
            testSamples: the samples to be classified
            invCov: inverse of covariance matrix of already selected features
            newFeat: couple (index, feature) to add or delete
            featIdx: indices of features to use for classification
        Outputs:
            predLabels: the class
            scores: the decision value for each class
    """
    # Get machine precision
    eps = sp.finfo(sp.float64).eps

    # Get information from the data
    nbTestSpl = testSamples.shape[0]  # Number of testing samples

    scores = sp.empty((nbTestSpl, self.C))

    # New set of features
    if direction == 'forward':
        id_t = list(featIdx)
        id_t.append(newFeat[1])
    elif direction == 'backward':
        id_t = list(featIdx)

    # Start the prediction for each class
    for c in xrange(self.C):
        testSamples_c = testSamples[:, id_t] - self.mean[c, id_t]

        if len(id_t) == 1:
            scores[:, c] = (sp.sum(testSamples_c * testSamples_c, axis=1)
                            / self.cov[c, id_t, id_t]
                            + sp.log(self.cov[c, id_t, id_t])
                            - self.logprop[c])
        else:
            if direction == 'forward':
                temp = dgemm(1.0, Qs[c, :, :].T,
                             self.cov[c, newFeat[1], :][featIdx].T)
                alpha = self.cov[c, newFeat[1], newFeat[1]] - sp.sum(temp ** 2)
                if alpha < eps:
                    alpha = eps
                logdet_update = sp.log(alpha) + logdet[c]

                row_feat = sp.empty((len(id_t)))
                row_feat[:len(featIdx)] = dgemm(-1 / alpha, Qs[c, :, :],
                                                temp).flatten()
                row_feat[-1] = 1 / alpha
                cst_feat = alpha * (sp.dot(row_feat, testSamples_c.T) ** 2)

            elif direction == 'backward':
                alpha = 1 / invCov[c, newFeat[0], newFeat[0]]
                if alpha < eps:
                    alpha = eps
                logdet_update = -sp.log(alpha) + logdet[c]

                row_feat = invCov[c, newFeat[0], :]
                cst_feat = -alpha * (sp.dot(row_feat, testSamples_c.T) ** 2)

            cst = logdet_update - self.logprop[c]  # Pre compute the constant term

            # temp = sp.dot(testSamples_c, invCov[c,:,:][:,:])
            # Here -> symmetric dot product
            scores[:, c] = scores_t[c, :] + cst_feat + cst

            del temp
        del testSamples_c

    # Assign the label to the minimum value of scores
    predLabels = sp.argmin(scores, 1) + 1

    return predLabels, scores
def principal_component_decomposition(X, whiten=False, center=True,
                                      bias=False):
    r"""
    Apply PCA on the data matrix X.

    Parameters
    ----------
    x : (n_samples, n_features) ndarray
        Training data

    Returns
    -------
    eigenvectors : (n_components, n_features) ndarray
    eigenvalues : (n_components,) ndarray
    """
    n_samples, n_features = X.shape

    if bias:
        N = n_samples
    else:
        N = n_samples - 1

    if center:
        # center data
        mean_vector = np.mean(X, axis=0)
    else:
        mean_vector = np.zeros(n_features)
    X -= mean_vector

    if n_features < n_samples:
        # compute covariance matrix
        # S: n_features x n_features
        S = dgemm(alpha=1.0, a=X.T, b=X.T, trans_b=True) / N
        # S should be perfectly symmetrical, but numerical error can creep
        # in. Enforce symmetry here to avoid creating complex eigenvectors
        # from eigendecomposition
        S = (S + S.T) / 2

        # perform eigenvalue decomposition
        # eigenvectors: n_features x n_features
        # eigenvalues:  n_features
        eigenvectors, eigenvalues = eigenvalue_decomposition(S)

        if whiten:
            # whiten eigenvectors
            eigenvectors *= eigenvalues ** -0.5
    else:
        # n_features > n_samples
        # compute covariance matrix
        # S: n_samples x n_samples
        S = dgemm(alpha=1.0, a=X.T, b=X.T, trans_a=True) / N
        # S should be perfectly symmetrical, but numerical error can creep
        # in. Enforce symmetry here to avoid creating complex eigenvectors
        # from eigendecomposition
        S = (S + S.T) / 2

        # perform eigenvalue decomposition
        # eigenvectors: n_samples x n_samples
        # eigenvalues:  n_samples
        eigenvectors, eigenvalues = eigenvalue_decomposition(S)

        aux = 2
        if whiten:
            # will cause eigenvectors to be whitened
            aux = 1

        # compute final eigenvectors
        # eigenvectors: n_samples x n_features
        w = (N * eigenvalues) ** (-1 / aux)
        eigenvectors = w * dgemm(alpha=1.0, a=X.T, b=eigenvectors.T,
                                 trans_b=True)

    # transpose eigenvectors
    # eigenvectors: n_components x n_features
    eigenvectors = eigenvectors.T

    return eigenvectors, eigenvalues, mean_vector
def calculate(self, theta, data, sparse):
    '''
    Calculates the cost function of a Sparse network for a given data using
    feedforward and backpropagation algorithms.

    Parameters
    ----------
    theta : Theta
        Theta values of the Sparse network.
    data : numpy.ndarray
        Training data.
    sparse : Sparse
        Sparse network.

    Returns
    -------
    cost_function : float
        Value of the cost function.
    total_partials : numpy.ndarray
        Matrix containing theta and bias values.
    '''
    th1, th2, b1, b2 = wrap_matrices(sparse, theta)
    num_samples = data.shape[0]

    # layer 1
    a1 = data

    # layer 2
    bias2 = np.dot(np.ones((num_samples, 1)), b1.T)
    z2 = np.dot(th1, data.T).T + bias2
    a2 = sigmoid(z2)

    # layer 3
    bias3 = np.dot(np.ones((num_samples, 1)), b2.T)
    z3 = np.dot(th2, a2.T).T + bias3
    a3 = sigmoid(z3)

    ### feedforward cost ###
    reg = np.sum(np.power(th1, 2)) + np.sum(np.power(th2, 2))
    norm = np.power(a3 - data, 2)
    Jnn = 0.5 * np.sum(norm) / num_samples + 0.5 * sparse.lamb * reg

    rhoest = np.sum(a2, axis=0) / num_samples
    kl = self.__compute_kl(sparse.rho, rhoest)
    cost_function = Jnn + sparse.beta * np.sum(kl)

    ### Backpropagation ###
    sum2 = sparse.beta * (-sparse.rho / rhoest +
                          (1.0 - sparse.rho) / (1.0 - rhoest))
    delta3 = (-(data - a3) * a3 * (1 - a3)).T
    sum1 = dgemm(alpha=1.0, a=th2.T, b=delta3, trans_b=False)
    sum2 = sum2[:, np.newaxis]
    delta2 = np.add(sum1, sum2) * a2.T * (1 - a2.T)

    partial_j1 = np.dot(delta2, a1)
    partial_j2 = np.dot(delta3, a2)
    partial_b1 = delta2.sum(axis=1)
    partial_b2 = delta3.sum(axis=1)

    total_partials = [
        1.0 / num_samples * partial_j1 + sparse.lamb * th1,
        1.0 / num_samples * partial_j2 + sparse.lamb * th2,
        1.0 / num_samples * partial_b1,
        1.0 / num_samples * partial_b2
    ]

    return cost_function, unwrap_matrices(total_partials)
# suppliers or licensors in any way.
#
#*******************************************************************************
# Content:
#     dgemm example
#*******************************************************************************

import numpy as np
import scipy.linalg.blas as slb
import time

M = 10000
N = 10000

matrix_list = [1024, 2048, 4096, 8192, 10000]

for K in matrix_list:
    a = np.array(np.random.random((M, K)), dtype=np.double, order='F')
    b = np.array(np.random.random((K, N)), dtype=np.double, order='F')
    A = np.matrix(a, dtype=np.double, copy=False)
    B = np.matrix(b, dtype=np.double, copy=False)

    start = time.time()
    C = slb.dgemm(1.0, a=A, b=B)
    end = time.time()

    texec = end - start
    print('M x N x K == ', M, N, K, '.. Scipy Execution Time == ', texec, 'sec')
NITER = 30

A_array = random((N, N))
B_array = random((N, N))
C_array = zeros((N, N))

start = time.time()
for it in range(NITER):
    C_array = dot(A_array, B_array)
stop = time.time()
print "Time for 2d array dot product is:", (stop - start) / NITER

A_mat = matrix(A_array)
B_mat = matrix(B_array)
C_mat = matrix(C_array)

start = time.time()
for it in range(NITER):
    C_mat = A_mat * B_mat
stop = time.time()
print "Time for Matrix multiplication is:", (stop - start) / NITER

alpha = 1
start = time.time()
for it in range(NITER):
    C_array = dgemm(alpha, A_array, B_array)
stop = time.time()
print "Time for dgemm is:", (stop - start) / NITER
#env = os.environ

if len(sys.argv) >= 2:
    K = int(sys.argv[1])
    M = K
    N = K
    ITER = 1
else:
    print "usage: python numpy_dgemm.py MATRIX_DIMENSION"
    sys.exit()

a1 = np.array(np.random.random((M, K)), dtype=np.double, order='C', copy=False)
a2 = np.array(np.random.random((K, N)), dtype=np.double, order='C', copy=False)

m1 = np.matrix(a1, dtype=np.double, copy=False)
m2 = np.matrix(a2, dtype=np.double, copy=False)

t_start = time.time()
for i in range(ITER):
    #mf2 = np.dot(m1, m2)
    #print "%d: dot(m1, m2)" % i
    mf2 = sp.dgemm(alpha=1.0, a=m1, b=m2)
t_end = time.time()

#print "= = = PYTHON NUMPY MKL = = ="
#print
print "dgemm(%dx%d, %dx%d), %d iterations" % (M, K, K, N, ITER)
print "dot(m1, m2): %f sec" % (t_end - t_start)
def compute_cov(xj, mean, ni):
    """ Compute the empirical covariance of the ni samples in xj, given
        their mean. """
    Xc = xj - mean
    cov = dgemm(1.0 / (ni - 1), Xc, Xc, trans_a=True, trans_b=False)
    # dsyrk could have been used -> but then a triangular matrix would have
    # to be handled everywhere
    return cov