def zgemm(a, b, c=None, alpha=1.0 + 0.0j, beta=0.0 + 0.0j):
    """Complex matrix product through BLAS ``zgemm``.

    The operands are passed through ``_reorder_fortran`` before the BLAS call
    and the result through ``_reorder_c`` afterwards; no copy is made in this
    function itself. A warning is logged when ``a`` or ``b`` is reported as
    non-contiguous by ``_is_contiguous``.

    Parameters
    ----------
    a : array
        Left input matrix, two-dimensional.
    b : array
        Right input matrix; its first dimension must equal ``a.shape[1]``.
    c : array, optional
        Matrix passed to BLAS as ``c``. When ``None`` (the default) a
        zero-filled complex128 array of shape ``(a.shape[0], b.shape[1])``
        is allocated here.
    alpha : complex, optional
        Scalar passed to BLAS as ``alpha``.
    beta : complex, optional
        Scalar passed to BLAS as ``beta``.

    Returns
    -------
    c : ndarray
        Output matrix. If any of the three problem dimensions is zero, ``c``
        is returned as supplied (or as freshly allocated) without calling
        BLAS.
    """
    if not (_is_contiguous(a) and _is_contiguous(b)):
        log.warn('ZGEMM called on non-contiguous data')

    n_rows, n_inner = a.shape
    n_cols = b.shape[1]
    assert n_inner == b.shape[0]

    # Each helper returns the (possibly re-viewed) array plus a flag; the
    # negated flags become the BLAS transpose arguments below.
    a, a_flag = _reorder_fortran(a)
    b, b_flag = _reorder_fortran(b)

    if c is None:
        c = np.zeros((n_rows, n_cols), dtype=np.complex128, order='C')
    if 0 in (n_rows, n_cols, n_inner):
        return c

    c, _ = _reorder_fortran(c)
    # Operands are deliberately swapped (a=b, b=a): BLAS evaluates the
    # transposed problem.
    product = blas.zgemm(alpha=alpha, a=b, b=a, c=c, beta=beta,
                         trans_a=not b_flag, trans_b=not a_flag)
    product, _ = _reorder_c(product)
    return product
def dy_dt(t0, y, H0, V_0, U, occup, epsilon, N, V, epsilon_vm):
    """Right-hand side of the ODE system: derivative of the packed state ``y``.

    The state vector is unpacked into ``psi`` (complex), ``P`` and ``theta``;
    the derivative of each part is computed and the three pieces are packed
    back into a single vector with ``psi_mat_to_vec``.

    Parameters
    ----------
    t0 :
        Not used in the body; presumably present to match the integrator's
        ``f(t, y, ...)`` callback signature -- confirm against the caller.
    y :
        Packed state vector, unpacked with ``psi_vec_to_mat(y, N)``.
    H0, V_0 :
        Combined as ``H0 + R(P, theta) * V_0`` to form the matrix applied
        to ``psi``.
    U :
        Enters the ``theta`` derivative as ``U / 2``.
    occup, V, epsilon_vm :
        Forwarded unchanged to ``E_T_V_n``.
    epsilon :
        Not used in the body.
    N :
        Forwarded to ``psi_vec_to_mat``, ``E_T_V_n`` and ``psi_mat_to_vec``.

    Returns
    -------
    The value of ``psi_mat_to_vec(-1j * H_t @ psi, dP, dtheta, N)``.
    """
    # Unpack the state; psi is complex.
    psi, P, theta = psi_vec_to_mat(y, N)

    # Only V_av (second of the four returned values) is needed for dP and
    # dtheta; the other three are discarded.
    _, V_av, _, _ = E_T_V_n(psi, occup, N, V, epsilon_vm)

    H_t = H0 + R(P, theta) * V_0
    # -i * H_t * psi
    minus_i_h_psi = -1j * blas.zgemm(alpha=1.0, a=H_t, b=psi, trans_b=False)

    dP = -V_av * dR_dtheta(P, theta)
    dtheta = U / 2 + V_av * dR_dP(P, theta)

    # Pack the three derivatives back into one vector.
    return psi_mat_to_vec(minus_i_h_psi, dP, dtheta, N)
def y_sparse(flag):
    """Build the complex ``(nbus, nbus)`` matrix ``Y`` from this rank's branches.

    ``flag`` selects which pair of module-level tables is read:
    ``0`` -> ``recv_ipt_bus`` / ``recv_ipt_brch``, ``1`` -> ``fbus`` /
    ``fline``, ``2`` -> ``posfbus`` / ``posfline``. Any other value leaves
    the tables unassigned and the first table access raises.

    Columns read from the branch table: 0 and 1 (bus numbers of the two ends,
    1-based, mapped through ``bus_int``), 2 and 3 (combined as
    ``col2 + jay * col3`` and inverted), 4 (halved and multiplied by ``jay``),
    5 (ratio; only entries > 0 change ``tap`` from its default of 1) and
    6 (multiplied by pi / 180 before exponentiation).
    Columns read from the bus table: 7 and 8 (combined as
    ``col7 + jay * col8`` and added to this rank's diagonal block of ``Y``).

    NOTE(review): the ``zgemm`` calls use the positional form
    ``(alpha, a, b, beta, c, trans_a, trans_b, overwrite_c)``; with
    ``alpha == 1`` and ``beta == 0`` (module-level values, not visible here)
    the three products accumulate
    ``c_from @ chrg @ c_from.T + c_to @ chrg @ c_to.T + c_line @ yy @ c_line.T``
    -- confirm.

    Returns
    -------
    Y : the assembled matrix.
    """
    n_branch = array_sizes_brch[rank]

    tap = np.ones((n_branch), dtype=np.complex128)
    c_from = np.zeros((nbus, n_branch), dtype=np.complex128)
    c_to = np.zeros((nbus, n_branch), dtype=np.complex128)
    c_line = np.zeros((nbus, n_branch), dtype=np.complex128)
    chrgfull = np.zeros((n_branch, n_branch), dtype=np.complex128)
    yyfull = np.zeros((n_branch, n_branch), dtype=np.complex128)
    Y = np.zeros((nbus, nbus), dtype=np.complex128)
    Y_dummy = np.zeros((n_branch, nbus), dtype=np.complex128)

    # Pick the bus / branch tables for the requested case.
    if flag == 0:
        bus_table, branch_table = recv_ipt_bus, recv_ipt_brch
    elif flag == 1:
        bus_table, branch_table = fbus, fline
    elif flag == 2:
        bus_table, branch_table = posfbus, posfline

    bus_g = bus_table[:, 7]
    bus_b = bus_table[:, 8]

    half_chrg = jay * (0.5 * branch_table[:, 4]).reshape(n_branch, 1)
    z_series = branch_table[:, 2] + jay * branch_table[:, 3]
    y_series = (1 / z_series).reshape(n_branch, 1)

    # Bus numbers in the table are 1-based; bus_int maps them to another
    # 1-based numbering, hence the two "- 1" shifts.
    from_ext = branch_table[:, 0].astype(int)
    from_idx = bus_int[from_ext - 1].astype(int) - 1
    to_ext = branch_table[:, 1].astype(int)
    to_idx = bus_int[to_ext - 1].astype(int) - 1

    shift = branch_table[:, 6]
    ratio = branch_table[:, 5]
    with_ratio = np.where(ratio > 0)
    tap[with_ratio] = np.exp(
        (-jay * shift[with_ratio]) * mt.pi / 180) / ratio[with_ratio]

    # One column per branch: tap at the "from" row, 1 at the "to" row, and
    # their difference in c_line.
    for col in range(n_branch):
        src = from_idx[col]
        dst = to_idx[col]
        c_from[src, col] = tap[col]
        c_to[dst, col] = complex(1, 0)
        c_line[src, col] = c_from[src, col] - c_to[src, col]
        c_line[dst, col] = c_from[dst, col] - c_to[dst, col]

    np.fill_diagonal(chrgfull, half_chrg[:, 0])
    np.fill_diagonal(yyfull, y_series[:, 0])

    Y_dummy = zgemm(alpha, chrgfull, c_from, beta, Y_dummy, 0, 1, 1)
    Y = zgemm(alpha, c_from, Y_dummy, beta, Y, 0, 0, 1)
    Y_dummy = zgemm(alpha, chrgfull, c_to, beta, Y_dummy, 0, 1, 1)
    Y = zgemm(alpha, c_to, Y_dummy, alpha, Y, 0, 0, 1)
    Y_dummy = zgemm(alpha, yyfull, c_line, beta, Y_dummy, 0, 1, 1)
    Y = zgemm(alpha, c_line, Y_dummy, alpha, Y, 0, 0, 1)

    bus_term = bus_g + jay * bus_b

    # Add the per-bus term to the diagonal of this rank's block of Y.
    bus_ends = np.cumsum(array_sizes_bus)
    stop = bus_ends[rank]
    start = 0 if rank == 0 else bus_ends[rank - 1]
    own = slice(start, stop)
    np.fill_diagonal(Y[own, own], Y.diagonal()[own] + bus_term)
    return Y
def reduce_y(comm, flag):
    """Build the reduced ``(array_sizes_gen[rank], ngen)`` matrix for case ``flag``.

    Steps, in order:

    1. Allocate the work buffers and obtain the full matrix from
       ``y_sparse(flag)``.
    2. Select the bus table for ``flag`` (0: ``recv_ipt_bus``, 1: ``fbus``,
       2: ``posfbus``) and add ``(Pl - jay * Ql) / V**2`` to this rank's
       diagonal block.
    3. Compute ``y = 1 / (ra + jay * xd)`` from ``recv_ipt_gen`` and place it,
       together with ``perm``, on the diagonal offset belonging to this rank.
    4. Combine the per-rank pieces with ``Allreduce`` / ``Scatterv`` /
       ``Allgatherv`` on ``comm``.
    5. Solve with ``zgesv`` and return ``Y_a + temp.T``.

    Parameters
    ----------
    comm :
        Object providing ``Allreduce``, ``Scatterv`` and ``Allgatherv``
        (presumably an mpi4py communicator -- confirm).
    flag :
        0, 1 or 2. For any other value ``busy`` is never assigned and the
        first access to it raises.

    Returns
    -------
    ``Y_a + temp.T`` for the selected case.

    NOTE(review): the ``zgemm`` return values are discarded; the code relies
    on the last positional argument (1) making the routine write into the
    ``order='F'`` buffers ``Ymod``, ``permmod`` and ``temp`` in place --
    confirm against the ``zgemm`` actually imported by this file.
    """
    # Work buffers. yl and y are rebound further down, so these two initial
    # allocations are not used; xd keeps its zeros where g_dstr != 0.
    Y_a = np.zeros((array_sizes_gen[rank], ngen), dtype=np.complex128)
    Y_b = np.zeros((array_sizes_gen[rank], nbus), dtype=np.complex128)
    yl = np.zeros(array_sizes_bus[rank], dtype=np.complex128)
    xd = np.zeros(array_sizes_gen[rank])
    y = np.zeros(array_sizes_gen[rank], dtype=np.complex128)
    perm = np.zeros((ngen, array_sizes_gen[rank]), dtype=np.complex128)
    diagy = np.zeros((ngen, array_sizes_gen[rank]), dtype=np.complex128)
    Ymod_all = np.zeros((ngen, ngen), dtype=np.complex128)
    Ymod = np.zeros((ngen, ngen), dtype=np.complex128, order='F')
    permmod = np.zeros((array_sizes_gen[rank], ngen),
                       dtype=np.complex128,
                       order='F')
    Y_b_full = np.zeros((ngen, nbus), dtype=np.complex128)
    Y_d_full = np.zeros((nbus, nbus), dtype=np.complex128)
    temp = np.zeros((ngen, array_sizes_gen[rank]),
                    dtype=np.complex128,
                    order='F')
    x = y_sparse(flag)
    # Select the tables for this case. liney is assigned but not read again
    # in this function.
    if flag == 0:
        busy = recv_ipt_bus
        liney = recv_ipt_brch
        Y_d = x
        V = busy[:, 1]
    elif flag == 1:
        busy = fbus
        liney = fline
        Y_d = x
        V = fbus[:, 1]
    elif flag == 2:
        busy = posfbus
        liney = posfline
        Y_d = x
        V = posfbus[:, 1]
    Pl = busy[:, 5]
    Ql = busy[:, 6]
    b_type = busy[:, 9]
    b_pg = busy[:, 3]
    b_qg = busy[:, 4]
    b_type_3 = np.where(b_type == 3)
    # NOTE(review): if busy is a NumPy array, Pl and Ql are views of it, so
    # these two assignments modify columns 5 and 6 of the selected
    # module-level bus table in place (and would do so again on a repeated
    # call) -- confirm this is intended.
    Pl[b_type_3] = Pl[b_type_3] - b_pg[b_type_3]
    Ql[b_type_3] = Ql[b_type_3] - b_qg[b_type_3]
    yl = (Pl - jay * Ql) / (V * V)
    # Add yl to the diagonal of this rank's block of Y_d; the block bounds
    # come from the running sum of array_sizes_bus.
    if rank == 0:
        np.fill_diagonal(
            Y_d[:np.cumsum(array_sizes_bus)[rank], :np.cumsum(array_sizes_bus
                                                             )[rank]],
            Y_d.diagonal()[:np.cumsum(array_sizes_bus)[rank]] + yl)
    else:
        np.fill_diagonal(
            Y_d[np.cumsum(array_sizes_bus)[rank - 1]:np.cumsum(array_sizes_bus)[rank],
                np.cumsum(array_sizes_bus)[rank - 1]:np.cumsum(array_sizes_bus
                                                              )[rank]],
            Y_d.diagonal()[np.cumsum(array_sizes_bus)[rank - 1]:np.
                           cumsum(array_sizes_bus)[rank]] + yl)
    # Per-generator quantities from recv_ipt_gen: ra scales column 4 by
    # basmva / column 2; xd is filled only where column 7 equals 0.
    ra = recv_ipt_gen[:, 4] * basmva / recv_ipt_gen[:, 2]
    g_dstr = recv_ipt_gen[:, 7]
    g_dtr = recv_ipt_gen[:, 6]
    g_m = recv_ipt_gen[:, 2]
    g_dstr_0 = np.where(g_dstr == 0)
    xd[g_dstr_0] = g_dtr[g_dstr_0] * basmva / g_m[g_dstr_0]
    y = 1 / (ra + jay * xd)
    recv_g_bus = recv_ipt_gen[:, 1].astype(int)
    # Place 1 (perm) and y (Y_a, diagy) on the diagonal that corresponds to
    # this rank's generators; rank 0 starts at offset 0, other ranks at the
    # running sum of array_sizes_gen. In both branches permPV ends up being
    # the same object as perm.
    if rank == 0:
        np.fill_diagonal(perm, 1)
        np.fill_diagonal(Y_a, y)
        np.fill_diagonal(diagy, y)
        # Consider one bus with multi machine
        if ngen != nPV:
            # Mark every other generator that shares a g_bus entry.
            for i in range(ngen):
                for j in range(array_sizes_gen[rank]):
                    if i != j and g_bus[i] == g_bus[j]:
                        perm[i, j] = 1
            permPV = perm
        else:
            permPV = perm
    else:
        np.fill_diagonal(
            perm[np.cumsum(array_sizes_gen)[rank - 1]:np.
                 cumsum(array_sizes_gen)[rank], :array_sizes_gen[rank]], 1)
        np.fill_diagonal(
            Y_a[:array_sizes_gen[rank],
                np.cumsum(array_sizes_gen)[rank - 1]:np.cumsum(array_sizes_gen
                                                              )[rank]], y)
        np.fill_diagonal(
            diagy[np.cumsum(array_sizes_gen)[rank - 1]:np.
                  cumsum(array_sizes_gen)[rank], :array_sizes_gen[rank]], y)
        if ngen != nPV:
            # Same sharing test as for rank 0, with the local column index j
            # shifted by this rank's generator offset.
            for i in range(ngen):
                for j in range(array_sizes_gen[rank]):
                    if i != j + np.cumsum(array_sizes_gen)[rank - 1] and g_bus[
                            i] == g_bus[j + np.cumsum(array_sizes_gen)[rank - 1]]:
                        perm[i, j] = 1
            permPV = perm
        else:
            permPV = perm
    # Result is taken from Ymod, not from the return value (see NOTE in the
    # docstring), then summed over all ranks into Ymod_all.
    zgemm(alpha, diagy, permPV, beta, Ymod, 0, 1, 1)
    comm.Allreduce(Ymod, Ymod_all)
    # Likewise the result of this product is read from permmod.
    zgemm(alpha, permPV, Ymod_all, beta, permmod, 1, 0, 1)
    recv_Ymod = np.zeros((array_sizes_gen[rank], ngen), dtype=np.complex128)
    # Distribute Ymod_all from root 0; each rank receives into recv_Ymod.
    comm.Scatterv([
        Ymod_all, split_sizes_input1_gen, displacements_input1_gen,
        MPI.DOUBLE_COMPLEX
    ],
                  recv_Ymod,
                  root=0)
    # Scatter the negated columns of recv_Ymod into the columns of Y_b
    # addressed by g_bus (1-based).
    for i in range(ngen):
        Y_b[:, g_bus[i] - 1] = -recv_Ymod[:, i]
    # Y_c is the transpose of Y_b (no copy is requested here).
    Y_c = Y_b.T
    comm.Allgatherv([Y_b, MPI.DOUBLE_COMPLEX], [
        Y_b_full, split_sizes_output1_gen, displacements_output1_gen,
        MPI.DOUBLE_COMPLEX
    ])
    # Add permmod into Y_d at (generator bus of local row i, g_bus of k).
    for i in range(array_sizes_gen[rank]):
        for k in range(ngen):
            Y_d[recv_g_bus[i] - 1, g_bus[k] -
                1] = Y_d[recv_g_bus[i] - 1, g_bus[k] - 1] + permmod[i, k]
    comm.Allreduce(Y_d, Y_d_full)
    # The three branches below perform the same computation; they differ
    # only in the names of their local variables.
    if flag == 0:
        # Element [2] of the zgesv result is negated and used as the
        # right-hand factor of the next product.
        prefrecV1 = -zgesv(Y_d_full, Y_c, 1, 1)[2]
        #prefrecV1=-np.linalg.solve(Y_d_full,Y_c)
        zgemm(alpha, Y_b_full, prefrecV1, beta, temp, 0, 0, 1)
        prefY11 = Y_a + temp.T
        #print(prefY11)
        return prefY11
    if flag == 1:
        frecV1 = -zgesv(Y_d_full, Y_c, 1, 1)[2]
        #frecV1=-np.linalg.solve(Y_d_full,Y_c)
        zgemm(alpha, Y_b_full, frecV1, beta, temp, 0, 0, 1)
        fY11 = Y_a + temp.T
        #print(fY11)
        return fY11
    if flag == 2:
        posfrecV1 = -zgesv(Y_d_full, Y_c, 1, 1)[2]
        #posfrecV1=-np.linalg.solve(Y_d_full,Y_c)
        zgemm(alpha, Y_b_full, posfrecV1, beta, temp, 0, 0, 1)
        posfY11 = Y_a + temp.T
        #print(posfY11)
        return posfY11
def run_blasZGEMM(alpha, A, B, **kwargs):
    """Call ``blas.zgemm`` with ``alpha``, ``A`` and ``B`` and return its result.

    Any extra keyword arguments are forwarded unchanged to ``blas.zgemm``.
    """
    gemm = blas.zgemm
    result = gemm(alpha, A, B, **kwargs)
    return result