def postprocess(self, caller, gf=None, edges=None):
    from petram.helper.mpi_recipes import safe_flatstack
    from mfem.common.mpi_debug import nicePrint

    if edges is None:
        return

    print("postprocess is called")
    gfr, gfi = gf
    print(caller, gfr)

    try:
        fes = gfr.ParFESpace()
        mesh = fes.GetParMesh()
    except:
        fes = gfr.FESpace()
        mesh = fes.GetMesh()

    from petram.mesh.mesh_utils import get_extended_connectivity
    if not hasattr(mesh, 'extended_connectivity'):
        get_extended_connectivity(mesh)

    l2e = mesh.extended_connectivity['line2edge']
    idx = safe_flatstack([l2e[e] for e in edges])

    if len(idx) > 0:
        dofs = safe_flatstack([fes.GetEdgeDofs(i) for i in idx])
        size = dofs.size // idx.size

        w = []
        for i in idx:
            # don't put this Tr outside the loop....
            Tr = mesh.GetEdgeTransformation(i)
            w.extend([Tr.Weight()] * size)
        w = np.array(w)

        data = gfr.GetDataArray()[dofs] + 1j * gfi.GetDataArray()[dofs]
        field = data / w
    else:
        w = np.array([])
        field = np.array([])

    nicePrint(w)
    nicePrint(field)
def convolve2d(fes1, fes2, kernel=delta, support=None, orderinc=5, is_complex=False,
               trial_domain='all', test_domain='all', verbose=False, coeff=None):
    '''
    fill linear operator for convolution
       \int \int phi_test(x) func(x-x') phi_trial(x') dx dx'

    Generalized version for multi-dim test/trial spaces
       ScalarFE, ScalarFE : func is scalar
       VectorFE, ScalarFE : func is vector (vertical)
       ScalarFE, VectorFE : func is vector (horizontal)
       VectorFE, VectorFE : func is matrix
    '''
    mat, rstart = get_empty_map(fes2, fes1, is_complex=is_complex)

    if fes1.GetNE() == 0:
        assert False, "FESpace does not have element"
    eltrans1 = fes1.GetElementTransformation(0)
    ir = get_rule(fes1.GetFE(0), fes2.GetFE(0), eltrans1, orderinc, verbose)

    name_fes1 = fes1.FEColl().Name()[:2]
    name_fes2 = fes2.FEColl().Name()[:2]

    sdim = fes1.GetMesh().SpaceDimension()

    if name_fes1 in ['RT', 'ND']:
        shape1 = mfem.DenseMatrix()
        vdim1 = fes1.GetMesh().SpaceDimension()
    else:
        shape1 = mfem.Vector()
        vdim1 = 1
    if name_fes2 in ['RT', 'ND']:
        shape2 = mfem.DenseMatrix()
        vdim2 = fes1.GetMesh().SpaceDimension()
    else:
        shape2 = mfem.Vector()
        vdim2 = 1

    #nicePrint("shape", mat.shape, fes2.GetNE(), fes1.GetNE())

    # communication strategy
    #   (1) x2 (ir points on the test space) is collected on each node
    #   (2) x2 is sent to the other nodes
    #   (3) each node computes \int f(x2-x1) phi(x1)
    #   (4) non-zero results of (3) and their global indices are sent back

    # Step (1, 2)
    if verbose:
        dprint1("Step 1,2")
    x2_arr = []
    i2_arr = []

    ptx = mfem.DenseMatrix(ir.GetNPoints(), sdim)

    attrs1 = fes1.GetMesh().GetAttributeArray()
    attrs2 = fes2.GetMesh().GetAttributeArray()

    for i in range(fes2.GetNE()):  # scan test space
        if test_domain != 'all':
            if not attrs2[i] in test_domain:
                continue
        eltrans = fes2.GetElementTransformation(i)
        eltrans.Transform(ir, ptx)
        x2_arr.append(ptx.GetDataArray().copy().transpose())
        i2_arr.append(i)

    if support is not None:
        supports = np.array([support(np.mean(xxx, 0)) for xxx in x2_arr])
    else:
        supports = -np.ones(len(x2_arr))

    if len(i2_arr) > 0:
        ptx_x2 = np.stack(x2_arr)
        i2_arr = np.hstack(i2_arr)
    else:
        ptx_x2 = np.array([[[]]])
        i2_arr = np.array([])
    #nicePrint("x2 shape", ptx_x2.shape)

    if USE_PARALLEL:
        ## note: we could implement a more advanced alg. to reduce
        ## the amount of data exchange..
        x2_all = comm.allgather(ptx_x2)
        i2_all = comm.allgather(i2_arr)
        s_all = comm.allgather(supports)
    else:
        x2_all = [ptx_x2]
        i2_all = [i2_arr]
        s_all = [supports]
    #nicePrint("x2_all shape", supports.shape, len(x2_all), [tmp.shape for tmp in x2_all])

    if USE_PARALLEL:
        # this is global TrueDoF (offset is not subtracted)
        P = fes1.Dof_TrueDof_Matrix()
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF1 = P.indices
        P = fes2.Dof_TrueDof_Matrix()
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF2 = P.indices

    # Step 3
    if verbose:
        dprint1("Step 3")

    vdofs1_senddata = []
    elmats_senddata = []

    for knode1 in range(len(x2_all)):
        #dprint1("new knode1", myid, knode1)
        x2_onenode = x2_all[knode1]
        i2_onenode = i2_all[knode1]
        s_onenode = s_all[knode1]

        elmats_all = []

        # collect vdofs
        vdofs1_all = []
        for j in range(fes1.GetNE()):
            local_vdofs = fes1.GetElementVDofs(j)
            local_vdofs = [vv if vv >= 0 else -1 - vv for vv in local_vdofs]
            if USE_PARALLEL:
                subvdofs2 = [VDoFtoGTDoF1[i] for i in local_vdofs]
                vdofs1_all.append(subvdofs2)
            else:
                vdofs1_all.append(local_vdofs)

        #if myid == 0:
        #    pr = profile_start()
        for i, x2s, su in zip(i2_onenode, x2_onenode, s_onenode):  # loop over fes2
            nd2 = len(x2s)
            #nicePrint("x2s", i, x2s.shape, x2s)
            elmats = []
            for j in range(fes1.GetNE()):
                if trial_domain != 'all':
                    if not attrs1[j] in trial_domain:
                        continue

                # collect integration
                fe1 = fes1.GetFE(j)
                nd1 = fe1.GetDof()
                eltrans = fes1.GetElementTransformation(j)
                dof_sign1 = np.array([1 if vv >= 0 else -1
                                      for vv in fes1.GetElementVDofs(j)])

                if name_fes1 in ['RT', 'ND']:
                    shape1.SetSize(nd1, vdim1)
                else:
                    shape1.SetSize(nd1)

                elmat = np.zeros((nd2, vdim2, nd1), dtype=mat.dtype)
                tmp_int = np.zeros((vdim2, nd1), dtype=mat.dtype).squeeze()
                #if myid == 0: print("fes1 idx", j)

                dataset = []
                shapes = []
                for jj in range(ir.GetNPoints()):
                    ip1 = ir.IntPoint(jj)
                    eltrans.SetIntPoint(ip1)
                    x1 = eltrans.Transform(ip1)
                    if name_fes1 in ['RT', 'ND']:
                        fe1.CalcVShape(eltrans, shape1)
                    else:
                        fe1.CalcShape(ip1, shape1)
                    w = eltrans.Weight() * ip1.weight
                    ss = shape1.GetDataArray().copy()
                    if len(ss.shape) > 1:
                        #dof_sign1 = dof_sign1.reshape(-1, 1)
                        ss = np.transpose(ss)
                    ss = ss * dof_sign1
                    dataset.append((x1, w, ss))

                has_contribution = False
                for kkk, x2 in enumerate(x2s):
                    tmp_int *= 0.0

                    has_contribution2 = False
                    for x1, w, shape_arr in dataset:
                        s = np.sqrt(np.sum((x1 - x2)**2))
                        if su >= 0 and s > su:
                            continue

                        val = kernel(x2 - x1, (x2 + x1) / 2.0, w=w)
                        if val is None:
                            continue
                        if coeff is not None:
                            val = val * coeff((x2 + x1) / 2.0)

                        tmp_int += np.dot(val, shape_arr) * w
                        has_contribution2 = True

                    if has_contribution2:
                        elmat[kkk, ...] = tmp_int
                        has_contribution = True

                if has_contribution:
                    elmats.append((j, elmat))
            #if myid == 0:
            #    pr.dump_stats("/home/shiraiwa/test.prf")
            #    profile_stop(pr)
            #    assert False, "hoge"
            #    pr = profile_start()
            if len(elmats) > 0:
                elmats_all.append((i, elmats))

        vdofs1_senddata.append(vdofs1_all)
        elmats_senddata.append(elmats_all)

        # send this information to knodes;
        '''
        if USE_PARALLEL:
            #nicePrint(vdofs1_all)
            #nicePrint("elmats", [len(x) for x in elmats_all])
            if myid == knode1:
                vdofs1_data = comm.gather(vdofs1_all, root=knode1)
                elmats_data = comm.gather(elmats_all, root=knode1)
            else:
                _ = comm.gather(vdofs1_all, root=knode1)
                _ = comm.gather(elmats_all, root=knode1)
        else:
            vdofs1_data = [vdofs1_all, ]
            elmats_data = [elmats_all, ]
        '''

    if USE_PARALLEL:
        knode1 = 0
        for vdofs1_all, elmats_all in zip(vdofs1_senddata, elmats_senddata):
            if myid == knode1:
                vdofs1_data = comm.gather(vdofs1_all, root=knode1)
                elmats_data = comm.gather(elmats_all, root=knode1)
            else:
                _ = comm.gather(vdofs1_all, root=knode1)
                _ = comm.gather(elmats_all, root=knode1)
            knode1 = knode1 + 1
    else:
        vdofs1_data = vdofs1_senddata
        elmats_data = elmats_senddata

    # Step 4
    if verbose:
        dprint1("Step 4")

    shared_data = []
    mpi_rank = 0
    for vdofs1, elmats_all in zip(vdofs1_data, elmats_data):  # loop over MPI nodes
        #nicePrint("len elmats", len(elmats_all))
        #for i, elmats in enumerate(elmats_all):  # corresponds to loop over fes2
        if verbose:
            coupling = [len(elmats) for i, elmats in elmats_all]
            nicePrint("Element coupling for rank/count", mpi_rank, len(coupling))
            nicePrint("   Average :", (0 if len(coupling) == 0 else np.mean(coupling)))
            nicePrint("   Max/Min :", (0 if len(coupling) == 0 else np.max(coupling)),
                      (0 if len(coupling) == 0 else np.min(coupling)))
        mpi_rank += 1

        for i, elmats in elmats_all:  # corresponds to loop over fes2
            vdofs2 = fes2.GetElementVDofs(i)
            dof_sign2 = np.array([[1 if vv >= 0 else -1 for vv in vdofs2], ]).transpose()
            vdofs2 = [-1 - x if x < 0 else x for x in vdofs2]

            fe2 = fes2.GetFE(i)
            nd2 = fe2.GetDof()

            if name_fes2 in ['RT', 'ND']:
                shape2.SetSize(nd2, vdim2)
            else:
                shape2.SetSize(nd2)

            eltrans = fes2.GetElementTransformation(i)

            #for j, elmat in enumerate(elmats):
            for j, elmat in elmats:
                #print(vdofs1[j], elmat.shape)
                #if elmat is None:
                #    continue
                mm = np.zeros((len(vdofs2), len(vdofs1[j])), dtype=float)

                for ii in range(ir.GetNPoints()):
                    ip2 = ir.IntPoint(ii)
                    eltrans.SetIntPoint(ip2)
                    ww = eltrans.Weight() * ip2.weight
                    if name_fes2 in ['RT', 'ND']:
                        fe2.CalcVShape(eltrans, shape2)
                    else:
                        fe2.CalcShape(ip2, shape2)
                    shape2 *= ww

                    ss = shape2.GetDataArray().reshape(-1, vdim2)
                    ss = ss * dof_sign2

                    tmp_int = elmat[ii, ...].reshape(vdim1, -1)
                    tmp = np.dot(ss, tmp_int)
                    mm = mm + tmp

                # prepare shared data
                if USE_PARALLEL:
                    vdofs22 = [fes2.GetLocalTDofNumber(ii) for ii in vdofs2]
                    vdofs22g = [VDoFtoGTDoF2[ii] for ii in vdofs2]
                    kkk = 0
                    for v2, v2g in zip(vdofs22, vdofs22g):
                        if v2 < 0:
                            shared_data.append([v2g, mm[kkk, :], vdofs1[j]])
                        kkk = kkk + 1

                # merge contribution to final mat
                for k, vv in enumerate(vdofs1[j]):
                    try:
                        if USE_PARALLEL:
                            mmm = mm[np.where(np.array(vdofs22) >= 0)[0], :]
                            vdofs222 = [x for x in vdofs22 if x >= 0]
                        else:
                            vdofs222 = vdofs2
                            mmm = mm
                        #if myid == 1:
                        #    print("check here", vdofs2, vdofs22, vdofs222)
                        #print(mmm[:, [k]])
                        tmp = mat[vdofs222, vv] + mmm[:, [k]]
                        mat[vdofs222, vv] = tmp.flatten()
                    except:
                        import traceback
                        print("error", myid)
                        #print(vdofs1, vdofs22, vdofs222, mmm.shape, k)
                        traceback.print_exc()

    if USE_PARALLEL:
        for source_id in range(nprc):
            data = comm.bcast(shared_data, root=source_id)
            myoffset = fes2.GetMyTDofOffset()
            for v2g, elmat, vdofs1 in data:
                if v2g >= myoffset and v2g < myoffset + mat.shape[0]:
                    i = v2g - myoffset
                    #print("processing this", myid, i, v2g, elmat, vdofs1)
                    mat[i, vdofs1] = mat[i, vdofs1] + elmat

    from scipy.sparse import coo_matrix, csr_matrix
    if USE_PARALLEL:
        if is_complex:
            m1 = csr_matrix(mat.real, dtype=float)
            m2 = csr_matrix(mat.imag, dtype=float)
        else:
            m1 = csr_matrix(mat.real, dtype=float)
            m2 = None

        from mfem.common.chypre import CHypreMat
        start_col = fes1.GetMyTDofOffset()
        end_col = fes1.GetMyTDofOffset() + fes1.GetTrueVSize()
        col_starts = [start_col, end_col, mat.shape[1]]
        M = CHypreMat(m1, m2, col_starts=col_starts)
    else:
        from petram.helper.block_matrix import convert_to_ScipyCoo
        M = convert_to_ScipyCoo(coo_matrix(mat, dtype=mat.dtype))

    return M
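
# A minimal, hypothetical usage sketch of convolve2d (not part of the original
# module): the Cartesian mesh, the H1 order-1 spaces, the Gaussian width `sgm`,
# and the 0.3 support radius are all assumptions made only to illustrate the
# expected kernel(dx, xmid, w=...) and support(x) call signatures used above.
def _convolve2d_usage_example():
    import numpy as np
    import mfem.ser as mfem   # assumes a serial PyMFEM build (MFEM >= 4.3)

    sgm = 0.1   # made-up smoothing width

    def gauss_kernel(dx, xmid, w=None):
        # scalar kernel: value of func(x - x') at separation dx
        return np.exp(-np.sum(dx * dx) / (2 * sgm**2)) / (2 * np.pi * sgm**2)

    mesh = mfem.Mesh.MakeCartesian2D(4, 4, mfem.Element.TRIANGLE)
    fec = mfem.H1_FECollection(1, mesh.Dimension())
    fes = mfem.FiniteElementSpace(mesh, fec)

    # support(x) returns a cutoff radius around x; point pairs farther apart
    # than this radius are skipped during the inner integration loop.
    return convolve2d(fes, fes, kernel=gauss_kernel, support=lambda x: 0.3)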
def convolve1d(fes1, fes2, kernel=delta, support=None, orderinc=5, is_complex=False,
               trial_domain='all', test_domain='all', verbose=False, coeff=None):
    '''
    fill linear operator for convolution
       \int \int phi_test(x) func(x-x') phi_trial(x') dx dx'
    '''
    mat, rstart = get_empty_map(fes2, fes1, is_complex=is_complex)

    eltrans1 = fes1.GetElementTransformation(0)
    ir = get_rule(fes1.GetFE(0), fes2.GetFE(0), eltrans1, orderinc, verbose)

    shape1 = mfem.Vector()
    shape2 = mfem.Vector()

    #nicePrint("shape", mat.shape, fes2.GetNE(), fes1.GetNE())

    # communication strategy
    #   (1) x2 (ir points on the test space) is collected on each node
    #   (2) x2 is sent to the other nodes
    #   (3) each node computes \int f(x2-x1) phi(x1)
    #   (4) non-zero results of (3) and their global indices are sent back

    # Step (1, 2)
    if verbose:
        dprint1("Step 1,2")
    x2_arr = []
    i2_arr = []

    ptx = mfem.DenseMatrix(ir.GetNPoints(), 1)

    attrs1 = fes1.GetMesh().GetAttributeArray()
    attrs2 = fes2.GetMesh().GetAttributeArray()

    for i in range(fes2.GetNE()):  # scan test space
        if test_domain != 'all':
            if not attrs2[i] in test_domain:
                continue
        eltrans = fes2.GetElementTransformation(i)
        eltrans.Transform(ir, ptx)
        x2_arr.append(ptx.GetDataArray().copy())
        i2_arr.append(i)

    if len(i2_arr) > 0:
        ptx_x2 = np.vstack(x2_arr)
        i2_arr = np.hstack(i2_arr)
    else:
        ptx_x2 = np.array([[]])
        i2_arr = np.array([])
    #nicePrint("x2 shape", ptx_x2.shape)

    if USE_PARALLEL:
        ## note: we could implement a more advanced alg. to reduce
        ## the amount of data exchange..
        x2_all = comm.allgather(ptx_x2)
        i2_all = comm.allgather(i2_arr)
    else:
        x2_all = [ptx_x2]
        i2_all = [i2_arr]
    #nicePrint("x2_all shape", x2_all.shape)

    if USE_PARALLEL:
        # this is global TrueDoF (offset is not subtracted)
        P = fes1.Dof_TrueDof_Matrix()
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF1 = P.indices
        P = fes2.Dof_TrueDof_Matrix()
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF2 = P.indices

    # Step 3
    if verbose:
        dprint1("Step 3")

    vdofs1_senddata = []
    elmats_senddata = []

    for knode1 in range(len(x2_all)):
        x2_onenode = x2_all[knode1]
        i2_onenode = i2_all[knode1]

        elmats_all = []

        # collect vdofs
        vdofs1_all = []
        for j in range(fes1.GetNE()):
            local_vdofs = fes1.GetElementVDofs(j)
            if USE_PARALLEL:
                subvdofs2 = [VDoFtoGTDoF1[i] for i in local_vdofs]
                vdofs1_all.append(subvdofs2)
            else:
                vdofs1_all.append(local_vdofs)

        for i, x2s in zip(i2_onenode, x2_onenode):  # loop over fes2
            nd2 = len(x2s)
            #nicePrint(x2s)
            elmats = []
            for j in range(fes1.GetNE()):
                if trial_domain != 'all':
                    if not attrs1[j] in trial_domain:
                        continue

                # collect integration
                fe1 = fes1.GetFE(j)
                nd1 = fe1.GetDof()
                shape1.SetSize(nd1)
                eltrans = fes1.GetElementTransformation(j)

                tmp_int = np.zeros(shape1.Size(), dtype=mat.dtype)
                elmat = np.zeros((nd2, nd1), dtype=mat.dtype)
                #if myid == 0: print("fes1 idx", j)

                dataset = []
                for jj in range(ir.GetNPoints()):
                    ip1 = ir.IntPoint(jj)
                    eltrans.SetIntPoint(ip1)
                    x1 = eltrans.Transform(ip1)[0]
                    fe1.CalcShape(ip1, shape1)
                    w = eltrans.Weight() * ip1.weight
                    dataset.append((x1, w, shape1.GetDataArray().copy()))

                has_contribution = False
                for kkk, x2 in enumerate(x2s):
                    tmp_int *= 0.0
                    for x1, w, shape_arr in dataset:
                        if support is not None:
                            s = support((x1 + x2) / 2.0)
                            if np.abs(x1 - x2) > s:
                                continue

                        has_contribution = True
                        #if myid == 0: print("check here", x1, x2)

                        val = kernel(x2 - x1, (x2 + x1) / 2.0, w=w)
                        if coeff is not None:
                            val = val * coeff((x2 + x1) / 2.0)
                        #shape_arr *= w*val
                        tmp_int += shape_arr * w * val
                    elmat[kkk, :] = tmp_int

                if has_contribution:
                    elmats.append((j, elmat))
            #print(elmats)
            if len(elmats) > 0:
                elmats_all.append((i, elmats))

        vdofs1_senddata.append(vdofs1_all)
        elmats_senddata.append(elmats_all)

        # send this information to knodes;
        '''
        if USE_PARALLEL:
            #nicePrint(vdofs1_all)
            #nicePrint("elmats", [len(x) for x in elmats_all])
            if myid == knode1:
                vdofs1_data = comm.gather(vdofs1_all, root=knode1)
                elmats_data = comm.gather(elmats_all, root=knode1)
            else:
                _ = comm.gather(vdofs1_all, root=knode1)
                _ = comm.gather(elmats_all, root=knode1)
        else:
            vdofs1_data = [vdofs1_all, ]
            elmats_data = [elmats_all, ]
        '''

    if USE_PARALLEL:
        knode1 = 0
        for vdofs1_all, elmats_all in zip(vdofs1_senddata, elmats_senddata):
            if myid == knode1:
                vdofs1_data = comm.gather(vdofs1_all, root=knode1)
                elmats_data = comm.gather(elmats_all, root=knode1)
            else:
                _ = comm.gather(vdofs1_all, root=knode1)
                _ = comm.gather(elmats_all, root=knode1)
            knode1 = knode1 + 1
    else:
        vdofs1_data = vdofs1_senddata
        elmats_data = elmats_senddata

    # Step 4
    if verbose:
        dprint1("Step 4")

    shared_data = []
    mpi_rank = 0
    for vdofs1, elmats_all in zip(vdofs1_data, elmats_data):  # loop over MPI nodes
        #nicePrint("len elmats", len(elmats_all))
        #for i, elmats in enumerate(elmats_all):  # corresponds to loop over fes2
        if verbose:
            coupling = [len(elmats) for i, elmats in elmats_all]
            nicePrint("Element coupling for rank", mpi_rank)
            nicePrint("   Average :", (0 if len(coupling) == 0 else np.mean(coupling)))
            nicePrint("   Max/Min :", (0 if len(coupling) == 0 else np.max(coupling)),
                      (0 if len(coupling) == 0 else np.min(coupling)))
        mpi_rank += 1

        for i, elmats in elmats_all:  # corresponds to loop over fes2
            vdofs2 = fes2.GetElementVDofs(i)
            fe2 = fes2.GetFE(i)
            nd2 = fe2.GetDof()
            shape2.SetSize(nd2)

            eltrans = fes2.GetElementTransformation(i)

            #for j, elmat in enumerate(elmats):
            for j, elmat in elmats:
                #print(vdofs1[j], elmat.shape)
                #if elmat is None:
                #    continue
                mm = np.zeros((len(vdofs2), len(vdofs1[j])), dtype=float)

                for ii in range(ir.GetNPoints()):
                    ip2 = ir.IntPoint(ii)
                    eltrans.SetIntPoint(ip2)
                    ww = eltrans.Weight() * ip2.weight
                    fe2.CalcShape(ip2, shape2)
                    shape2 *= ww

                    tmp_int = elmat[ii, :]
                    tmp = np.dot(np.atleast_2d(shape2.GetDataArray()).transpose(),
                                 np.atleast_2d(tmp_int))
                    mm = mm + tmp
                    #print("check here", myid, mm.shape, tmp.shape)

                # merge contribution to final mat
                if USE_PARALLEL:
                    vdofs22 = [fes2.GetLocalTDofNumber(ii) for ii in vdofs2]
                    vdofs22g = [VDoFtoGTDoF2[ii] for ii in vdofs2]
                    kkk = 0
                    for v2, v2g in zip(vdofs22, vdofs22g):
                        if v2 < 0:
                            shared_data.append([v2g, mm[kkk, :], vdofs1[j]])
                        kkk = kkk + 1

                for k, vv in enumerate(vdofs1[j]):
                    try:
                        if USE_PARALLEL:
                            mmm = mm[np.where(np.array(vdofs22) >= 0)[0], :]
                            vdofs222 = [x for x in vdofs22 if x >= 0]
                        else:
                            vdofs222 = vdofs2
                            mmm = mm
                        #if myid == 1:
                        #    print("check here", vdofs2, vdofs22, vdofs222)
                        #print(mmm[:, [k]])
                        tmp = mat[vdofs222, vv] + mmm[:, [k]]
                        mat[vdofs222, vv] = tmp.flatten()
                    except:
                        import traceback
                        print("error", myid)
                        #print(vdofs1, vdofs22, vdofs222, mmm.shape, k)
                        traceback.print_exc()

    if USE_PARALLEL:
        for source_id in range(nprc):
            data = comm.bcast(shared_data, root=source_id)
            myoffset = fes2.GetMyTDofOffset()
            for v2g, elmat, vdofs1 in data:
                if v2g >= myoffset and v2g < myoffset + mat.shape[0]:
                    i = v2g - myoffset
                    #print("processing this", myid, i, v2g, elmat, vdofs1)
                    mat[i, vdofs1] = mat[i, vdofs1] + elmat

    from scipy.sparse import coo_matrix, csr_matrix
    if USE_PARALLEL:
        if is_complex:
            m1 = csr_matrix(mat.real, dtype=float)
            m2 = csr_matrix(mat.imag, dtype=float)
        else:
            m1 = csr_matrix(mat.real, dtype=float)
            m2 = None

        from mfem.common.chypre import CHypreMat
        start_col = fes1.GetMyTDofOffset()
        end_col = fes1.GetMyTDofOffset() + fes1.GetTrueVSize()
        col_starts = [start_col, end_col, mat.shape[1]]
        M = CHypreMat(m1, m2, col_starts=col_starts)
        #print("mat", M)
    else:
        from petram.helper.block_matrix import convert_to_ScipyCoo
        M = convert_to_ScipyCoo(coo_matrix(mat, dtype=mat.dtype))

    return M
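
# A minimal, hypothetical 1D usage sketch (not part of the original module):
# the 20-element segment mesh, the top-hat kernel, and the 0.1 support radius
# are made up for illustration only. Note that convolve1d evaluates support(x)
# at the midpoint (x1 + x2)/2 and compares it against |x1 - x2|.
def _convolve1d_usage_example():
    import mfem.ser as mfem   # assumes a serial PyMFEM build (MFEM >= 4.3)

    def box_kernel(dx, xmid, w=None):
        # top-hat kernel of half-width 0.05
        return 1.0 if abs(dx) <= 0.05 else 0.0

    mesh = mfem.Mesh.MakeCartesian1D(20)   # segment mesh on [0, 1]
    fec = mfem.H1_FECollection(1, 1)
    fes = mfem.FiniteElementSpace(mesh, fec)

    return convolve1d(fes, fes, kernel=box_kernel, support=lambda x: 0.1)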
def make_solver(self, A):
    offset = np.array(A.RowOffsets().ToList(), dtype=int)
    rows = A.NumRowBlocks()
    cols = A.NumColBlocks()

    local_size = np.diff(offset)
    x = allgather_vector(local_size)
    global_size = np.sum(x.reshape(num_proc, -1), 0)
    nicePrint(local_size)

    global_offset = np.hstack(([0], np.cumsum(global_size)))
    global_roffset = global_offset + offset
    print(global_offset)

    new_offset = np.hstack(([0], np.cumsum(x)))[:-1]
    #             np.cumsum(x.reshape(2,-1).transpose().flatten())))
    new_size = x.reshape(num_proc, -1)
    new_offset = new_offset.reshape(num_proc, -1)
    print(new_offset)

    # index mapping
    def blk_stm_idx_map(i):
        stm_idx = [new_offset[kk, i] + np.arange(new_size[kk, i], dtype=int)
                   for kk in range(num_proc)]
        return np.hstack(stm_idx)

    map = [blk_stm_idx_map(i) for i in range(rows)]

    newi = []
    newj = []
    newd = []
    nrows = np.sum(local_size)
    ncols = np.sum(global_size)

    for i in range(rows):
        for j in range(cols):
            m = self.get_block(A, i, j)
            if m is None:
                continue
                # num_rows, ilower, iupper, jlower, jupper, irn, jcn, data = \
                #     0, 0, 0, 0, 0, np.array([0, 0]), np.array([0, 0]), np.array([0, 0])
            # else:
            num_rows, ilower, iupper, jlower, jupper, irn, jcn, data = m.GetCooDataArray()

            irn = irn  # + global_roffset[i]
            jcn = jcn  # + global_offset[j]
            nicePrint(i, j, map[i].shape, map[i])
            nicePrint(irn)
            irn2 = map[i][irn]
            jcn2 = map[j][jcn]
            newi.append(irn2)
            newj.append(jcn2)
            newd.append(data)

    newi = np.hstack(newi)
    newj = np.hstack(newj)
    newd = np.hstack(newd)

    from scipy.sparse import coo_matrix
    nicePrint(new_offset)
    nicePrint((nrows, ncols),)
    nicePrint('newJ', np.min(newj), np.max(newj))
    nicePrint('newI', np.min(newi) - new_offset[myid, 0],
              np.max(newi) - new_offset[myid, 0])

    mat = coo_matrix((newd, (newi - new_offset[myid, 0], newj)),
                     shape=(nrows, ncols),
                     dtype=newd.dtype).tocsr()
    AA = ToHypreParCSR(mat)

    import mfem.par.strumpack as strmpk
    Arow = strmpk.STRUMPACKRowLocMatrix(AA)

    args = []
    if self.hss:
        args.extend(["--sp_enable_hss",
                     "--hss_verbose",
                     "--sp_hss_min_sep_size",
                     str(int(self.hss_front_size)),
                     "--hss_rel_tol",
                     str(0.01),
                     "--hss_abs_tol",
                     str(1e-4), ])

    print(self.maxiter)
    args.extend(["--sp_maxit", str(int(self.maxiter))])
    args.extend(["--sp_rel_tol", str(self.rctol)])
    args.extend(["--sp_abs_tol", str(self.actol)])
    args.extend(["--sp_gmres_restart", str(int(self.gmres_restart))])

    strumpack = strmpk.STRUMPACKSolver(args, MPI.COMM_WORLD)

    if self.gui.log_level == 0:
        strumpack.SetPrintFactorStatistics(False)
        strumpack.SetPrintSolveStatistics(False)
    elif self.gui.log_level == 1:
        strumpack.SetPrintFactorStatistics(True)
        strumpack.SetPrintSolveStatistics(False)
    else:
        strumpack.SetPrintFactorStatistics(True)
        strumpack.SetPrintSolveStatistics(True)

    strumpack.SetKrylovSolver(strmpk.KrylovSolver_DIRECT)
    strumpack.SetReorderingStrategy(strmpk.ReorderingStrategy_METIS)
    strumpack.SetMC64Job(strmpk.MC64Job_NONE)
    # strumpack.SetSymmetricPattern(True)
    strumpack.SetOperator(Arow)
    strumpack.SetFromCommandLine()

    strumpack._mapper = map
    return strumpack
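
# A standalone, runnable illustration (made-up block sizes, 2 ranks) of the
# index remapping performed by blk_stm_idx_map() inside make_solver(): the rows
# of each block, scattered over the ranks, are packed into one contiguous
# global numbering for the monolithic STRUMPACK matrix.
def _blk_index_map_example():
    import numpy as np

    num_proc = 2
    x = np.array([3, 2, 4, 1])   # local row counts: rank0 -> [3, 2], rank1 -> [4, 1]
    new_size = x.reshape(num_proc, -1)
    new_offset = np.hstack(([0], np.cumsum(x)))[:-1].reshape(num_proc, -1)

    def blk_stm_idx_map(i):
        stm_idx = [new_offset[kk, i] + np.arange(new_size[kk, i], dtype=int)
                   for kk in range(num_proc)]
        return np.hstack(stm_idx)

    print(blk_stm_idx_map(0))   # [0 1 2 5 6 7 8] : block-0 rows from both ranks
    print(blk_stm_idx_map(1))   # [3 4 9]         : block-1 rows from both ranks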
def run_test():
    import numpy as np
    from scipy.sparse import coo_matrix
    import mfem.par as par
    from mfem.common.parcsr_extra import ToHypreParCSR, ToScipyCoo
    from mpi4py import MPI
    from mfem.common.mpi_debug import nicePrint

    comm = MPI.COMM_WORLD
    num_proc = MPI.COMM_WORLD.size
    myid = MPI.COMM_WORLD.rank

    def print_hypre(M, txt):
        for i in range(num_proc):
            MPI.COMM_WORLD.Barrier()
            if myid == i:
                if myid == 0:
                    print(txt)
                    print('MyID: ', myid)
                else:
                    print('MyID: ', myid)
                print(ToScipyCoo(M))

    # make sample matrix
    row = np.array([0, 0, 1, 1])
    col = np.array([0, 3, 1, 2])
    data = np.array([4, 5, 7, 9])
    m = coo_matrix((data, (row, col)), shape=(2, 4))
    m = m.tocsr()
    m = m * (myid + 1)

    M = ToHypreParCSR(m, assert_non_square_no_col_starts=False)
    print_hypre(M, 'matrix M')

    from mfem.common.chypre import CHypreVec
    r1 = np.array([0, 0, 1, 1])
    r2 = np.array([1, 1, 0, 0])
    vec1 = CHypreVec(r1, None)
    vec2 = CHypreVec(r2, None)

    if myid == 0:
        print("v1")
    v1 = (vec1 - vec1 * 1j)
    v2 = (vec1 + vec1 * 1j)
    nicePrint(v1.GlobalVector())
    nicePrint(v2.GlobalVector())
    nicePrint((v1 + v2).GlobalVector())
    nicePrint((v1 - v2).GlobalVector())

    if myid == 0:
        print("v1, v2")
    v1 = (vec1 - vec2 * 1j)
    v2 = (vec1 + vec2 * 1j)
    nicePrint(v1.GlobalVector())
    nicePrint(v2.GlobalVector())
    nicePrint((v1 + v2).GlobalVector())
    nicePrint((v1 - v2).GlobalVector())

    v1 *= 3
    nicePrint(v1.GlobalVector())
    v1 *= 1j
    nicePrint(v1.GlobalVector())
    print(v1.dot(v1))

    v1 *= 1 + 1j
    nicePrint("v1", v1.GlobalVector())
    nicePrint("v2", v2.GlobalVector())
    print(v1.dot(v1))
    print(v1.dot(v2))
if len(sendsize) != size:
    assert False, "senddata size does not match with mpi size"

recvsize = np.empty(size, dtype=int)
disp = list(range(size))
counts = [1] * size
dtype = get_mpi_datatype(sendsize)

s1 = [sendsize, counts, disp, dtype]
r1 = [recvsize, counts, disp, dtype]
comm.Alltoallv(s1, r1)
print("process %s receiving %s " % (rank, recvsize))

recvsize = list(recvsize)
recvdisp = list(np.hstack((0, np.cumsum(recvsize)))[:-1])
recvdata = np.empty(np.sum(recvsize), dtype=int)
senddata = np.hstack(senddata).flatten()

dtype = get_mpi_datatype(senddata[0])
s1 = [senddata, sendsize, senddisp, dtype]
r1 = [recvdata, recvsize, recvdisp, dtype]
comm.Alltoallv(s1, r1)

hoge = alltoall_vector(orgdata)
nicePrint(hoge)
hoge = alltoall_vector(hoge)
nicePrint(hoge)

nicePrint("process %s sending %s receiving %s " % (rank, senddata, r1[0]))
'''
  testing Alltoallv (variable length vector version of alltoall)

  mpirun -np 3 python mpi_alltoallv.py
'''
from mpi4py import MPI
import numpy as np

from mfem.common.mpi_dtype import get_mpi_datatype
from mfem.common.mpi_debug import nicePrint, niceCall
from petram.helper.mpi_recipes import alltoall_vector, alltoall_vectorv

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

a_size = 1

if rank == 0:
    data = [[np.arange(x, dtype="float64") * x * y for x in range(size)]
            for y in range(size)]
else:
    data = [[np.ones(2, dtype="float64") * rank for x in range(rank)]
            for y in range(size)]

nicePrint(data)
hoge = alltoall_vectorv(data)
nicePrint(hoge)
hoge = alltoall_vectorv(hoge)
nicePrint(hoge)