def run(self, engine, is_first=True, return_instance=False):
    """Build a ``DistanceSolverInstance``, optionally solve, and save.

    Parameters
    ----------
    engine :
        Engine object; ``remove_solfiles`` is called on it when
        ``self.clear_wdir`` is set.
    is_first :
        Only echoed in the entry log message by this method.
    return_instance :
        When true, the freshly created instance is returned right after
        ``set_blk_mask`` and nothing is solved or saved.

    Returns
    -------
    The solver instance when ``return_instance`` is true, otherwise
    ``False`` (the original unconditionally reset ``is_first``).
    """
    dprint1("Entering run (is_first=", is_first, ")", self.fullpath())

    if self.clear_wdir:
        engine.remove_solfiles()

    solver_instance = DistanceSolverInstance(self, engine)
    solver_instance.set_blk_mask()
    if return_instance:
        return solver_instance

    # init_only skips the solve; the solution is still written below.
    if not self.init_only:
        solver_instance.solve()

    save_options = dict(ksol=0,
                        skip_mesh=False,
                        mesh_only=False,
                        save_parmesh=self.save_parmesh)
    solver_instance.save_solution(**save_options)
    # Updating engine.sol and saving probes are disabled for this solver.

    dprint1(debug.format_memory_usage())
    return False
def run(self, engine, is_first=True, return_instance=False):
    """Build an ``MGInstance``, assemble/solve unless ``init_only``, and save.

    Parameters
    ----------
    engine :
        Engine object; its ``sol`` attribute is updated by this method.
    is_first :
        When true (and not ``init_only``), ``assemble`` is called before
        ``solve`` and the flag is cleared.
    return_instance :
        When true, the instance is returned right after ``set_blk_mask``.

    Returns
    -------
    The instance when ``return_instance`` is true, otherwise the
    (possibly cleared) ``is_first`` flag.
    """
    dprint1("Entering run (is_first=", is_first, ")", self.fullpath())

    if self.clear_wdir:
        engine.remove_solfiles()

    mg = MGInstance(self, engine)
    mg.set_blk_mask()
    if return_instance:
        return mg

    mg.configure_probes(self.probe)

    if not self.init_only:
        if is_first:
            mg.assemble()
            is_first = False
        mg.solve()
    else:
        # No solve: expose the already-assembled block as the solution.
        engine.sol = engine.assembled_blocks[1][0]
        mg.sol = engine.sol

    mg.save_solution(
        ksol=0,
        skip_mesh=False,
        mesh_only=False,
        save_parmesh=self.save_parmesh,
    )
    engine.sol = mg.sol
    mg.save_probe()

    dprint1(debug.format_memory_usage())
    return is_first
def run(self, engine, is_first=True, return_instance=False):
    """Build a ``StandardMeshAdaptSolver``, assemble/solve, and save.

    Parameters
    ----------
    engine :
        Engine object; its ``sol`` attribute is updated by this method.
    is_first :
        When true (and not ``init_only``), ``assemble`` is called before
        ``solve`` and the flag is cleared.
    return_instance :
        When true, the instance is returned right after ``set_blk_mask``.

    Returns
    -------
    The instance when ``return_instance`` is true, otherwise the
    (possibly cleared) ``is_first`` flag.
    """
    dprint1("Entering run", is_first, self.fullpath())

    if self.clear_wdir:
        engine.remove_solfiles()

    adapt_solver = StandardMeshAdaptSolver(self, engine)
    adapt_solver.set_blk_mask()
    if return_instance:
        return adapt_solver

    # Probes are deliberately not configured for this solver.

    if not self.init_only:
        if is_first:
            adapt_solver.assemble()
            is_first = False
        adapt_solver.solve()
    else:
        # No solve: expose the already-assembled block as the solution.
        engine.sol = engine.assembled_blocks[1][0]
        adapt_solver.sol = engine.sol

    adapt_solver.save_solution(
        ksol=0,
        skip_mesh=False,
        mesh_only=False,
        save_parmesh=self.save_parmesh,
    )
    engine.sol = adapt_solver.sol

    dprint1(debug.format_memory_usage())
    return is_first
def call_solver(self, engine):
    """Shrink the block system, finalize it, and run the active solver.

    Parameters
    ----------
    engine :
        Engine providing ``eliminate_and_shrink`` and
        ``finalize_linearsystem``.

    Returns
    -------
    tuple
        ``(solall, PT)`` where ``solall`` is the solver output (converted
        with ``real_to_complex`` when the physics is complex and
        ``self.assemble_real`` is set) and ``PT`` is ``P.transpose()`` of
        the third value returned by ``eliminate_and_shrink``.
    """
    active_solver = self.get_active_solver()
    complex_flags = [phys.is_complex() for phys in self.get_phys()]
    phys_real = not any(complex_flags)
    ls_type = active_solver.linear_system_type(self.assemble_real, phys_real)
    # ls_type:
    #   coo       (matrix in coo format : DMUMP or ZMUMPS)
    #   coo_real  (matrix in coo format converted from complex
    #              matrix : DMUMPS)
    #   # below is a plan...
    #   blk            (matrix made mfem:block operator)
    #   blk_real       (matrix made mfem:block operator for complex problem)
    #                  (unknowns are in the order of
    #                   R_fes1, R_fes2,... I_fes1, Ifes2...)
    #   blk_interleave (unknowns are in the order of
    #                   R_fes1, I_fes1, R_fes2, I_fes2,...)
    #   None (not supported)

    if debug.debug_memory:
        dprint1("Block Matrix before shring :\n", self.M)
        dprint1(debug.format_memory_usage())

    shrunk_M, shrunk_B, P = engine.eliminate_and_shrink(
        self.M, self.B, self.Me)

    if debug.debug_memory:
        dprint1("Block Matrix after shrink :\n", shrunk_M)
        dprint1(debug.format_memory_usage())

    is_complex = not phys_real
    final_M, final_B = engine.finalize_linearsystem(
        shrunk_M, shrunk_B, is_complex, format=ls_type)

    solall = active_solver.solve(engine, final_M, final_B)

    # A complex problem assembled as a real system must be folded back.
    if is_complex and self.assemble_real:
        solall = active_solver.real_to_complex(solall, final_M)

    return solall, P.transpose()
def run(self, engine):
    """Assemble, solve, store, and save a solution in one pass.

    When ``self.init_only`` is set, assembly and solving are skipped and
    ``save_solution`` receives ``None`` as extra data. Solution files are
    removed via ``engine.remove_solfiles()`` right before saving.

    Parameters
    ----------
    engine :
        Engine object; initialized through ``self.init_sol`` when
        ``engine.isInitialized`` is false.
    """
    _phys_target = self.get_phys()  # result unused; call kept as in original

    if self.clear_wdir:
        engine.remove_solfiles()
    if not engine.isInitialized:
        self.init_sol(engine)

    extra_data = None
    if not self.init_only:
        matvecs, matvecs_c = self.assemble(engine)
        self.generate_linear_system(engine, matvecs, matvecs_c)
        solall, PT = self.call_solver(engine)
        extra_data = self.store_sol(engine, matvecs, solall, PT, 0)
        dprint1("Extra Data", extra_data)

    engine.remove_solfiles()
    dprint1("writing sol files")
    self.save_solution(engine, extra_data)

    print(debug.format_memory_usage())
def step(self, is_first):
    """Advance the time-dependent solution by one time step.

    The linear system is (re)assembled as needed, solved with
    ``self.linearsolver`` (allocated lazily on first use), time-derivative
    variables are updated by a finite difference, child instances are
    solved, probes are appended, and the current/previous solution blocks
    are swapped for the next step.

    Parameters
    ----------
    is_first :
        When true and ``self.counter == 0``, re-assembly is skipped and
        the matrix is treated as changed.

    Returns
    -------
    tuple
        ``(finished, checkpoint_written)`` where ``finished`` is
        ``self.time >= self.et`` after the step.

    Raises
    ------
    AssertionError
        When the physics is complex and ``self.assemble_real`` is set;
        the real-to-complex conversion is not supported here yet.
    """
    engine = self.engine
    mask = self.blk_mask
    engine.copy_block_mask(mask)

    # if not self.pre_assembled:
    #    assert False, "pre_assmeble must have been called"

    if self.counter == 0 and is_first:
        M_changed = True
    else:
        # A changed time step forces a full update; otherwise only the
        # time-dependent terms are refreshed.
        if self._dt_used_in_assemble != self.time_step:
            engine.set_update_flag('UpdateAll')
        else:
            engine.set_update_flag('TimeDependent')
        engine.run_apply_essential(self.get_phys(),
                                   self.get_phys_range(),
                                   update=True)
        engine.run_fill_X_block(update=True)
        M_updated, B_updated = self.pre_assemble(update=True)
        M_changed = self.assemble(update=True)

    A, X, RHS, Ae, B, M, depvars = self.blocks

    if M_changed or self.counter == 0:
        AA = engine.finalize_matrix(A, mask, not self.phys_real,
                                    format=self.ls_type, verbose=False)
    BB = engine.finalize_rhs([RHS], A, X[-1], mask, not self.phys_real,
                             format=self.ls_type, verbose=False)

    if self.counter == 0:
        # self.sol must be set before the very first checkpoint is written
        self.sol = engine.sol
        self.write_checkpoint_solution()
        self.icheckpoint += 1

    depvars = [x for i, x in enumerate(depvars) if mask[0][i]]
    if self.linearsolver is None:
        is_complex = self.gui.is_complex()
        self.linearsolver = self.linearsolver_model.allocate_solver(
            is_complex, engine)
        # a freshly allocated solver always needs its operator set
        M_changed = True

    if M_changed:
        self.linearsolver.SetOperator(AA,
                                      dist=engine.is_matrix_distributed,
                                      name=depvars)

    if self.linearsolver.is_iterative:
        XX = engine.finalize_x(X[-1], RHS, mask, not self.phys_real,
                               format=self.ls_type)
    else:
        XX = None
    solall = self.linearsolver.Mult(BB, x=XX, case_base=engine.case_base)
    engine.case_base += len(BB)

    if not self.phys_real and self.assemble_real:
        # Raised explicitly: the former ``assert False`` is stripped under
        # ``python -O``, after which execution reached a call that used
        # the undefined name ``solell``.  Once the conversion is debugged
        # the intended follow-up is
        #   solall = self.linearsolver_model.real_to_complex(solall, A)
        raise AssertionError(
            "this has to be debugged (convertion from real to complex)")

    A.reformat_central_mat(solall, 0, X[0], mask)
    # this apply interpolation operator
    sol, sol_extra = engine.split_sol_array(X[0])

    for name in self.time_deriv_vars:
        offset1 = engine.dep_var_offset(name)  # vt
        offset2 = engine.dep_var_offset(name[:-1])  # v
        # backward difference: vt = (v_new - v_old) / dt
        X[0][offset1, 0] = (X[0][offset2, 0] -
                            X[-1][offset2, 0]) * (1. / self.time_step)

    for child in self.child_instance:
        # for now update_operator is True only for the first run.
        child.solve(update_operator=(self.counter == 0))

    self.time = self.time + self.time_step
    self.counter += 1

    for p in self.probe:
        p.append_sol(X[0], self.time)

    # swap X[0] and X[-1] for next computing
    X[0], X[-1] = X[-1], X[0]

    self.sol = X[-1]
    engine.sol = self.sol

    engine.recover_sol(sol, access_idx=-1)
    # ToDo. Provide a way to use Lagrange multipler in model
    extra_data = engine.process_extra(sol_extra)

    checkpoint_written = False
    if self.checkpoint[self.icheckpoint] < self.time:
        self.write_checkpoint_solution()
        self.icheckpoint += 1
        checkpoint_written = True

    dprint1("TimeStep (" + str(self.counter) + "), t=" + str(self.time) +
            "...done.")
    dprint1(debug.format_memory_usage())
    return self.time >= self.et, checkpoint_written
def gather_dataset(idx1, idx2, fes1, fes2, trans1, trans2, tol,
                   shape_type='scalar', mode='surface'):
    """Collect per-element DoF data on two spaces and pair the elements.

    Elements selected by ``idx1`` on ``fes1`` and ``idx2`` on ``fes2`` are
    located, their centers computed, and each ``fes1`` element is paired
    with the nearest ``fes2`` element center using a KD-tree.

    Parameters
    ----------
    idx1, idx2 :
        Selectors passed to ``find_element`` for ``fes1`` / ``fes2``.
    fes1, fes2 :
        Finite element spaces; ``fes2`` defaults to ``fes1`` when ``None``.
    trans1, trans2 :
        Coordinate transformations; ``None`` is replaced by ``notrans``.
    tol :
        Not used by this function.
    shape_type : {'scalar', 'vector'}
        Selects ``get_shape`` or ``get_vshape``.
    mode : {'volume', 'surface', 'edge'}
        Selects which ``get_*_mode`` helper determines the element kind.

    Returns
    -------
    tuple
        ``(data, map_1_2)`` where ``data`` is the 8-tuple
        ``(pt1all, pt2all, pto1all, pto2all, k1all, k2all, sh1all, sh2all)``
        and ``map_1_2`` holds, for each ``fes1`` element, the index of the
        matched ``fes2`` element.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported values.
    AssertionError
        If ``shape_type`` is not one of the supported values.
    """
    dprint1("gather_dataset1", debug.format_memory_usage())

    if fes2 is None:
        fes2 = fes1
    if trans1 is None:
        trans1 = notrans
    if trans2 is None:
        trans2 = notrans

    mesh1 = fes1.GetMesh()
    mesh2 = fes2.GetMesh()

    if mode == 'volume':
        mode1 = get_volume_mode(mesh1.Dimension(), mesh1.SpaceDimension())
        mode2 = get_volume_mode(mesh2.Dimension(), mesh2.SpaceDimension())
    elif mode == 'surface':
        mode1 = get_surface_mode(mesh1.Dimension(), mesh1.SpaceDimension())
        mode2 = get_surface_mode(mesh2.Dimension(), mesh2.SpaceDimension())
    elif mode == 'edge':
        mode1 = get_edge_mode(mesh1.Dimension(), mesh1.SpaceDimension())
        mode2 = get_edge_mode(mesh2.Dimension(), mesh2.SpaceDimension())
    else:
        # Without this branch an unknown mode only failed later with an
        # unbound-variable error on ``mode1``.
        raise ValueError("Unknown mode: " + str(mode))

    # collect data
    ibdr1 = find_element(fes1, idx1, mode=mode1)
    ibdr2 = find_element(fes2, idx2, mode=mode2)
    ct1 = find_el_center(fes1, ibdr1, trans1, mode=mode1)
    ct2 = find_el_center(fes2, ibdr2, trans2, mode=mode2)
    arr1 = get_element_data(fes1, ibdr1, trans1, mode=mode1)
    # NOTE(review): trans1 (not trans2) is passed for fes2 here, unlike the
    # find_el_center call above -- kept as in the original; confirm intent.
    arr2 = get_element_data(fes2, ibdr2, trans1, mode=mode2,
                            use_global=True)

    if shape_type == 'scalar':
        sh1all = get_shape(fes1, ibdr1, mode=mode1)
        sh2all = get_shape(fes2, ibdr2, mode=mode2)
    elif shape_type == 'vector':
        sh1all = get_vshape(fes1, ibdr1, mode=mode1)
        sh2all = get_vshape(fes2, ibdr2, mode=mode2)
    else:
        assert False, "Unknown shape type"

    dprint1("gather_dataset2", debug.format_memory_usage())

    # pt is on (u, v), pto is (x, y, z)
    # An empty (or missing) element list cannot be unzipped into three
    # sequences; fall back to empty tuples in that case only, instead of
    # swallowing every exception (including KeyboardInterrupt).
    try:
        k1all, pt1all, pto1all = zip(*arr1)
    except (TypeError, ValueError):
        k1all, pt1all, pto1all = (), (), ()
    try:
        k2all, pt2all, pto2all = zip(*arr2)
    except (TypeError, ValueError):
        k2all, pt2all, pto2all = (), (), ()

    if use_parallel:
        # share ibr2 (destination information among nodes...)
        # A rank with no local centers reports dimension 0; the maximum
        # over ranks gives the common column count used for reshaping.
        ct1dim = ct1.shape[1] if ct1.size > 0 else 0
        ct1dim = comm.allgather(ct1dim)
        ct1 = np.atleast_2d(ct1).reshape(-1, max(ct1dim))
        ct2 = np.atleast_2d(ct2).reshape(-1, max(ct1dim))
        ct2 = allgather_vector(ct2, MPI.DOUBLE)

    dprint1("gather_dataset3", debug.format_memory_usage())

    # mapping between elements
    from scipy.spatial import cKDTree
    tree = cKDTree(ct2)
    ctr_dist, map_1_2 = tree.query(ct1)

    dprint1("gather_dataset4", debug.format_memory_usage())

    if ctr_dist.size > 0 and np.max(ctr_dist) > 1e-15:
        print('Center Dist may be too large (check mesh): ' +
              str(np.max(ctr_dist)))

    if use_parallel:
        pt2all, pto2all, k2all, sh2all, map_1_2 = redistribute_pt2_k2(
            pt2all, pto2all, k2all, sh2all, map_1_2)

    dprint1("gather_dataset5", debug.format_memory_usage())

    # map is fill as transposed shape (row = fes1)
    data = (pt1all, pt2all, pto1all, pto2all,
            k1all, k2all, sh1all, sh2all)
    return data, map_1_2
def map_dof_vector(map, fes1, fes2, pt1all, pt2all, pto1all, pto2all, k1all, k2all, sh1all, sh2all, map_1_2, trans1, trans2, tol, tdof, rstart):
    # Fill the sparse matrix `map` with coefficients relating vector-valued
    # DoFs of fes1 (rows, offset by `rstart`) to DoFs of fes2 (columns).
    #
    # For every point of every fes1 element, the closest point(s) of the
    # paired fes2 element (pairing given by `map_1_2`) are located.
    # Depending on how many fes2 points coincide (1, 2 or 3), a scalar
    # factor or a small matrix relating the transformed shape vectors is
    # computed and written through `make_entry`.
    #
    # Parameters (as used below):
    #   map      : matrix written as map[row - rstart, col] = value;
    #              `.shape` and `.nnz` are read in the parallel branch.
    #   fes1     : only used in parallel, for Dof_TrueDof_Matrix().
    #   fes2     : not used in this function.
    #   pt1all.. : per-element sequences, indexed by k0 (fes1) or k2 (fes2).
    #   trans1/2 : callables applied to points to get transformed coords.
    #   tol      : tolerance; also sets the rounding precision `decimals`.
    #   tdof     : DoF ids to skip; sorted here so bisect can be used.
    #   rstart   : row offset subtracted from row indices.
    #
    # Returns `map` (modified in place).
    # Raises AssertionError when more than three fes2 points coincide, or
    # (parallel) when the global nnz does not match the entry count.
    dprint1("map_dof_vector1", debug.format_memory_usage())
    pt = []
    subvdofs1 = []  # kept sorted (insort) so membership can use bisect
    subvdofs2 = []
    num1 = 0  # entries written from the single-match case
    num2 = 0  # entries written from the 2- and 3-match cases
    num_pts = 0
    # number of decimals kept when rounding coefficients
    decimals = int(np.abs(np.log10(tol)))
    if use_parallel:
        P = fes1.Dof_TrueDof_Matrix()
        from mfem.common.parcsr_extra import ToScipyCoo
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF = P.indices  #this is global TrueDoF (offset is not subtracted)
        external_entry = []
        gtdof_check = []

    def make_entry(r, c, value, num_entry):
        # Write one rounded coefficient and return the updated entry count.
        # r is a pair taken from columns [1, 2] of newk1; r[1] == -1 routes
        # the entry to `external_entry` instead of `map`
        # (presumably a DoF owned by another rank -- TODO confirm).
        value = np.around(value, decimals)
        if value == 0:
            return num_entry
        if r[1] != -1:
            map[r[1] - rstart, c] = value
            num_entry = num_entry + 1
            bisect.insort_left(subvdofs1, r[1])
            #subvdofs1.append(r[1])
        else:
            # negative r[0] is decoded as -1 - r[0] before the lookup
            rr = r[0] if r[0] >= 0 else -1 - r[0]
            gtdof = VDoFtoGTDoF[rr]
            # record only the first entry seen for each global true DoF
            if not gtdof in gtdof_check:
                external_entry.append((gtdof, c, value))
                gtdof_check.append(gtdof)
        return num_entry

    tdof = sorted(tdof)
    for k0 in range(len(pt1all)):
        k2 = map_1_2[k0]
        pt1 = pt1all[k0]
        pto1 = pto1all[k0]
        newk1 = k1all[k0]  #(i local DoF, global DoF)
        sh1 = sh1all[k0]
        pt2 = pt2all[k2]
        pto2 = pto2all[k2]
        newk2 = k2all[k2]
        sh2 = sh2all[k2]
        #if myid == 1: print newk1[:,2], newk1[:,1], rstart
        #if myid == 1:
        #    x = [r if r >= 0 else -1-r for r in newk1[:,1]]
        #    print [VDoFtoGTDoF[r] for r in x]
        #dprint1(len(np.unique(newk1[:,2])) == len(newk1[:,2]))
        for k, p in enumerate(pt1):
            #if idx[k]: continue
            num_pts = num_pts + 1
            # skip DoFs listed in tdof
            #if newk1[k,2] in tdof: continue
            iii = bisect.bisect_left(tdof, newk1[k, 2])
            if iii != len(tdof) and tdof[iii] == newk1[k, 2]:
                continue
            # skip DoFs that already received an entry
            #if newk1[k,2] in subvdofs1: continue
            iii = bisect.bisect_left(subvdofs1, newk1[k, 2])
            if iii != len(subvdofs1) and subvdofs1[iii] == newk1[k, 2]:
                continue
            # indices of all fes2 points at the minimum squared distance
            dist = np.sum((pt2 - p)**2, 1)
            d = np.where(dist == np.min(dist))[0]
            #if myid == 1: dprint1('min_dist', np.min(dist))
            if len(d) == 1:
                ''' this factor is not always 1 '''
                d = d[0]
                # sign(x + 0.5) maps x >= 0 to +1 and x <= -1 to -1
                s = np.sign(newk1[k, 1] + 0.5) * np.sign(newk2[d, 1] + 0.5)
                p1 = pto1[k]
                p2 = pto2[d]
                # finite-difference step for probing the transformations
                delta = np.sum(np.std(pto1, 0)) / np.sum(np.std(sh1, 0)) / 10.
                v1 = trans1(p1) - trans1(p1 + delta * sh1[newk1[k, 0]])
                v2 = trans2(p2) - trans2(p2 + delta * sh2[newk2[d, 0]])
                # projection coefficient of v2 onto v1, with sign s
                fac = np.sum(v1 * v2) / np.sum(v1 * v1) * s
                #except RuntimeWarning:
                #    print(pto1, pto1.shape, p1,p2, s, delta, sh1[newk1[k, 0]], sh2[newk2[d, 0]])
                #    assert False, "Got Here"
                num1 = make_entry(newk1[k, [1, 2]], newk2[d, 2], fac, num1)
            elif len(d) == 2:
                # two coincident fes2 points: use the two fes1 points
                # closest to p as their counterparts
                dd = np.argsort(np.sum((pt1 - p)**2, 1))
                p1 = pto1[dd[0]]
                p3 = pto2[d[0]]
                p2 = pto1[dd[1]]
                p4 = pto2[d[1]]
                delta = np.sum(np.std(pto1, 0)) / np.sum(np.std(sh1, 0)) / 10.
                v1 = trans1(p1) - trans1(p1 + delta * sh1[newk1[dd[0], 0]])
                v2 = trans1(p2) - trans1(p2 + delta * sh1[newk1[dd[1], 0]])
                v3 = trans2(p3) - trans2(p3 + delta * sh2[newk2[d[0], 0]])
                v4 = trans2(p4) - trans2(p4 + delta * sh2[newk2[d[1], 0]])
                v1 = v1 * np.sign(newk1[dd[0], 1] + 0.5)
                v2 = v2 * np.sign(newk1[dd[1], 1] + 0.5)
                v3 = v3 * np.sign(newk2[d[0], 1] + 0.5)
                v4 = v4 * np.sign(newk2[d[1], 1] + 0.5)
                # NOTE(review): s is assigned here but not read again in
                # this branch.
                s = np.sign(newk1[k, 1] + 0.5) * np.sign(newk2[d, 1] + 0.5)

                def vnorm(v):
                    return v / np.sqrt(np.sum(v**2))

                v1n = vnorm(v1)
                v2n = vnorm(v2)
                v3n = vnorm(v3)
                v4n = vnorm(v4)
                # v1 || v3 and v2 || v4 (within tol): straight pairing
                if (np.abs(np.abs(np.sum(v1n * v3n)) - 1) < tol and np.abs(np.abs(np.sum(v2n * v4n)) - 1) < tol):
                    fac1 = np.sum(v1 * v3) / np.sum(v1 * v1)
                    fac2 = np.sum(v2 * v4) / np.sum(v2 * v2)
                    num2 = make_entry(newk1[dd[0], [1, 2]], newk2[d[0], 2], fac1, num2)
                    num2 = make_entry(newk1[dd[1], [1, 2]], newk2[d[1], 2], fac2, num2)
                # v2 || v3 and v1 || v4 (within tol): crossed pairing
                elif (np.abs(np.abs(np.sum(v2n * v3n)) - 1) < tol and np.abs(np.abs(np.sum(v1n * v4n)) - 1) < tol):
                    fac1 = np.sum(v1 * v4) / np.sum(v1 * v1)
                    fac2 = np.sum(v2 * v3) / np.sum(v2 * v2)
                    num2 = make_entry(newk1[dd[0], [1, 2]], newk2[d[1], 2], fac1, num2)
                    num2 = make_entry(newk1[dd[1], [1, 2]], newk2[d[0], 2], fac2, num2)
                else:
                    # general case: solve for the 2x2 matrix relating
                    # (v1, v2) to (v3, v4)

                    def proj2d(v, e1, e2):
                        return np.array([np.sum(v * e1), np.sum(v * e2)])

                    if len(v1) == 3:  # if vector is 3D, needs to prjoect on surface
                        e3 = np.cross(v1n, v2n)
                        e1 = v1n
                        e2 = np.cross(e1, e3)
                        v1 = proj2d(v1, e1, e2)
                        v2 = proj2d(v2, e1, e2)
                        v3 = proj2d(v3, e1, e2)
                        v4 = proj2d(v4, e1, e2)
                    m1 = np.transpose(np.vstack((v1, v2)))
                    m2 = np.transpose(np.vstack((v3, v4)))
                    m = np.dot(np.linalg.inv(m1), m2)
                    m = np.around(np.linalg.inv(m), decimals=decimals)
                    # entries are written with transposed indices of m
                    num2 = make_entry(newk1[dd[0], [1, 2]], newk2[d[0], 2], m[0, 0], num2)
                    num2 = make_entry(newk1[dd[0], [1, 2]], newk2[d[1], 2], m[1, 0], num2)
                    num2 = make_entry(newk1[dd[1], [1, 2]], newk2[d[0], 2], m[0, 1], num2)
                    num2 = make_entry(newk1[dd[1], [1, 2]], newk2[d[1], 2], m[1, 1], num2)
            elif len(d) == 3:
                # three coincident fes2 points: same idea with a 3x3 matrix
                dd = np.argsort(np.sum((pt1 - p)**2, 1))
                p1 = [pto1[dd[i]] for i in [0, 1, 2]]
                p2 = [pto2[d[i]] for i in [0, 1, 2]]
                delta = np.sum(np.std(pto1, 0)) / np.sum(np.std(sh1, 0)) / 10.
                v1 = [
                    trans1(p1[i]) - trans1(p1[i] + delta * sh1[newk1[dd[i], 0]])
                    for i in [0, 1, 2]
                ]
                v2 = [
                    trans2(p2[i]) - trans2(p2[i] + delta * sh2[newk2[d[i], 0]])
                    for i in [0, 1, 2]
                ]
                v1 = [
                    v1[i] * np.sign(newk1[dd[i], 1] + 0.5) for i in [0, 1, 2]
                ]
                v2 = [v2[i] * np.sign(newk2[d[i], 1] + 0.5) for i in [0, 1, 2]]
                # NOTE(review): s, v1n and v2n below are assigned but not
                # read again in this branch.
                s = np.sign(newk1[k, 1] + 0.5) * np.sign(newk2[d, 1] + 0.5)

                def vnorm(v):
                    return v / np.sqrt(np.sum(v**2))

                v1n = [vnorm(v) for v in v1]
                v2n = [vnorm(v) for v in v2]
                m1 = np.transpose(np.vstack(v1))
                m2 = np.transpose(np.vstack(v2))
                m = np.dot(np.linalg.inv(m1), m2)
                m = np.around(np.linalg.inv(m), decimals=decimals)
                num2 = make_entry(newk1[dd[0], [1, 2]], newk2[d[0], 2], m[0, 0], num2)
                num2 = make_entry(newk1[dd[0], [1, 2]], newk2[d[1], 2], m[1, 0], num2)
                num2 = make_entry(newk1[dd[0], [1, 2]], newk2[d[2], 2], m[2, 0], num2)
                num2 = make_entry(newk1[dd[1], [1, 2]], newk2[d[0], 2], m[0, 1], num2)
                num2 = make_entry(newk1[dd[1], [1, 2]], newk2[d[1], 2], m[1, 1], num2)
                num2 = make_entry(newk1[dd[1], [1, 2]], newk2[d[2], 2], m[2, 1], num2)
                num2 = make_entry(newk1[dd[2], [1, 2]], newk2[d[0], 2], m[0, 2], num2)
                num2 = make_entry(newk1[dd[2], [1, 2]], newk2[d[1], 2], m[1, 2], num2)
                num2 = make_entry(newk1[dd[2], [1, 2]], newk2[d[2], 2], m[2, 2], num2)
            else:
                print(pt1, pt2)
                ''' newk1 = k1all[k0] #(i local DoF, global DoF) sh1 = sh1all[k0] pto2 = pto2all[k2] newk2 = k2all[k2] sh2 = sh2all[k2] '''
                # to do support three vectors
                raise AssertionError("more than three dofs at same place")
        # third column of every newk2 row is collected; subvdofs2 is not
        # read anywhere else in this function
        subvdofs2.extend([s for k, v, s in newk2])
    dprint1("map_dof_vector2", debug.format_memory_usage())
    num_entry = num1 + num2
    if use_parallel:
        dprint1("total entry (before)", sum(allgather(num_entry)))
        #nicePrint("data to exchange", len(external_entry))
        external_entry = redistribute_external_entry(external_entry, rstart + map.shape[0])
        if len(external_entry.shape) == 2:
            # drop rows already written locally, then keep one entry per row
            idx1 = np.in1d(external_entry[:, 0], subvdofs1, invert=True)
            val, idx2 = np.unique(external_entry[idx1, 0], return_index=True)
            external_entry = external_entry[idx1][idx2]
            for r, c, d in external_entry:
                #if not r in subvdofs1:
                num_entry = num_entry + 1
                #print("adding",myid, r, c, d )
                map[r - rstart, c] = d
                #subvdofs1.append(r)
        dprint1("map_dof_vector3", debug.format_memory_usage())
        ''' external_entry = sum(comm.allgather(external_entry),[]) #nicePrint(external_entry) for r, c, d in external_entry: h = map.shape[0] if (r - rstart >= 0 and r - rstart < h and not r in subvdofs1): num_entry = num_entry + 1 print("adding",myid, r, c, d ) map[r-rstart, c] = d subvdofs1.append(r) '''
        total_entry = sum(allgather(num_entry))
        total_pts = sum(allgather(num_pts))
        # consistency check: every counted entry must be a stored nonzero
        if sum(allgather(map.nnz)) != total_entry:
            assert False, "total_entry does not match with nnz"
    else:
        total_entry = num_entry
        total_pts = num_pts
    #dprint1("map size", map.shape)
    dprint1("local pts/entry", num_pts, " ", num_entry)
    dprint1("total pts/entry", total_pts, " ", total_entry)
    return map
def map_dof_scalar(map, fes1, fes2, pt1all, pt2all, pto1all, pto2all, k1all,
                   k2all, sh1all, sh2all, map_1_2, trans1, trans2, tol, tdof,
                   rstart):
    """Fill ``map`` with coefficients relating scalar DoFs of two spaces.

    For every point of every ``fes1`` element, the single closest point of
    the paired ``fes2`` element (pairing given by ``map_1_2``) is located
    and the ratio of the two shape values, rounded according to ``tol``,
    is stored at ``map[row - rstart, col]``.

    Parameters
    ----------
    map :
        Matrix written in place; ``.shape`` and ``.nnz`` are read in the
        parallel branch.
    fes1 :
        Only used in parallel, for ``Dof_TrueDof_Matrix()``.
    fes2, pto1all, pto2all, trans1, trans2 :
        Not used by the computation in this function.
    pt1all, pt2all, k1all, k2all, sh1all, sh2all :
        Per-element sequences indexed by element number.
    map_1_2 :
        For each ``fes1`` element, index of the paired ``fes2`` element.
    tol :
        Sets the rounding precision of the stored coefficients.
    tdof :
        DoF ids to skip. Any order is accepted; a sorted copy is made
        because the lookup uses ``bisect``.
    rstart :
        Row offset subtracted from row indices.

    Returns
    -------
    ``map`` (modified in place).

    Raises
    ------
    AssertionError
        If more than one ``fes2`` point is at the minimum distance, if a
        negative index is found for an externally owned DoF, or (parallel)
        if the global nnz does not match the entry count.
    """
    dprint1("map_dof_scalar1", debug.format_memory_usage())

    subvdofs1 = []  # kept sorted (insort) so membership can use bisect
    subvdofs2 = []  # collected but not consumed in this function
    num_entry = 0
    num_pts = 0
    decimals = int(np.abs(np.log10(tol)))

    if use_parallel:
        P = fes1.Dof_TrueDof_Matrix()
        from mfem.common.parcsr_extra import ToScipyCoo
        P = ToScipyCoo(P).tocsr()
        # this is global TrueDoF (offset is not subtracted)
        VDoFtoGTDoF = P.indices
        external_entry = []
        # set: only used for membership tests
        gtdof_check = set()

    # bisect requires sorted input; sibling map_dof_vector sorts as well.
    tdof = sorted(tdof)

    for k0, pt1 in enumerate(pt1all):
        k2 = map_1_2[k0]
        newk1 = k1all[k0]  # (i local DoF, global DoF)
        sh1 = sh1all[k0]
        pt2 = pt2all[k2]
        newk2 = k2all[k2]
        sh2 = sh2all[k2]

        for k, p in enumerate(pt1):
            num_pts = num_pts + 1

            # skip DoFs listed in tdof
            iii = bisect.bisect_left(tdof, newk1[k, 2])
            if iii != len(tdof) and tdof[iii] == newk1[k, 2]:
                continue
            # skip DoFs that already received an entry
            iii = bisect.bisect_left(subvdofs1, newk1[k, 2])
            if iii != len(subvdofs1) and subvdofs1[iii] == newk1[k, 2]:
                continue

            # indices of all fes2 points at the minimum squared distance
            dist = np.sum((pt2 - p)**2, 1)
            d = np.where(dist == np.min(dist))[0]
            if myid == 1:
                dprint2('min_dist', np.min(dist))

            if len(d) != 1:
                raise AssertionError(
                    "more than two dofs at same plase is not asupported. ")

            d = d[0]
            s1 = sh1[newk1[k, 0]]
            s2 = sh2[newk2[d, 0]]
            if s1 / s2 < 0:
                dprint2("not positive")
            value = np.around(s1 / s2, decimals)

            if newk1[k, 2] != -1:
                map[newk1[k][2] - rstart, newk2[d][2]] = value
                num_entry = num_entry + 1
                bisect.insort_left(subvdofs1, newk1[k][2])
            else:
                # Row id -1: queue the entry under its global true DoF so
                # it can be redistributed below.
                # for scalar, this is perhaps not needed
                assert newk1[k][1] >= 0, "Negative index found"
                gtdof = VDoFtoGTDoF[newk1[k][1]]
                if gtdof not in gtdof_check:
                    external_entry.append((gtdof, newk2[d][2], value))
                    gtdof_check.add(gtdof)

        subvdofs2.extend([s for k, v, s in newk2])

    dprint1("map_dof_scalar2", debug.format_memory_usage())

    if use_parallel:
        dprint1("total entry (before)", sum(allgather(num_entry)))
        external_entry = redistribute_external_entry(external_entry,
                                                     rstart + map.shape[0])
        if len(external_entry.shape) == 2:
            # drop rows already written locally, then keep one entry per
            # row (np.isin replaces the deprecated np.in1d)
            idx1 = np.isin(external_entry[:, 0], subvdofs1, invert=True)
            val, idx2 = np.unique(external_entry[idx1, 0],
                                  return_index=True)
            external_entry = external_entry[idx1][idx2]
            for r, c, d in external_entry:
                num_entry = num_entry + 1
                map[r - rstart, c] = d

        dprint1("map_dof_scalar3", debug.format_memory_usage())

        total_entry = sum(allgather(num_entry))
        total_pts = sum(allgather(num_pts))
        # consistency check: every counted entry must be a stored nonzero
        if sum(allgather(map.nnz)) != total_entry:
            assert False, "total_entry does not match with nnz"
    else:
        total_entry = num_entry
        total_pts = num_pts

    dprint1("local pts/entry", num_pts, " ", num_entry)
    dprint1("total pts/entry", total_pts, " ", total_entry)
    return map
def run(self, engine, is_first=True):
    """Run the time-stepping loop and write checkpoints and solutions.

    A time-stepping instance is created according to ``self.ts_method``,
    configured with the start/end times and ``nt`` evenly spaced
    checkpoints, and stepped until it reports completion. Each written
    checkpoint is logged as ``index:time`` in a text file opened through
    ``engine.open_file`` (which may return ``None``).

    Parameters
    ----------
    engine :
        Engine object; its ``sol`` attribute is set from
        ``engine.assembled_blocks[1][0]``.
    is_first :
        When true (and not ``init_only``), ``pre_assemble`` and
        ``assemble`` are called before stepping and the flag is cleared.

    Returns
    -------
    The (possibly cleared) ``is_first`` flag.

    Raises
    ------
    AssertionError
        If ``self.ts_method`` is not a known stepping method.
    """
    if self.clear_wdir:
        engine.remove_solfiles()

    fid = engine.open_file(
        'checkpoint.' + self.parent.name() + '_' + self.name() + '.txt', 'w')

    # try/finally: the checkpoint index file is closed even if stepping
    # or saving raises.
    try:
        st, et, nt = self.st_et_nt

        if self.ts_method == 'Backward Eular':
            instance = FirstOrderBackwardEuler(self, engine)
            dprint1("time step configuration: " + str(self.time_step))
            time_step = self.eval_text_in_global(self.time_step)
            instance.set_timestep(TimeStep(time_step))
        elif self.ts_method == "Adaptive BE":
            instance = FirstOrderBackwardEulerAT(self, engine)
            instance.set_timestep(self.abe_minstep)
            instance.set_maxtimestep(self.abe_maxstep)
        else:
            assert False, "unknown stepping method: " + self.ts_method

        instance.set_start(st)
        instance.set_end(et)
        instance.set_checkpoint(np.linspace(st, et, nt))

        engine.sol = engine.assembled_blocks[1][0]
        instance.sol = engine.sol
        instance.time = st

        if self.init_only:
            instance.write_checkpoint_solution()
        elif is_first:
            instance.pre_assemble()
            instance.assemble()
            is_first = False

        instance.set_blk_mask()
        instance.configure_probes(self.probe)

        for solver in self.derived_value_solver():
            child = solver.allocate_instance(engine)
            instance.add_child_instance(child)

        finished = False
        if fid is not None:
            fid.write(str(0) + ':' + str(instance.time) + "\n")

        while not finished:
            finished, cp_written = instance.step(is_first)
            if self.use_dwc_ts:
                engine.call_dwc(self.get_phys_range(),
                                method="timestep",
                                args=self.dwc_ts_arg,
                                callername=self.name(),
                                time=instance.time)
            if self.use_dwc_cp and cp_written:
                # icheckpoint was already advanced by step(); hence the -1
                engine.call_dwc(self.get_phys_range(),
                                method="checkpoint",
                                callername=self.name(),
                                args=self.dwc_cp_arg,
                                time=instance.time,
                                icheckpoint=instance.icheckpoint - 1)
            if cp_written and fid is not None:
                fid.write(
                    str(instance.icheckpoint - 1) + ':' +
                    str(instance.time) + "\n")
                fid.flush()

        instance.save_solution(ksol=0,
                               skip_mesh=False,
                               mesh_only=False,
                               save_parmesh=self.save_parmesh)
        instance.save_probe()
    finally:
        if fid is not None:
            fid.close()

    # Previously placed after ``return`` and therefore never executed.
    dprint1(debug.format_memory_usage())
    return is_first