def errornorm(u, uh, norm_type="L2", degree_rise=3, mesh=None):
    """Compute the error :math:`e = u - u_h` in the specified norm.

    :arg u: a :class:`.Function` containing an "exact" solution
    :arg uh: a :class:`.Function` containing the approximate solution
    :arg norm_type: the type of norm to compute, see :func:`.norm` for
         details of supported norm types.
    :arg degree_rise: increase in polynomial degree to use as the
         approximation space for computing the error.
    :arg mesh: an optional mesh on which to compute the error norm
         (currently ignored).

    This function works by :func:`.project`\ing ``u`` and ``uh`` into a
    space of degree ``degree_rise`` higher than the degree of ``uh``
    and computing the error there.
    """
    urank = len(u.ufl_shape)
    uhrank = len(uh.ufl_shape)
    rank = urank
    if urank != uhrank:
        raise RuntimeError("Mismatching rank between u and uh")

    degree = uh.function_space().ufl_element().degree()
    if isinstance(degree, tuple):
        degree = max(degree) + degree_rise
    else:
        degree += degree_rise

    # The exact solution might be an expression, in which case this test is irrelevant.
    if isinstance(u, function.Function):
        degree_u = u.function_space().ufl_element().degree()
        if degree > degree_u:
            warning("Degree of exact solution less than approximation degree")

    mesh = uh.function_space().mesh()
    if rank == 0:
        V = functionspace.FunctionSpace(mesh, 'DG', degree)
    elif rank == 1:
        V = functionspace.VectorFunctionSpace(mesh, 'DG', degree,
                                              dim=u.ufl_shape[0])
    else:
        raise RuntimeError("Don't know how to compute error norm for tensor valued functions")

    u_ = projection.project(u, V)
    uh_ = projection.project(uh, V)

    uh_ -= u_

    return norm(uh_, norm_type=norm_type, mesh=mesh)
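# Usage sketch for errornorm (not part of the module). It assumes the public
# firedrake namespace of this era (UnitSquareMesh, FunctionSpace, Expression);
# the mesh and expressions are illustrative only.

from firedrake import *

mesh = UnitSquareMesh(16, 16)
V = FunctionSpace(mesh, "CG", 1)
exact = Function(V).interpolate(Expression("sin(pi*x[0])"))
approx = Function(V).interpolate(Expression("x[0]"))  # stand-in for a computed solution
# Both fields are projected into a DG space of degree 1 + degree_rise and
# the difference is measured there.
print(errornorm(exact, approx, norm_type="L2"))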
def _extract_kwargs(**kwargs):
    parameters = kwargs.get('solver_parameters', None)
    if 'parameters' in kwargs:
        warning(RED % "The 'parameters' keyword is deprecated, use 'solver_parameters' instead.")
        parameters = kwargs['parameters']
        if 'solver_parameters' in kwargs:
            warning(RED % "'parameters' and 'solver_parameters' passed, using the latter")
            parameters = kwargs['solver_parameters']

    # Make sure we don't stomp on a dict the user has passed in.
    parameters = parameters.copy() if parameters is not None else {}
    nullspace = kwargs.get('nullspace', None)
    options_prefix = kwargs.get('options_prefix', None)

    return parameters, nullspace, options_prefix
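# Sketch of how the solve() entry points are expected to call _extract_kwargs
# (the keyword values here are illustrative): it normalises the deprecated
# 'parameters' spelling and hands back a private copy of the dict.

params, nullspace, prefix = _extract_kwargs(solver_parameters={'ksp_type': 'cg'},
                                            options_prefix='my_solve_')
params['ksp_rtol'] = 1e-10  # safe: this is a copy, the caller's dict is untouched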
def create_slope_set(op2set, extra_halo, insp_sets=None):
    """
    Convert an OP2 set to a set suitable for the SLOPE Python interface.
    Also check that the halo region is sufficiently deep for tiling.
    """
    SlopeSet = namedtuple('SlopeSet', 'name core boundary nonexec superset')

    partitioning = op2set._partitioning if hasattr(op2set, '_partitioning') else None
    if not isinstance(op2set, Subset):
        name = op2set.name
        superset = None
    else:
        name = "%s_ss" % op2set.name
        superset = op2set.superset.name

    if slope.get_exec_mode() not in ['OMP_MPI', 'ONLY_MPI']:
        core_size = op2set.core_size
        boundary_size = op2set.exec_size - op2set.core_size
        nonexec_size = op2set.total_size - op2set.exec_size
    elif hasattr(op2set, '_deep_size'):
        # Assume [1, ..., N] levels of halo regions
        # Each level is represented by (core, owned, exec, nonexec)
        level_N = op2set._deep_size[-1]
        core_size = level_N[0]
        boundary_size = level_N[2] - core_size
        nonexec_size = level_N[3] - level_N[2]
        if extra_halo and nonexec_size == 0:
            level_E = op2set._deep_size[-2]
            boundary_size = level_E[2] - core_size
            nonexec_size = level_E[3] - level_E[2]
    else:
        warning("Couldn't find deep halos in %s, outcome is undefined." % op2set.name)
        core_size = op2set.core_size
        boundary_size = op2set.exec_size - op2set.core_size
        nonexec_size = op2set.total_size - op2set.exec_size

    slope_set = SlopeSet(name, core_size, boundary_size, nonexec_size, superset)
    insp_sets[slope_set] = partitioning

    return slope_set
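# Illustrative decomposition computed in the non-MPI branch above; FakeSet is
# a hypothetical stand-in for an op2 set, not a real PyOP2 object.

class FakeSet(object):
    name = "cells"
    core_size = 90    # entities with no halo dependency
    exec_size = 100   # core plus owned entities executed over redundantly
    total_size = 110  # exec plus non-executed (read-only) halo entities

s = FakeSet()
boundary_size = s.exec_size - s.core_size   # 10 entities on the partition boundary
nonexec_size = s.total_size - s.exec_size   # 10 halo entities read but never written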
def increase_stack(asm_opt):
    """Increase the stack size if the total space occupied by the kernel's
    local arrays is too big."""
    # Assume the size of a C type double is 8 bytes
    double_size = 8
    # Assume the stack size is 1.7 MB (2 MB is usually the limit)
    stack_size = 1.7*1024*1024

    size = 0
    for asm in asm_opt:
        decls = asm.decls.values()
        if decls:
            size += sum([reduce(operator.mul, d.sym.rank) for d in zip(*decls)[0]
                         if d.sym.rank])

    if size*double_size > stack_size:
        # Increase the stack size if the kernel's stack size seems to outreach
        # the space available
        try:
            resource.setrlimit(resource.RLIMIT_STACK,
                               (resource.RLIM_INFINITY, resource.RLIM_INFINITY))
        except resource.error:
            warning("Stack may blow up, and could not increase its size.")
            warning("In case of failure, lower COFFEE's licm level to 1.")
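# Standalone sketch of the setrlimit technique used above (POSIX only): raise
# the soft stack limit to the hard limit and degrade gracefully when the
# platform refuses.

import resource

try:
    soft, hard = resource.getrlimit(resource.RLIMIT_STACK)
    resource.setrlimit(resource.RLIMIT_STACK, (hard, hard))
except (ValueError, resource.error):
    print("Could not raise the stack limit; large local arrays may overflow it.")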
def __lshift__(self, arg):
    from pyop2.logger import warning, RED
    warning(RED % "The << syntax is deprecated, use File.write")
    self.write(arg)
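# Migration sketch for the deprecation above; f and u are illustrative names
# for a File and a Function.
#
#   f << u        # deprecated: emits the warning, then forwards to write
#   f.write(u)    # preferred spelling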
import numpy as np  # needed for valuetype below
import ufl
import ctypes
from ctypes import POINTER, c_int, c_double, c_void_p

import coffee.base as ast

from pyop2 import op2
from pyop2.logger import warning
from firedrake import functionspaceimpl
from firedrake import utils
from firedrake import vector

try:
    import cachetools
except ImportError:
    warning("cachetools not available, expression assembly will be slowed down")
    cachetools = None

__all__ = ['Function', 'PointNotInDomainError']

valuetype = np.float64


class _CFunction(ctypes.Structure):
    """C struct collecting data from a :class:`Function`"""
    _fields_ = [("n_cols", c_int),
                ("n_layers", c_int),
                ("coords", POINTER(c_double)),
                ("coords_map", POINTER(c_int)),
                ("f", POINTER(c_double)),
                ("f_map", POINTER(c_int)),
                ("sidx", c_void_p)]
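# For reference, a hedged reconstruction of the C-side struct that the
# _CFunction fields mirror one-to-one; this is illustrative, not code emitted
# by this module.
#
#   struct Function {
#       int n_cols;
#       int n_layers;
#       double *coords;
#       int *coords_map;
#       double *f;
#       int *f_map;
#       void *sidx;
#   };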
import numpy as np  # needed for valuetype below
import ufl

import coffee.base as ast

from pyop2 import op2
from pyop2.logger import warning
from firedrake import expression as expression_t
from firedrake import functionspace
from firedrake import utils
from firedrake import vector

try:
    import cachetools
except ImportError:
    warning("cachetools not available, expression assembly will be slowed down")
    cachetools = None

__all__ = ["Function", "interpolate"]

valuetype = np.float64


def interpolate(expr, V):
    """Interpolate an expression onto a new function in V.

    :arg expr: an :class:`.Expression`.
    :arg V: the :class:`.FunctionSpace` to interpolate into.
    """
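# Minimal usage sketch for interpolate (assumes the public firedrake
# namespace; the expression is illustrative):
#
#   mesh = UnitSquareMesh(8, 8)
#   V = FunctionSpace(mesh, "CG", 2)
#   f = interpolate(Expression("x[0]*x[1]"), V)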
def __lshift__(self, data):
    """It allows file << function syntax for writing data out to disk.

    In the case of parallel, it would also accept (function, timestep)
    tuple as an argument. If only function is given, then the timestep
    will be automatically generated."""
    # If parallel, it needs to keep track of its timestep.
    if MPI.parallel:
        # if statements to keep the consistency of how to update the
        # timestep.
        if isinstance(data, tuple):
            if self._time_step == -1 or not self._generate_time:
                function = data[0]
                self._time_step = data[1]
            else:
                raise TypeError("Expected function, got tuple.")
        else:
            if self._time_step != -1 and not self._generate_time:
                raise TypeError("Expected tuple, got function.")
            function = data
            self._time_step += 1
            self._generate_time = True
    else:
        function = data

    def is_family1(e, family):
        import ufl.finiteelement.hdivcurl as hc
        if isinstance(e, (hc.HDivElement, hc.HCurlElement)):
            return False
        if e.family() == 'OuterProductElement':
            if e.degree() == (1, 1):
                if e._A.family() == family \
                   and e._B.family() == family:
                    return True
        elif e.family() == family and e.degree() == 1:
            return True
        return False

    def is_cgN(e):
        import ufl.finiteelement.hdivcurl as hc
        if isinstance(e, (hc.HDivElement, hc.HCurlElement)):
            return False
        if e.family() == 'OuterProductElement':
            if e._A.family() in ('Lagrange', 'Q') \
               and e._B.family() == 'Lagrange':
                return True
        elif e.family() in ('Lagrange', 'Q'):
            return True
        return False

    mesh = function.function_space().mesh()
    e = function.function_space().ufl_element()

    if len(e.value_shape()) > 1:
        raise RuntimeError("Can't output tensor valued functions")

    ce = mesh.coordinates.function_space().ufl_element()

    coords_p1 = is_family1(ce, 'Lagrange') or is_family1(ce, 'Q')
    coords_p1dg = is_family1(ce, 'Discontinuous Lagrange') or is_family1(ce, 'DQ')
    coords_cgN = is_cgN(ce)
    function_p1 = is_family1(e, 'Lagrange') or is_family1(e, 'Q')
    function_p1dg = is_family1(e, 'Discontinuous Lagrange') or is_family1(e, 'DQ')
    function_cgN = is_cgN(e)

    project_coords = False
    project_function = False
    discontinuous = False
    # We either output in P1 or P1dg.
    if coords_cgN and function_cgN:
        family = 'CG'
        project_coords = not coords_p1
        project_function = not function_p1
    else:
        family = 'DG'
        project_coords = not coords_p1dg
        project_function = not function_p1dg
        discontinuous = True

    if project_function:
        if len(e.value_shape()) == 0:
            Vo = fs.FunctionSpace(mesh, family, 1)
        elif len(e.value_shape()) == 1:
            Vo = fs.VectorFunctionSpace(mesh, family, 1, dim=e.value_shape()[0])
        else:
            # Never reached
            Vo = None
        if not self._warnings[0]:
            warning(RED % "*** Projecting output function to %s1", family)
            self._warnings[0] = True
        output = projection.project(function, Vo, name=function.name())
    else:
        output = function
        Vo = output.function_space()
    if project_coords:
        Vc = fs.VectorFunctionSpace(mesh, family, 1,
                                    dim=mesh._coordinate_fs.dim)
        if not self._warnings[1]:
            warning(RED % "*** Projecting coordinates to %s1", family)
            self._warnings[1] = True
        coordinates = projection.project(mesh.coordinates, Vc,
                                         name=mesh.coordinates.name())
    else:
        coordinates = mesh.coordinates
        Vc = coordinates.function_space()

    num_points = Vo.node_count

    layers = mesh.layers - 1 if isinstance(e.cell(), OuterProductCell) else 1
    num_cells = mesh.num_cells() * layers

    if not isinstance(e.cell(), OuterProductCell) and e.cell().cellname() != "quadrilateral":
        connectivity = Vc.cell_node_map().values_with_halo.flatten()
    else:
        # Connectivity of bottom cell in extruded mesh
        base = Vc.cell_node_map().values_with_halo
        if _cells[mesh.ufl_cell()] == hl.VtkQuad:
            # Quad is
            #
            # 1--3
            # |  |
            # 0--2
            #
            # needs to be
            #
            # 3--2
            # |  |
            # 0--1
            base = base[:, [0, 2, 3, 1]]
            points_per_cell = 4
        elif _cells[mesh.ufl_cell()] == hl.VtkWedge:
            # Wedge is
            #
            #    5
            #   /|\
            #  / | \
            # 1----3
            # |  4 |
            # | /\ |
            # |/  \|
            # 0----2
            #
            # needs to be
            #
            #    5
            #   /|\
            #  / | \
            # 3----4
            # |  2 |
            # | /\ |
            # |/  \|
            # 0----1
            #
            base = base[:, [0, 2, 4, 1, 3, 5]]
            points_per_cell = 6
        elif _cells[mesh.ufl_cell()] == hl.VtkHexahedron:
            # Hexahedron is
            #
            #   5----7
            #  /|   /|
            # 4----6 |
            # | 1--|-3
            # |/   |/
            # 0----2
            #
            # needs to be
            #
            #   7----6
            #  /|   /|
            # 4----5 |
            # | 3--|-2
            # |/   |/
            # 0----1
            #
            base = base[:, [0, 2, 3, 1, 4, 6, 7, 5]]
            points_per_cell = 8
        # Repeat up the column
        connectivity_temp = np.repeat(base, layers, axis=0)

        if discontinuous:
            scale = points_per_cell
        else:
            scale = 1
        offsets = np.arange(layers) * scale

        # Add offsets going up the column
        connectivity_temp += np.tile(offsets.reshape(-1, 1), (mesh.num_cells(), 1))

        connectivity = connectivity_temp.flatten()

    if isinstance(output.function_space(), fs.VectorFunctionSpace):
        tmp = output.dat.data_ro_with_halos
        vdata = [None]*3
        if output.dat.dim[0] == 1:
            vdata[0] = tmp.flatten()
        else:
            for i in range(output.dat.dim[0]):
                vdata[i] = tmp[:, i].flatten()
        for i in range(output.dat.dim[0], 3):
            vdata[i] = np.zeros_like(vdata[0])
        data = tuple(vdata)
        # only for checking large file size
        flat_data = {function.name(): tmp.flatten()}
    else:
        data = output.dat.data_ro_with_halos.flatten()
        flat_data = {function.name(): data}

    coordinates = self._fd_to_evtk_coord(coordinates.dat.data_ro_with_halos)

    cell_types = np.empty(num_cells, dtype="uint8")

    # Assume that all cells are of same shape.
    cell_types[:] = _cells[mesh.ufl_cell()].tid
    p_c = _points_per_cell[mesh.ufl_cell()]

    # This tells which are the last nodes of each cell.
    offsets = np.arange(start=p_c, stop=p_c * (num_cells + 1), step=p_c,
                        dtype='int32')
    large_file_flag = _requiresLargeVTKFileSize("VtkUnstructuredGrid",
                                                numPoints=num_points,
                                                numCells=num_cells,
                                                pointData=flat_data,
                                                cellData=None)
    new_name = self._filename

    # When vtu file makes part of a parallel process, aggregated by a
    # pvtu file, the output is : filename_timestep_rank.vtu
    if MPI.parallel:
        new_name += "_" + str(self._time_step) + "_" + str(MPI.comm.rank)

    self._writer = hl.VtkFile(new_name, hl.VtkUnstructuredGrid, large_file_flag)

    self._writer.openGrid()

    self._writer.openPiece(ncells=num_cells, npoints=num_points)

    # openElement allows the stuff inside of the tag <arg></arg>
    # to be edited.
    self._writer.openElement("Points")
    # addData adds the DataArray in the tag <arg1>
    self._writer.addData("Points", coordinates)
    self._writer.closeElement("Points")

    self._writer.openElement("Cells")
    self._writer.addData("connectivity", connectivity)
    self._writer.addData("offsets", offsets)
    self._writer.addData("types", cell_types)
    self._writer.closeElement("Cells")

    self._writer.openData("Point", scalars=function.name())
    self._writer.addData(function.name(), data)
    self._writer.closeData("Point")
    self._writer.closePiece()
    self._writer.closeGrid()

    # Create the AppendedData
    self._writer.appendData(coordinates)
    self._writer.appendData(connectivity)
    self._writer.appendData(offsets)
    self._writer.appendData(cell_types)
    self._writer.appendData(data)
    self._writer.save()
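# Usage sketch for the << operator defined above (names illustrative): a bare
# Function in serial; in parallel an optional (function, timestep) tuple fixes
# the timestep embedded in the per-rank filename.
#
#   out = File("solution.vtu")
#   out << u            # timestep generated automatically
#   out << (u, 0.5)     # explicit timestep (parallel runs)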
def fuse(name, loop_chain, **kwargs):
    """Apply fusion (and possibly tiling) to an iterator of :class:`ParLoop`
    objects, which we refer to as ``loop_chain``. Return an iterator of
    :class:`ParLoop` objects, in which some loops may have been fused or tiled.
    If fusion could not be applied, return the unmodified ``loop_chain``.

    .. note::
       At the moment, the following features are not supported, in which
       case the unmodified ``loop_chain`` is returned.

        * mixed ``Datasets`` and ``Maps``;
        * extruded ``Sets``

    .. note::
       Tiling cannot be applied if any of the following conditions holds:

        * a global reduction/write occurs in ``loop_chain``
    """
    # If there is nothing to fuse, just return
    if len(loop_chain) in [0, 1]:
        return loop_chain

    # Are there _LazyMatOp objects (i.e., synch points) preventing fusion?
    remainder = []
    synch_points = [l for l in loop_chain if isinstance(l, _LazyMatOp)]
    if synch_points:
        # Fuse only the sub-sequence before the first synch point
        synch_point = loop_chain.index(synch_points[0])
        remainder, loop_chain = loop_chain[synch_point:], loop_chain[:synch_point]

    # Return if there is nothing to fuse (e.g. only _LazyMatOp objects were present)
    if len(loop_chain) in [0, 1]:
        return loop_chain + remainder

    # Get an inspector for fusing this /loop_chain/. If there's a cache hit,
    # return the fused par loops straight away. Otherwise, try to run an inspection.
    options = {
        'log': kwargs.get('log', False),
        'mode': kwargs.get('mode', 'hard'),
        'ignore_war': kwargs.get('ignore_war', False),
        'use_glb_maps': kwargs.get('use_glb_maps', False),
        'use_prefetch': kwargs.get('use_prefetch', 0),
        'tile_size': kwargs.get('tile_size', 1),
        'seed_loop': kwargs.get('seed_loop', 0),
        'extra_halo': kwargs.get('extra_halo', False),
        'coloring': kwargs.get('coloring', 'default')
    }
    inspector = Inspector(name, loop_chain, **options)
    if inspector._initialized:
        return inspector.schedule(loop_chain) + remainder

    # Otherwise, is the inspection legal?
    mode = kwargs.get('mode', 'hard')
    force_glb = kwargs.get('force_glb', False)

    # Skip if loops in /loop_chain/ are already /fusion/ objects: this could happen
    # when loops had already been fused in a /loop_chain/ context
    if any(isinstance(l, extended.ParLoop) for l in loop_chain):
        return loop_chain + remainder

    # Global reductions are dangerous for correctness, so avoid fusion unless the
    # user is forcing it
    if not force_glb and any(l._reduced_globals for l in loop_chain):
        return loop_chain + remainder

    # Loop fusion requires modifying kernels, so ASTs must be available
    if not mode == 'only_tile':
        if any(not l.kernel._ast or l.kernel._attached_info['flatblocks'] for l in loop_chain):
            return loop_chain + remainder

    # Mixed still not supported
    if any(a._is_mixed for a in flatten([l.args for l in loop_chain])):
        return loop_chain + remainder

    # Extrusion still not supported
    if any(l.is_layered for l in loop_chain):
        return loop_chain + remainder

    # If tiling is requested, SLOPE must be visible
    if mode in ['tile', 'only_tile'] and not slope:
        warning("Couldn't locate SLOPE. Falling back to plain op2.ParLoops.")
        return loop_chain + remainder

    schedule = inspector.inspect()
    return schedule(loop_chain) + remainder
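# Hedged sketch of a fuse() call as the loop_chain machinery might issue it;
# par_loops and the kwargs are illustrative, not a working driver.
#
#   fused = fuse("assembly_chain", par_loops,
#                mode='tile', tile_size=256, seed_loop=0, extra_halo=False)
#   # fused is the original chain if any unsupported feature was detected.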
def __init__(self, *args, **kwargs):
    """
    :arg problem: A :class:`NonlinearVariationalProblem` to solve.
    :kwarg nullspace: an optional :class:`.VectorSpaceBasis` (or
           :class:`.MixedVectorSpaceBasis`) spanning the null space of the
           operator.
    :kwarg solver_parameters: Solver parameters to pass to PETSc.
           This should be a dict mapping PETSc options to values.  For
           example, to set the nonlinear solver type to just use a linear
           solver:

    .. code-block:: python

        {'snes_type': 'ksponly'}

    PETSc flag options should be specified with `bool` values. For example:

    .. code-block:: python

        {'snes_monitor': True}

    .. warning ::

        Since this object contains a circular reference and a custom
        ``__del__`` attribute, you *must* call :meth:`.destroy` on it when
        you are done, otherwise it will never be garbage collected.
    """
    assert isinstance(args[0], NonlinearVariationalProblem)
    self._problem = args[0]
    # Build the jacobian with the correct sparsity pattern.  Note
    # that since matrix assembly is lazy this doesn't actually
    # force an additional assembly of the matrix since in
    # form_jacobian we call assemble again which drops this
    # computation on the floor.
    self._jac_tensor = assemble.assemble(self._problem.J_ufl, bcs=self._problem.bcs,
                                         form_compiler_parameters=self._problem.form_compiler_parameters)
    if self._problem.Jp is not None:
        self._jac_ptensor = assemble.assemble(self._problem.Jp, bcs=self._problem.bcs,
                                              form_compiler_parameters=self._problem.form_compiler_parameters)
    else:
        self._jac_ptensor = self._jac_tensor
    test = self._problem.F_ufl.arguments()[0]
    self._F_tensor = function.Function(test.function_space())
    # Function to hold current guess
    self._x = function.Function(self._problem.u_ufl)
    self._problem.F_ufl = ufl.replace(self._problem.F_ufl, {self._problem.u_ufl: self._x})
    self._problem.J_ufl = ufl.replace(self._problem.J_ufl, {self._problem.u_ufl: self._x})
    if self._problem.Jp is not None:
        self._problem.Jp = ufl.replace(self._problem.Jp, {self._problem.u_ufl: self._x})
    self._jacobian_assembled = False

    self.snes = PETSc.SNES().create()
    self._opt_prefix = 'firedrake_snes_%d_' % NonlinearVariationalSolver._id
    NonlinearVariationalSolver._id += 1
    self.snes.setOptionsPrefix(self._opt_prefix)

    parameters = kwargs.get('solver_parameters', None)
    if 'parameters' in kwargs:
        warning(RED % "The 'parameters' keyword to %s is deprecated, use 'solver_parameters' instead.",
                self.__class__.__name__)
        parameters = kwargs['parameters']
        if 'solver_parameters' in kwargs:
            warning(RED % "'parameters' and 'solver_parameters' passed to %s, using the latter",
                    self.__class__.__name__)
            parameters = kwargs['solver_parameters']

    # Make sure we don't stomp on a dict the user has passed in.
    parameters = parameters.copy() if parameters is not None else {}
    # Mixed problem, use jacobi pc if user has not supplied one.
    if self._jac_tensor._M.sparsity.shape != (1, 1):
        parameters.setdefault('pc_type', 'jacobi')

    self.parameters = parameters

    ksp = self.snes.getKSP()
    pc = ksp.getPC()
    pmat = self._jac_ptensor._M
    names = [fs.name if fs.name else str(i)
             for i, fs in enumerate(test.function_space())]

    ises = solving_utils.set_fieldsplits(pmat, pc, names=names)

    with self._F_tensor.dat.vec as v:
        self.snes.setFunction(self.form_function, v)
    self.snes.setJacobian(self.form_jacobian, J=self._jac_tensor._M.handle,
                          P=self._jac_ptensor._M.handle)

    nullspace = kwargs.get('nullspace', None)
    if nullspace is not None:
        self.set_nullspace(nullspace, ises=ises)
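# Usage sketch for the solver above (the problem setup is illustrative; the
# destroy() call follows the warning in the docstring):
#
#   problem = NonlinearVariationalProblem(F, u, bcs=bcs)
#   solver = NonlinearVariationalSolver(problem,
#                                       solver_parameters={'snes_type': 'newtonls',
#                                                          'snes_monitor': True})
#   solver.solve()
#   solver.destroy()  # required: the object holds a circular reference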
def evict(self):
    """Run the cache eviction algorithm. This works out the permitted
    cache size and deletes objects until it is achieved. Cache values
    are assumed to have a :attr:`value` attribute and eviction occurs in
    increasing :attr:`value` order. Currently :attr:`value` is an index
    of the assembly operation, so older operations are evicted first.

    The cache will be evicted down to 90% of permitted size.

    The permitted size is either the explicit
    :data:`parameters["assembly_cache"]["max_bytes"]` or it is the
    amount of memory per core scaled by
    :data:`parameters["assembly_cache"]["max_factor"]` (by default the
    scale factor is 0.6).

    In MPI parallel, the nbytes of each cache entry is set to the
    maximum over all processes, while the available memory is set to the
    minimum. This produces a conservative caching policy which is
    guaranteed to result in the same evictions on each processor.
    """
    if not parameters["assembly_cache"]["eviction"]:
        return

    max_cache_size = min(parameters["assembly_cache"]["max_bytes"] or float("inf"),
                         (memory or float("inf"))
                         * parameters["assembly_cache"]["max_factor"])

    if max_cache_size == float("inf"):
        if not self.evictwarned:
            warning("No maximum assembly cache size. Install psutil >= 2.0.0 or risk leaking memory!")
            self.evictwarned = True
        return

    cache_size = self.nbytes
    if cache_size < max_cache_size:
        return

    debug("Cache eviction triggered. %s bytes in cache, %s bytes allowed" %
          (cache_size, max_cache_size))

    # Evict down to 90% full.
    bytes_to_evict = cache_size - 0.9 * max_cache_size

    sorted_cache = sorted(self.cache.items(), key=lambda x: x[1][1].value)

    nbytes = lambda x: x[1][1].nbytes

    candidates = []
    while bytes_to_evict > 0:
        next = sorted_cache.pop(0)
        candidates.append(next)
        bytes_to_evict -= nbytes(next)

    for c in reversed(candidates):
        if bytes_to_evict + nbytes(c) < 0:
            # We may have been overzealous.
            bytes_to_evict += nbytes(c)
        else:
            del self.cache[c[0]]
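# Worked example of the eviction arithmetic above (standalone, illustrative
# numbers): with 100 MB permitted and 120 MB cached, entries are popped in
# increasing .value order until 30 MB have been marked for eviction.

max_cache_size = 100 * 1024 * 1024
cache_size = 120 * 1024 * 1024
bytes_to_evict = cache_size - 0.9 * max_cache_size
print(bytes_to_evict)  # 31457280.0 bytes, i.e. 30 MB must go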