def compute_form_action(form, coefficient):
    """Return the action of ``form`` on ``coefficient``.

    The last Argument of the form is substituted by a Coefficient. When
    ``coefficient`` is None a fresh Coefficient is created on the function
    space of that Argument, so the result has one Argument less and one
    extra Coefficient. A user-supplied coefficient living in a different
    function space is still substituted; only a debug message is emitted.

    Forms whose arguments carry a part (``arg.part()`` other than None)
    are reported through ``error()``.
    """
    # TODO: Check whatever makes sense for coefficient

    form_arguments = form.arguments()

    # Collect the distinct part markers; anything besides None is unsupported.
    part_markers = {argument.part() for argument in form_arguments}
    part_markers.discard(None)
    if part_markers:
        error("compute_form_action cannot handle parts.")

    # The trailing argument is the one that gets replaced.
    last_argument = form_arguments[-1]
    space = last_argument.ufl_function_space()

    if coefficient is None:
        coefficient = Coefficient(space)
    elif coefficient.ufl_function_space() != space:
        debug("Computing action of form on a coefficient in a different function space.")

    substitution = {last_argument: coefficient}
    return replace(form, substitution)
def compute_form_action(form, coefficient):
    """Return the action of ``form`` on ``coefficient``.

    The last Argument found by ``extract_arguments(form)`` is substituted
    by a Coefficient. When ``coefficient`` is None a new Coefficient is
    built on the element of that Argument, so the returned form has one
    Argument less and one additional Coefficient. If a coefficient on a
    different element is supplied, a debug message is emitted and the
    substitution is carried out anyway.
    """
    # TODO: Check whatever makes sense for coefficient

    # Only the trailing argument is of interest; it is the one replaced.
    trailing_argument = extract_arguments(form)[-1]
    element = trailing_argument.element()

    if coefficient is None:
        coefficient = Coefficient(element)
    elif coefficient.element() != element:
        # Deliberately not an assertion: mismatching elements are tolerated.
        debug("Computing action of form on a coefficient in a different element space.")

    return replace(form, {trailing_argument: coefficient})
def compute_form_action(form, coefficient):
    """Compute the action of a form on a Coefficient.

    Works by swapping the last Argument of ``form`` for a Coefficient.
    If ``coefficient`` is None, one is created on the element of the
    replaced Argument; otherwise the given coefficient is used as is,
    with a debug message when its element differs from the Argument's.
    """
    # TODO: Check whatever makes sense for coefficient

    found_arguments = extract_arguments(form)
    replaced = found_arguments[-1]
    target_element = replaced.element()

    if coefficient is not None:
        # A mismatch is only reported, never rejected.
        if coefficient.element() != target_element:
            debug(
                "Computing action of form on a coefficient in a different element space."
            )
    else:
        coefficient = Coefficient(target_element)

    substitutions = {replaced: coefficient}
    return replace(form, substitutions)
def transform_integrands(form, transform, domain_type=None):
    """Apply ``transform`` to the integrand(s) of ``form``.

    * Form: each integral whose domain type matches ``domain_type`` (or
      every integral when ``domain_type`` is None) gets its integrand
      transformed; integrals whose resulting integrand is a Zero are
      dropped and a new Form is built from the remaining ones.
    * Integral: the integrand is transformed and the integral reconstructed.
    * Expr: ``transform(form)`` is returned directly.

    Anything else is reported through ``error()``.
    """
    if isinstance(form, Form):
        kept_integrals = []
        for integral in form.integrals():
            expression = integral.integrand()
            selected = domain_type is None or domain_type == integral.domain_type()
            if selected:
                expression = transform(expression)
            # Integrals that vanish are left out of the new form.
            if isinstance(expression, Zero):
                continue
            kept_integrals.append(integral.reconstruct(expression))

        if not kept_integrals:
            debug(
                "No integrals left after transformation, returning empty form."
            )
        return Form(kept_integrals)

    if isinstance(form, Integral):
        transformed = transform(form.integrand())
        return form.reconstruct(transformed)

    if isinstance(form, Expr):
        return transform(form)

    error("Expecting Form or Expr.")
def coefficient(self, o):
    """Differentiate the coefficient ``o`` in the direction ``self._v``.

    Returns a pair ``(o, do)``. If ``o`` is one of the differentiation
    variables in ``self._w`` the matching direction from ``self._v`` is
    returned (dw/dw := d/ds [w + s v] = v). Otherwise the user-provided
    coefficient derivatives in ``self._cd._data`` are consulted; without
    an entry for ``o`` the derivative is a Zero of ``o``'s shape.
    """
    debug("In CoefficientAD.coefficient:")
    debug("o = %s" % o)
    debug("self._w = %s" % self._w)
    debug("self._v = %s" % self._v)

    # Is o itself one of the variables we differentiate with respect to?
    for variable, direction in izip(self._w, self._v):
        if o == variable:
            return (variable, direction)

    # Start from do/dw = 0 and add contributions, if any are registered.
    total = Zero(o.shape())
    registered = self._cd._data.get(o)

    if registered is None:
        if self._cd._data:
            # TODO: Make it possible to silence this message in particular?
            #       It may be good to have for debugging...
            warning("Assuming d{%s}/d{%s} = 0." % (o, self._w))
        return (o, total)

    # Make sure we have a tuple to match the self._v tuple.
    # NOTE(review): the assertion is nested under this check as
    # reconstructed from whitespace-collapsed source -- confirm.
    if not isinstance(registered, tuple):
        registered = (registered,)
        ufl_assert(len(registered) == len(self._v),
                   "Got a tuple of arguments, "
                   "expecting a matching tuple of coefficient derivatives.")

    # Compute do/dw_j = do/dw_h : v.
    # In a 'mixed' space there may be several (derivative, direction)
    # pairs; sum over all of them to get the complete inner product,
    # using indices to express a non-compound inner product.
    for partial, direction in izip(registered, self._v):
        scalar_part, indices = as_scalar(partial)
        direction_rank = len(direction.shape())
        free_indices, contracted_indices = (indices[:-direction_rank],
                                            indices[-direction_rank:])
        term = scalar_part * direction[contracted_indices]
        total += as_tensor(term, free_indices) if free_indices else term

    # Example:
    # (f : g) -> (dfdu : v) : g + ditto
    # shape(f) == shape(g) == shape(dfdu : v)
    # shape(dfdu) == shape(f) + shape(v)

    return (o, total)
def is_multilinear(form):
    """Check if form is multilinear in arguments.

    Every expression yielded by ``iter_expressions(form)`` is passed to
    ``extract_argument_dependencies``. The number of argument
    'configurations' found is reported through ``debug``/``warning``.

    Returns:
        True if no expression raised NotMultiLinearException, False
        otherwise (the offending term is reported with ``warning``).
    """
    # An attempt at implementing is_multilinear using extract_argument_dependencies.
    # TODO: This has some false negatives for "multiple configurations". (Does it still? Needs testing!)
    # TODO: FFC probably needs a variant of this which checks for some sorts of linearity
    #       in Coefficients as well, this should be a fairly simple extension of the current algorithm.
    try:
        for e in iter_expressions(form):
            deps = extract_argument_dependencies(e)
            nargs = [len(d) for d in deps]
            if not nargs:
                debug("This form is a functional.")
            elif len(nargs) == 1:
                debug("This form is linear in %d arguments." % nargs[0])
            else:
                # Several configurations are only warned about; they do
                # not make the check fail.
                warning(("This form has more than one argument "
                         "'configuration', it has terms that are linear in %s "
                         "arguments respectively.") % str(nargs))
    except NotMultiLinearException as msg:
        # 'as' binding is valid on both Python 2.6+ and Python 3, unlike
        # the old 'except Exc, name' spelling.
        warning("Form is not multilinear, the offending term is: %s" % msg)
        return False

    # No expression was rejected: report success explicitly instead of
    # falling off the end and returning a falsy None.
    return True
def coefficient(self, o):
    """Return ``(o, do)``, the coefficient ``o`` and its directional derivative.

    Definition used: dw/dw := d/ds [w + s v] = v. When ``o`` equals one of
    the variables in ``self._w`` the paired entry of ``self._v`` is the
    derivative. Otherwise derivatives registered for ``o`` in
    ``self._cd._data`` are contracted with the directions in ``self._v``;
    if nothing is registered the derivative is ``Zero(o.shape())``.
    """
    debug("In CoefficientAD.coefficient:")
    debug("o = %s" % o)
    debug("self._w = %s" % self._w)
    debug("self._v = %s" % self._v)

    # Find o among w
    for w_item, v_item in izip(self._w, self._v):
        if o == w_item:
            return (w_item, v_item)

    # If o is not among coefficient derivatives, the result stays do/dw = 0
    derivative_sum = Zero(o.shape())
    user_derivatives = self._cd._data.get(o)

    if user_derivatives is not None:
        # Normalise to a tuple so it can be paired with self._v.
        # NOTE(review): assertion nesting reconstructed from collapsed
        # whitespace in the source -- confirm it belongs under this check.
        if not isinstance(user_derivatives, tuple):
            user_derivatives = (user_derivatives, )
            ufl_assert(len(user_derivatives) == len(self._v),
                       "Got a tuple of arguments, "
                       "expecting a matching tuple of coefficient derivatives.")

        # Compute do/dw_j = do/dw_h : v.
        # There may be a tuple of derivatives and directions in a 'mixed'
        # space, so sum over them all to get the complete inner product.
        # Indices are used to define a non-compound inner product.
        for user_derivative, v_item in izip(user_derivatives, self._v):
            scalar_expr, all_indices = as_scalar(user_derivative)
            v_rank = len(v_item.shape())
            leading = all_indices[:-v_rank]
            trailing = all_indices[-v_rank:]
            contribution = scalar_expr * v_item[trailing]
            if leading:
                contribution = as_tensor(contribution, leading)
            derivative_sum += contribution
    elif self._cd._data:
        # TODO: Make it possible to silence this message in particular?
        #       It may be good to have for debugging...
        warning("Assuming d{%s}/d{%s} = 0." % (o, self._w))

    # Example:
    # (f : g) -> (dfdu : v) : g + ditto
    # shape(f) == shape(g) == shape(dfdu : v)
    # shape(dfdu) == shape(f) + shape(v)

    return (o, derivative_sum)
def _debug_visit(self, o):
    """Debugging hook, enable this by renaming to 'visit'.

    Delegates to ``Transformer.visit`` and returns its result unchanged.
    Emits debug output when the visited expression is not returned as the
    first element of the result, or when the free indices of the two
    result elements differ.
    """
    result = Transformer.visit(self, o)
    value, derivative = result

    # Lines shared by both diagnostics below.
    common_lines = (
        (" o: %s", o),
        (" f: %s", value),
        (" df: %s", derivative),
    )

    if value is not o:
        debug("In ForwardAD.visit, didn't get back o:")
        for template, obj in common_lines:
            debug(template % str(obj))

    # Indices that are free in exactly one of value / derivative.
    mismatch = set(value.free_indices()).symmetric_difference(
        set(derivative.free_indices()))
    if mismatch:
        debug("In ForwardAD.visit, got free indices diff:")
        for template, obj in common_lines:
            debug(template % str(obj))
        debug(" f.fi(): %s" % lstr(value.free_indices()))
        debug(" df.fi(): %s" % lstr(derivative.free_indices()))
        debug(" fi_diff: %s" % str(mismatch))

    return result
def build_uflacs_ir(cell, integral_type, entitytype,
                    integrands, tensor_shape,
                    coefficient_numbering,
                    quadrature_rules, parameters):
    """Build the uflacs intermediate representation (a dict) for one integral.

    Arguments (as used in this function):
        cell: object providing ``topological_dimension()``.
        integral_type: string; compared against "expression", "cell",
            "interior_facet" and tested for membership in
            ``point_integral_types``, ``facet_integral_types`` and
            ``custom_integral_types``.
        entitytype: string; compared against "cell" and "facet".
        integrands: a tuple/list of expressions when ``integral_type`` is
            "expression", otherwise a dict mapping num_points to a
            single integrand.
        tensor_shape: only its length is used, passed on to
            ``compute_argument_factorization``.
        coefficient_numbering: stored unchanged in the returned dict.
        quadrature_rules: indexed as ``quadrature_rules[num_points][1]``
            to obtain weights for preintegration; also forwarded to
            ``build_optimized_tables``.
        parameters: passed to ``parse_uflacs_optimization_parameters``.

    Returns:
        dict with keys "params", "coefficient_numbering", "unique_tables",
        "unique_table_types", "piecewise_ir", "varying_irs" (one expr_ir
        dict per num_points) and "all_num_points".
    """
    # The intermediate representation dict we're building and returning here
    ir = {}

    # Extract uflacs specific optimization and code generation parameters
    p = parse_uflacs_optimization_parameters(parameters, integral_type)

    # Pass on parameters for consumption in code generation
    ir["params"] = p

    # { ufl coefficient: count }
    ir["coefficient_numbering"] = coefficient_numbering

    # Shared unique tables for all quadrature loops
    ir["unique_tables"] = {}
    ir["unique_table_types"] = {}

    # Shared piecewise expr_ir for all quadrature loops
    ir["piecewise_ir"] = empty_expr_ir()

    # { num_points: expr_ir for one integrand }
    ir["varying_irs"] = {}

    # Temporary data structures to build shared piecewise data:
    # pe2i maps a piecewise expression to its index in the piecewise V,
    # piecewise_modified_argument_indices maps a modified argument to
    # its index in the piecewise modified_arguments list.
    pe2i = {}
    piecewise_modified_argument_indices = {}

    # Whether we expect the quadrature weight to be applied or not
    # (in some cases it's just set to 1 in ufl integral scaling)
    tdim = cell.topological_dimension()
    expect_weight = (
        integral_type not in ("expression",) + point_integral_types
        and (entitytype == "cell"
             or (entitytype == "facet" and tdim > 1)
             or (integral_type in custom_integral_types)
             )
        )

    if integral_type == "expression":
        # TODO: Figure out how to get non-integrand expressions in here, this is just a draft:
        # Analyse all expressions in one list
        assert isinstance(integrands, (tuple, list))
        all_num_points = [None]
        cases = [(None, integrands)]
    else:
        # Analyse each num_points/integrand separately
        assert isinstance(integrands, dict)
        all_num_points = sorted(integrands.keys())
        cases = [(num_points, [integrands[num_points]])
                 for num_points in all_num_points]
    ir["all_num_points"] = all_num_points

    # One pass per quadrature rule (or a single pass for "expression")
    for num_points, expressions in cases:
        # Rebalance order of nested terminal modifiers
        expressions = [balance_modifiers(expr) for expr in expressions]

        # Build initial scalar list-based graph representation
        V, V_deps, V_targets = build_scalar_graph(expressions)

        # Build terminal_data from V here before factorization.
        # Then we can use it to derive table properties for all modified terminals,
        # and then use that to rebuild the scalar graph more efficiently before
        # argument factorization. We can build terminal_data again after factorization
        # if that's necessary.
        initial_terminal_indices = [i for i, v in enumerate(V)
                                    if is_modified_terminal(v)]
        initial_terminal_data = [analyse_modified_terminal(V[i])
                                 for i in initial_terminal_indices]
        unique_tables, unique_table_types, unique_table_num_dofs, mt_unique_table_reference = \
            build_optimized_tables(num_points, quadrature_rules,
                cell, integral_type, entitytype, initial_terminal_data,
                ir["unique_tables"], p["enable_table_zero_compression"],
                rtol=p["table_rtol"], atol=p["table_atol"])

        # Replace some scalar modified terminals before reconstructing expressions
        # (could possibly use replace() on target expressions instead)
        z = as_ufl(0.0)
        one = as_ufl(1.0)
        for i, mt in zip(initial_terminal_indices, initial_terminal_data):
            if isinstance(mt.terminal, QuadratureWeight):
                # Replace quadrature weight with 1.0, will be added back later
                V[i] = one
            else:
                # Set modified terminals with zero tables to zero
                tr = mt_unique_table_reference.get(mt)
                if tr is not None and tr.ttype == "zeros":
                    V[i] = z

        # Propagate expression changes using dependency list
        # (vertices without dependencies are left untouched)
        for i in range(len(V)):
            deps = [V[j] for j in V_deps[i]]
            if deps:
                V[i] = V[i]._ufl_expr_reconstruct_(*deps)

        # Rebuild scalar target expressions and graph
        # (this may be overkill and possible to optimize
        # away if it turns out to be costly)
        expressions = [V[i] for i in V_targets]

        # Rebuild scalar list-based graph representation
        SV, SV_deps, SV_targets = build_scalar_graph(expressions)
        assert all(i < len(SV) for i in SV_targets)

        # Compute factorization of arguments
        (argument_factorizations, modified_arguments,
             FV, FV_deps, FV_targets) = \
            compute_argument_factorization(SV, SV_deps, SV_targets, len(tensor_shape))
        assert len(SV_targets) == len(argument_factorizations)

        # TODO: Still expecting one target variable in code generation
        assert len(argument_factorizations) == 1
        argument_factorization, = argument_factorizations

        # Store modified arguments in analysed form (list updated in place)
        for i in range(len(modified_arguments)):
            modified_arguments[i] = analyse_modified_terminal(modified_arguments[i])

        # Build set of modified_terminal indices into factorized_vertices
        modified_terminal_indices = [i for i, v in enumerate(FV)
                                     if is_modified_terminal(v)]

        # Build set of modified terminal ufl expressions
        modified_terminals = [analyse_modified_terminal(FV[i])
                              for i in modified_terminal_indices]

        # Make it easy to get mt object from FV index
        # (None for vertices that are not modified terminals)
        FV_mts = [None]*len(FV)
        for i, mt in zip(modified_terminal_indices, modified_terminals):
            FV_mts[i] = mt

        # Mark active modified arguments
        #active_modified_arguments = numpy.zeros(len(modified_arguments), dtype=int)
        #for ma_indices in argument_factorization:
        #    for j in ma_indices:
        #        active_modified_arguments[j] = 1

        # Dependency analysis
        inv_FV_deps, FV_active, FV_piecewise, FV_varying = \
            analyse_dependencies(FV, FV_deps, FV_targets,
                                 modified_terminal_indices,
                                 modified_terminals,
                                 mt_unique_table_reference)

        # Extend piecewise V with unique new FV_piecewise vertices
        pir = ir["piecewise_ir"]
        for i, v in enumerate(FV):
            if FV_piecewise[i]:
                j = pe2i.get(v)
                if j is None:
                    # First time this expression is seen: register it in
                    # all of the parallel piecewise arrays.
                    j = len(pe2i)
                    pe2i[v] = j
                    pir["V"].append(v)
                    pir["V_active"].append(1)
                    mt = FV_mts[i]
                    if mt is not None:
                        pir["mt_tabledata"][mt] = mt_unique_table_reference.get(mt)
                    pir["V_mts"].append(mt)

        # Extend piecewise modified_arguments list with unique new items
        for mt in modified_arguments:
            ma = piecewise_modified_argument_indices.get(mt)
            if ma is None:
                ma = len(pir["modified_arguments"])
                pir["modified_arguments"].append(mt)
                piecewise_modified_argument_indices[mt] = ma

        # Loop over factorization terms
        # (block contributions that vary with this quadrature rule are
        # collected here; piecewise ones go into ir["piecewise_ir"])
        block_contributions = defaultdict(list)
        for ma_indices, fi in sorted(argument_factorization.items()):
            # Get a bunch of information about this term
            rank = len(ma_indices)
            trs = tuple(mt_unique_table_reference[modified_arguments[ai]] for ai in ma_indices)

            unames = tuple(tr.name for tr in trs)
            ttypes = tuple(tr.ttype for tr in trs)
            assert not any(tt == "zeros" for tt in ttypes)

            blockmap = tuple(tr.dofmap for tr in trs)

            block_is_uniform = all(tr.is_uniform for tr in trs)

            # Collect relevant restrictions to identify blocks
            # correctly in interior facet integrals
            block_restrictions = []
            for i, ma in enumerate(ma_indices):
                if trs[i].is_uniform:
                    r = None
                else:
                    r = modified_arguments[ma].restriction
                block_restrictions.append(r)
            block_restrictions = tuple(block_restrictions)

            # Store piecewise status for fi and translate
            # index to piecewise scope if relevant
            factor_is_piecewise = FV_piecewise[fi]
            if factor_is_piecewise:
                factor_index = pe2i[FV[fi]]
            else:
                factor_index = fi

            # TODO: Add separate block modes for quadrature
            # (the two bare string statements below are no-ops that only
            # hold pseudocode notes)
            # Both arguments in quadrature elements
            """ for iq fw = f*w #for i # for j # B[i,j] = fw*U[i]*V[j] = 0 if i != iq or j != iq BQ[iq] = B[iq,iq] = fw for (iq) A[iq+offset0, iq+offset1] = BQ[iq] """
            # One argument in quadrature element
            """ for iq fw[iq] = f*w #for i # for j # B[i,j] = fw*UQ[i]*V[j] = 0 if i != iq for j BQ[iq,j] = fw[iq]*V[iq,j] for (iq) for (j) A[iq+offset, j+offset] = BQ[iq,j] """

            # Decide how to handle code generation for this block
            if p["enable_preintegration"] and (factor_is_piecewise
                    and rank > 0 and "quadrature" not in ttypes):
                # - Piecewise factor is an absolute prerequisite
                # - Could work for rank 0 as well but currently doesn't
                # - Haven't considered how quadrature elements work out
                block_mode = "preintegrated"
            elif p["enable_premultiplication"] and (rank > 0
                    and all(tt in piecewise_ttypes for tt in ttypes)):
                # Integrate functional in quadloop, scale block after quadloop
                block_mode = "premultiplied"
            elif p["enable_sum_factorization"]:
                if (rank == 2 and any(tt in piecewise_ttypes for tt in ttypes)):
                    # Partial computation in quadloop of f*u[i],
                    # compute (f*u[i])*v[i] outside quadloop,
                    # (or with u,v swapped)
                    block_mode = "partial"
                else:
                    # Full runtime integration of f*u[i]*v[j],
                    # can still do partial computation in quadloop of f*u[i]
                    # but must compute (f*u[i])*v[i] as well inside quadloop.
                    # (or with u,v swapped)
                    block_mode = "full"
            else:
                # Use full runtime integration with nothing fancy going on
                block_mode = "safe"

            # Carry out decision
            if block_mode == "preintegrated":
                # Add to contributions:
                # P = sum_q weight*u*v;      preintegrated here
                # B[...] = f * P[...];       generated after quadloop
                # A[blockmap] += B[...];     generated after quadloop

                cache = ir["piecewise_ir"]["preintegrated_blocks"]

                block_is_transposed = False
                pname = cache.get(unames)

                # Reuse transpose to save memory
                if p["enable_block_transpose_reuse"] and pname is None and len(unames) == 2:
                    pname = cache.get((unames[1], unames[0]))
                    if pname is not None:
                        # Cache hit on transpose
                        block_is_transposed = True

                if pname is None:
                    # Cache miss, precompute block
                    weights = quadrature_rules[num_points][1]
                    if integral_type == "interior_facet":
                        ptable = integrate_block_interior_facets(weights, unames, ttypes,
                            unique_tables, unique_table_num_dofs)
                    else:
                        ptable = integrate_block(weights, unames, ttypes,
                            unique_tables, unique_table_num_dofs)
                    ptable = clamp_table_small_numbers(ptable, rtol=p["table_rtol"], atol=p["table_atol"])

                    # Name the new table after its position in the cache
                    pname = "PI%d" % (len(cache,))
                    cache[unames] = pname
                    unique_tables[pname] = ptable
                    unique_table_types[pname] = "preintegrated"

                assert factor_is_piecewise
                block_unames = (pname,)
                blockdata = preintegrated_block_data_t(block_mode, ttypes,
                                                       factor_index, factor_is_piecewise,
                                                       block_unames, block_restrictions,
                                                       block_is_transposed, block_is_uniform,
                                                       pname)
                block_is_piecewise = True

            elif block_mode == "premultiplied":
                # Add to contributions:
                # P = u*v;                        computed here
                # FI = sum_q weight * f;          generated inside quadloop
                # B[...] = FI * P[...];           generated after quadloop
                # A[blockmap] += B[...];          generated after quadloop

                cache = ir["piecewise_ir"]["premultiplied_blocks"]

                block_is_transposed = False
                pname = cache.get(unames)

                # Reuse transpose to save memory
                if p["enable_block_transpose_reuse"] and pname is None and len(unames) == 2:
                    pname = cache.get((unames[1], unames[0]))
                    if pname is not None:
                        # Cache hit on transpose
                        block_is_transposed = True

                if pname is None:
                    # Cache miss, precompute block
                    if integral_type == "interior_facet":
                        ptable = multiply_block_interior_facets(0, unames, ttypes, unique_tables, unique_table_num_dofs)
                    else:
                        ptable = multiply_block(0, unames, ttypes, unique_tables, unique_table_num_dofs)
                    # Name the new table after its position in the cache
                    pname = "PM%d" % (len(cache,))
                    cache[unames] = pname
                    unique_tables[pname] = ptable
                    unique_table_types[pname] = "premultiplied"

                block_unames = (pname,)
                blockdata = premultiplied_block_data_t(block_mode, ttypes,
                                                       factor_index, factor_is_piecewise,
                                                       block_unames, block_restrictions,
                                                       block_is_transposed, block_is_uniform,
                                                       pname)
                block_is_piecewise = False

            elif block_mode == "scaled":  # TODO: Add mode, block is piecewise but choose not to be premultiplied
                # NOTE: no branch of the decision above assigns "scaled",
                # so this is a placeholder for a future mode.
                # Add to contributions:
                # FI = sum_q weight * f;          generated inside quadloop
                # B[...] = FI * u * v;            generated after quadloop
                # A[blockmap] += B[...];          generated after quadloop
                raise NotImplementedError("scaled block mode not implemented.")
                # (probably need mostly the same data as premultiplied, except no P table name or values)
                # The assignment below is unreachable (it follows the raise).
                block_is_piecewise = False

            elif block_mode in ("partial", "full", "safe"):
                # Translate indices to piecewise context if necessary.
                # The block is only piecewise if the factor and every
                # argument table are piecewise and no weight is expected.
                block_is_piecewise = factor_is_piecewise and not expect_weight
                ma_data = []
                for i, ma in enumerate(ma_indices):
                    if trs[i].is_piecewise:
                        ma_index = piecewise_modified_argument_indices[modified_arguments[ma]]
                    else:
                        block_is_piecewise = False
                        ma_index = ma
                    ma_data.append(ma_data_t(ma_index, trs[i]))

                block_is_transposed = False  # FIXME: Handle transposes for these block types

                if block_mode == "partial":
                    # Add to contributions:
                    # P[i] = sum_q weight * f * u[i];  generated inside quadloop
                    # B[i,j] = P[i] * v[j];            generated after quadloop (where v is the piecewise ma)
                    # A[blockmap] += B[...];           generated after quadloop

                    # Find first piecewise index TODO: Is last better? just reverse range here
                    # NOTE(review): piecewise_ma_index stays unbound if no
                    # trs[i].is_piecewise is true; presumably a ttype in
                    # piecewise_ttypes implies is_piecewise -- confirm.
                    for i in range(rank):
                        if trs[i].is_piecewise:
                            piecewise_ma_index = i
                            break
                    assert rank == 2
                    not_piecewise_ma_index = 1 - piecewise_ma_index
                    block_unames = (unames[not_piecewise_ma_index],)
                    blockdata = partial_block_data_t(block_mode, ttypes,
                                                     factor_index, factor_is_piecewise,
                                                     block_unames, block_restrictions,
                                                     block_is_transposed,
                                                     tuple(ma_data), piecewise_ma_index)
                elif block_mode in ("full", "safe"):
                    # Add to contributions:
                    # B[i] = sum_q weight * f * u[i] * v[j];  generated inside quadloop
                    # A[blockmap] += B[i];                    generated after quadloop

                    block_unames = unames
                    blockdata = full_block_data_t(block_mode, ttypes,
                                                  factor_index, factor_is_piecewise,
                                                  block_unames, block_restrictions,
                                                  block_is_transposed,
                                                  tuple(ma_data))
            else:
                error("Invalid block_mode %s" % (block_mode,))

            if block_is_piecewise:
                # Insert in piecewise expr_ir
                ir["piecewise_ir"]["block_contributions"][blockmap].append(blockdata)
            else:
                # Insert in varying expr_ir for this quadrature loop
                block_contributions[blockmap].append(blockdata)

        # Figure out which table names are referenced in unstructured partition
        active_table_names = set()
        for i, mt in zip(modified_terminal_indices, modified_terminals):
            tr = mt_unique_table_reference.get(mt)
            if tr is not None and FV_active[i]:
                active_table_names.add(tr.name)

        # Figure out which table names are referenced in blocks
        for blockmap, contributions in chain(block_contributions.items(),
                                             ir["piecewise_ir"]["block_contributions"].items()):
            for blockdata in contributions:
                if blockdata.block_mode in ("preintegrated", "premultiplied"):
                    active_table_names.add(blockdata.name)
                elif blockdata.block_mode in ("partial", "full", "safe"):
                    for mad in blockdata.ma_data:
                        active_table_names.add(mad.tabledata.name)

        # Record all table types before dropping tables
        ir["unique_table_types"].update(unique_table_types)

        # Drop tables not referenced from modified terminals
        # and tables of zeros and ones
        unused_ttypes = ("zeros", "ones", "quadrature")
        keep_table_names = set()
        for name in active_table_names:
            ttype = ir["unique_table_types"][name]
            if ttype not in unused_ttypes:
                if name in unique_tables:
                    keep_table_names.add(name)
        unique_tables = { name: unique_tables[name]
                          for name in keep_table_names }

        # Add to global set of all tables, checking that a name already
        # present refers to (numerically) the same values
        for name, table in unique_tables.items():
            tbl = ir["unique_tables"].get(name)
            if tbl is not None and not numpy.allclose(tbl, table, rtol=p["table_rtol"], atol=p["table_atol"]):
                error("Table values mismatch with same name.")
        ir["unique_tables"].update(unique_tables)

        # Analyse active terminals to check what we'll need to generate code for
        active_mts = []
        for i, mt in zip(modified_terminal_indices, modified_terminals):
            if FV_active[i]:
                active_mts.append(mt)

        # Figure out if we need to access CellCoordinate to
        # avoid generating quadrature point table otherwise
        if integral_type == "cell":
            need_points = any(isinstance(mt.terminal, CellCoordinate)
                              for mt in active_mts)
        elif integral_type in facet_integral_types:
            need_points = any(isinstance(mt.terminal, FacetCoordinate)
                              for mt in active_mts)
        elif integral_type in custom_integral_types:
            need_points = True  # TODO: Always?
        else:
            need_points = False

        # Figure out if we need to access QuadratureWeight to
        # avoid generating quadrature point table otherwise
        #need_weights = any(isinstance(mt.terminal, QuadratureWeight)
        #                   for mt in active_mts)

        # Count blocks of each mode (varying blocks of this loop only)
        block_modes = defaultdict(int)
        for blockmap, contributions in block_contributions.items():
            for blockdata in contributions:
                block_modes[blockdata.block_mode] += 1
        # Debug output
        summary = "\n".join(" %d\t%s" % (count, mode)
                            for mode, count in sorted(block_modes.items()))
        debug("Blocks of each mode: \n" + summary)

        # If there are any blocks other than preintegrated we need weights
        if expect_weight and any(mode != "preintegrated" for mode in block_modes):
            need_weights = True
        elif integral_type in custom_integral_types:
            need_weights = True  # TODO: Always?
        else:
            need_weights = False

        # Build IR dict for the given expressions
        expr_ir = {}

        # (array) FV-index -> UFL subexpression
        expr_ir["V"] = FV

        # (array) V indices for each input expression component in flattened order
        expr_ir["V_targets"] = FV_targets

        ### Result of factorization:
        # (array) MA-index -> UFL expression of modified arguments
        expr_ir["modified_arguments"] = modified_arguments

        # (dict) tuple(MA-indices) -> FV-index of monomial factor
        #expr_ir["argument_factorization"] = argument_factorization

        expr_ir["block_contributions"] = block_contributions

        ### Modified terminals
        # (array) list of FV-indices to modified terminals
        #expr_ir["modified_terminal_indices"] = modified_terminal_indices

        # Dependency structure of graph:
        # (CRSArray) FV-index -> direct dependency FV-index list
        #expr_ir["dependencies"] = FV_deps

        # (CRSArray) FV-index -> direct dependee FV-index list
        #expr_ir["inverse_dependencies"] = inv_FV_deps

        # Metadata about each vertex
        #expr_ir["active"] = FV_active        # (array) FV-index -> bool
        #expr_ir["V_piecewise"] = FV_piecewise  # (array) FV-index -> bool
        expr_ir["V_varying"] = FV_varying      # (array) FV-index -> bool
        expr_ir["V_mts"] = FV_mts

        # Store mapping from modified terminal object to
        # table data, this is used in integralgenerator
        expr_ir["mt_tabledata"] = mt_unique_table_reference

        # To emit quadrature rules only if needed
        expr_ir["need_points"] = need_points
        expr_ir["need_weights"] = need_weights

        # Store final ir for this num_points
        ir["varying_irs"][num_points] = expr_ir

    return ir
def build_uflacs_ir(cell, integral_type, entitytype, integrands, tensor_shape,
                    coefficient_numbering, quadrature_rules, parameters):
    """Build the uflacs intermediate representation dict for one integral.

    For every case (one per quadrature rule, or a single case holding all
    expressions when ``integral_type == "expression"``) the integrands are
    converted to a scalar graph, tables are built for the modified
    terminals, arguments are factorized out, and every factorization term
    is assigned a block mode ("preintegrated", "premultiplied", "partial",
    "full" or "safe") and stored either in the shared piecewise IR or in
    the varying IR of that case.

    Arguments as they are used in this function:

    cell
        Object providing ``topological_dimension()``; also passed on to
        ``build_optimized_tables``.
    integral_type
        String compared against "expression", "cell" and "interior_facet"
        and tested for membership in ``point_integral_types``,
        ``facet_integral_types`` and ``custom_integral_types``.
    entitytype
        String compared against "cell" and "facet".
    integrands
        A tuple/list of expressions when ``integral_type == "expression"``,
        otherwise a dict ``{num_points: integrand}`` (both are asserted).
    tensor_shape
        Only ``len(tensor_shape)`` is used; it is passed to
        ``compute_argument_factorization``.
    coefficient_numbering
        Stored unchanged under ``ir["coefficient_numbering"]``.
    quadrature_rules
        Indexed as ``quadrature_rules[num_points][1]`` to get the weights
        used for preintegration; also passed to ``build_optimized_tables``.
    parameters
        Passed to ``parse_uflacs_optimization_parameters``.

    Returns the dict ``ir`` with the keys "params",
    "coefficient_numbering", "unique_tables", "unique_table_types",
    "piecewise_ir", "varying_irs" and "all_num_points".

    Problems are reported through ``assert`` statements and the ``error``
    helper (presumably raising; defined outside this function).
    """
    # The intermediate representation dict we're building and returning here
    ir = {}

    # Extract uflacs specific optimization and code generation parameters
    p = parse_uflacs_optimization_parameters(parameters, integral_type)

    # Pass on parameters for consumption in code generation
    ir["params"] = p

    # { ufl coefficient: count }
    ir["coefficient_numbering"] = coefficient_numbering

    # Shared unique tables for all quadrature loops
    ir["unique_tables"] = {}
    ir["unique_table_types"] = {}

    # Shared piecewise expr_ir for all quadrature loops
    ir["piecewise_ir"] = empty_expr_ir()

    # { num_points: expr_ir for one integrand }
    ir["varying_irs"] = {}

    # Temporary data structures to build shared piecewise data:
    # pe2i maps a piecewise vertex expression to the index it was given when
    # first appended to the piecewise IR (presumably its position in
    # ir["piecewise_ir"]["V"], assuming that list starts out empty), and
    # piecewise_modified_argument_indices maps an analysed modified argument
    # to its index in ir["piecewise_ir"]["modified_arguments"].
    pe2i = {}
    piecewise_modified_argument_indices = {}

    # Whether we expect the quadrature weight to be applied or not
    # (in some cases it's just set to 1 in ufl integral scaling)
    tdim = cell.topological_dimension()
    expect_weight = (
        integral_type not in ("expression", ) + point_integral_types and
        (entitytype == "cell" or (entitytype == "facet" and tdim > 1) or
         (integral_type in custom_integral_types)))

    if integral_type == "expression":
        # TODO: Figure out how to get non-integrand expressions in here, this is just a draft:
        # Analyse all expressions in one list
        assert isinstance(integrands, (tuple, list))
        all_num_points = [None]
        cases = [(None, integrands)]
    else:
        # Analyse each num_points/integrand separately
        assert isinstance(integrands, dict)
        all_num_points = sorted(integrands.keys())
        cases = [(num_points, [integrands[num_points]])
                 for num_points in all_num_points]
    ir["all_num_points"] = all_num_points

    for num_points, expressions in cases:
        # Rebalance order of nested terminal modifiers
        expressions = [balance_modifiers(expr) for expr in expressions]

        # Build initial scalar list-based graph representation
        V, V_deps, V_targets = build_scalar_graph(expressions)

        # Build terminal_data from V here before factorization.
        # Then we can use it to derive table properties for all modified terminals,
        # and then use that to rebuild the scalar graph more efficiently before
        # argument factorization. We can build terminal_data again after factorization
        # if that's necessary.
        initial_terminal_indices = [
            i for i, v in enumerate(V) if is_modified_terminal(v)
        ]
        initial_terminal_data = [
            analyse_modified_terminal(V[i]) for i in initial_terminal_indices
        ]
        unique_tables, unique_table_types, unique_table_num_dofs, mt_unique_table_reference = \
            build_optimized_tables(num_points, quadrature_rules,
                                   cell, integral_type, entitytype,
                                   initial_terminal_data,
                                   ir["unique_tables"],
                                   p["enable_table_zero_compression"],
                                   rtol=p["table_rtol"],
                                   atol=p["table_atol"])

        # Replace some scalar modified terminals before reconstructing expressions
        # (could possibly use replace() on target expressions instead)
        z = as_ufl(0.0)
        one = as_ufl(1.0)
        for i, mt in zip(initial_terminal_indices, initial_terminal_data):
            if isinstance(mt.terminal, QuadratureWeight):
                # Replace quadrature weight with 1.0, will be added back later
                V[i] = one
            else:
                # Set modified terminals with zero tables to zero
                tr = mt_unique_table_reference.get(mt)
                if tr is not None and tr.ttype == "zeros":
                    V[i] = z

        # Propagate expression changes using dependency list.
        # Each vertex is rebuilt from the current entries of V, so this
        # relies on the dependencies of V[i] already being up to date when
        # V[i] is visited (presumably V is ordered with dependencies first
        # -- confirm against build_scalar_graph).
        for i in range(len(V)):
            deps = [V[j] for j in V_deps[i]]
            if deps:
                V[i] = V[i]._ufl_expr_reconstruct_(*deps)

        # Rebuild scalar target expressions and graph
        # (this may be overkill and possible to optimize
        # away if it turns out to be costly)
        expressions = [V[i] for i in V_targets]

        # Rebuild scalar list-based graph representation
        SV, SV_deps, SV_targets = build_scalar_graph(expressions)
        assert all(i < len(SV) for i in SV_targets)

        # Compute factorization of arguments
        (argument_factorizations, modified_arguments,
         FV, FV_deps, FV_targets) = \
            compute_argument_factorization(SV, SV_deps, SV_targets,
                                           len(tensor_shape))
        assert len(SV_targets) == len(argument_factorizations)

        # TODO: Still expecting one target variable in code generation
        assert len(argument_factorizations) == 1
        argument_factorization, = argument_factorizations

        # Store modified arguments in analysed form
        # (the list is overwritten in place, entry by entry)
        for i in range(len(modified_arguments)):
            modified_arguments[i] = analyse_modified_terminal(
                modified_arguments[i])

        # Build set of modified_terminal indices into factorized_vertices
        modified_terminal_indices = [
            i for i, v in enumerate(FV) if is_modified_terminal(v)
        ]

        # Build set of modified terminal ufl expressions
        modified_terminals = [
            analyse_modified_terminal(FV[i]) for i in modified_terminal_indices
        ]

        # Make it easy to get mt object from FV index
        # (None for vertices that are not modified terminals)
        FV_mts = [None] * len(FV)
        for i, mt in zip(modified_terminal_indices, modified_terminals):
            FV_mts[i] = mt

        # Mark active modified arguments
        #active_modified_arguments = numpy.zeros(len(modified_arguments), dtype=int)
        #for ma_indices in argument_factorization:
        #    for j in ma_indices:
        #        active_modified_arguments[j] = 1

        # Dependency analysis
        # NOTE(review): inv_FV_deps is not used by any live statement below
        # (only by a commented-out expr_ir entry).
        inv_FV_deps, FV_active, FV_piecewise, FV_varying = \
            analyse_dependencies(FV, FV_deps, FV_targets,
                                 modified_terminal_indices,
                                 modified_terminals,
                                 mt_unique_table_reference)

        # Extend piecewise V with unique new FV_piecewise vertices
        pir = ir["piecewise_ir"]
        for i, v in enumerate(FV):
            if FV_piecewise[i]:
                j = pe2i.get(v)
                if j is None:
                    # First time this piecewise vertex is seen: register it
                    # and append its data to the shared piecewise IR lists.
                    j = len(pe2i)
                    pe2i[v] = j
                    pir["V"].append(v)
                    pir["V_active"].append(1)
                    mt = FV_mts[i]
                    if mt is not None:
                        pir["mt_tabledata"][
                            mt] = mt_unique_table_reference.get(mt)
                    pir["V_mts"].append(mt)

        # Extend piecewise modified_arguments list with unique new items
        for mt in modified_arguments:
            ma = piecewise_modified_argument_indices.get(mt)
            if ma is None:
                ma = len(pir["modified_arguments"])
                pir["modified_arguments"].append(mt)
                piecewise_modified_argument_indices[mt] = ma

        # Loop over factorization terms
        # (varying blocks of this case are collected per blockmap here;
        # piecewise blocks go to ir["piecewise_ir"]["block_contributions"])
        block_contributions = defaultdict(list)
        for ma_indices, fi in sorted(argument_factorization.items()):
            # Get a bunch of information about this term
            rank = len(ma_indices)
            trs = tuple(mt_unique_table_reference[modified_arguments[ai]]
                        for ai in ma_indices)

            unames = tuple(tr.name for tr in trs)
            ttypes = tuple(tr.ttype for tr in trs)
            assert not any(tt == "zeros" for tt in ttypes)

            blockmap = tuple(tr.dofmap for tr in trs)

            block_is_uniform = all(tr.is_uniform for tr in trs)

            # Collect relevant restrictions to identify blocks
            # correctly in interior facet integrals
            block_restrictions = []
            for i, ma in enumerate(ma_indices):
                if trs[i].is_uniform:
                    r = None
                else:
                    r = modified_arguments[ma].restriction
                block_restrictions.append(r)
            block_restrictions = tuple(block_restrictions)

            # Store piecewise status for fi and translate
            # index to piecewise scope if relevant
            factor_is_piecewise = FV_piecewise[fi]
            if factor_is_piecewise:
                factor_index = pe2i[FV[fi]]
            else:
                factor_index = fi

            # TODO: Add separate block modes for quadrature
            # The two bare strings below are no-op expression statements used
            # as block comments; they are kept verbatim. Their pseudo-code,
            # laid out for readability (indentation reconstructed):
            #
            # Both arguments in quadrature elements
            #   for iq
            #       fw = f*w
            #       #for i
            #       #    for j
            #       #        B[i,j] = fw*U[i]*V[j] = 0 if i != iq or j != iq
            #       BQ[iq] = B[iq,iq] = fw
            #   for (iq)
            #       A[iq+offset0, iq+offset1] = BQ[iq]
            """ for iq fw = f*w #for i # for j # B[i,j] = fw*U[i]*V[j] = 0 if i != iq or j != iq BQ[iq] = B[iq,iq] = fw for (iq) A[iq+offset0, iq+offset1] = BQ[iq] """
            # One argument in quadrature element
            #   for iq
            #       fw[iq] = f*w
            #       #for i
            #       #    for j
            #       #        B[i,j] = fw*UQ[i]*V[j] = 0 if i != iq
            #       for j
            #           BQ[iq,j] = fw[iq]*V[iq,j]
            #   for (iq) for (j)
            #       A[iq+offset, j+offset] = BQ[iq,j]
            """ for iq fw[iq] = f*w #for i # for j # B[i,j] = fw*UQ[i]*V[j] = 0 if i != iq for j BQ[iq,j] = fw[iq]*V[iq,j] for (iq) for (j) A[iq+offset, j+offset] = BQ[iq,j] """

            # Decide how to handle code generation for this block
            if p["enable_preintegration"] and (factor_is_piecewise and
                                               rank > 0 and
                                               "quadrature" not in ttypes):
                # - Piecewise factor is an absolute prerequisite
                # - Could work for rank 0 as well but currently doesn't
                # - Haven't considered how quadrature elements work out
                block_mode = "preintegrated"
            elif p["enable_premultiplication"] and (rank > 0 and all(
                    tt in piecewise_ttypes for tt in ttypes)):
                # Integrate functional in quadloop, scale block after quadloop
                block_mode = "premultiplied"
            elif p["enable_sum_factorization"]:
                if (rank == 2 and any(tt in piecewise_ttypes
                                      for tt in ttypes)):
                    # Partial computation in quadloop of f*u[i],
                    # compute (f*u[i])*v[i] outside quadloop,
                    # (or with u,v swapped)
                    block_mode = "partial"
                else:
                    # Full runtime integration of f*u[i]*v[j],
                    # can still do partial computation in quadloop of f*u[i]
                    # but must compute (f*u[i])*v[i] as well inside quadloop.
                    # (or with u,v swapped)
                    block_mode = "full"
            else:
                # Use full runtime integration with nothing fancy going on
                block_mode = "safe"

            # Carry out decision
            if block_mode == "preintegrated":
                # Add to contributions:
                # P = sum_q weight*u*v;      preintegrated here
                # B[...] = f * P[...];       generated after quadloop
                # A[blockmap] += B[...];     generated after quadloop

                # Cache of preintegrated table names keyed by the tuple of
                # argument table names, shared across all cases
                cache = ir["piecewise_ir"]["preintegrated_blocks"]

                block_is_transposed = False
                pname = cache.get(unames)

                # Reuse transpose to save memory
                if p["enable_block_transpose_reuse"] and pname is None and len(
                        unames) == 2:
                    pname = cache.get((unames[1], unames[0]))
                    if pname is not None:
                        # Cache hit on transpose
                        block_is_transposed = True

                if pname is None:
                    # Cache miss, precompute block
                    weights = quadrature_rules[num_points][1]
                    if integral_type == "interior_facet":
                        ptable = integrate_block_interior_facets(
                            weights, unames, ttypes, unique_tables,
                            unique_table_num_dofs)
                    else:
                        ptable = integrate_block(weights, unames, ttypes,
                                                 unique_tables,
                                                 unique_table_num_dofs)
                    ptable = clamp_table_small_numbers(ptable,
                                                       rtol=p["table_rtol"],
                                                       atol=p["table_atol"])

                    # New table name is numbered by the current cache size
                    pname = "PI%d" % (len(cache, ))
                    cache[unames] = pname
                    unique_tables[pname] = ptable
                    unique_table_types[pname] = "preintegrated"

                assert factor_is_piecewise
                block_unames = (pname, )
                blockdata = preintegrated_block_data_t(
                    block_mode, ttypes, factor_index, factor_is_piecewise,
                    block_unames, block_restrictions, block_is_transposed,
                    block_is_uniform, pname)
                block_is_piecewise = True

            elif block_mode == "premultiplied":
                # Add to contributions:
                # P = u*v;                        computed here
                # FI = sum_q weight * f;          generated inside quadloop
                # B[...] = FI * P[...];           generated after quadloop
                # A[blockmap] += B[...];          generated after quadloop

                # Cache of premultiplied table names keyed by the tuple of
                # argument table names, shared across all cases
                cache = ir["piecewise_ir"]["premultiplied_blocks"]

                block_is_transposed = False
                pname = cache.get(unames)

                # Reuse transpose to save memory
                if p["enable_block_transpose_reuse"] and pname is None and len(
                        unames) == 2:
                    pname = cache.get((unames[1], unames[0]))
                    if pname is not None:
                        # Cache hit on transpose
                        block_is_transposed = True

                if pname is None:
                    # Cache miss, precompute block
                    if integral_type == "interior_facet":
                        ptable = multiply_block_interior_facets(
                            0, unames, ttypes, unique_tables,
                            unique_table_num_dofs)
                    else:
                        ptable = multiply_block(0, unames, ttypes,
                                                unique_tables,
                                                unique_table_num_dofs)
                    # New table name is numbered by the current cache size
                    pname = "PM%d" % (len(cache, ))
                    cache[unames] = pname
                    unique_tables[pname] = ptable
                    unique_table_types[pname] = "premultiplied"

                block_unames = (pname, )
                blockdata = premultiplied_block_data_t(
                    block_mode, ttypes, factor_index, factor_is_piecewise,
                    block_unames, block_restrictions, block_is_transposed,
                    block_is_uniform, pname)
                block_is_piecewise = False

            elif block_mode == "scaled":  # TODO: Add mode, block is piecewise but choose not to be premultiplied
                # NOTE(review): the decision logic above never assigns
                # "scaled", so this branch is a placeholder.
                # Add to contributions:
                # FI = sum_q weight * f;          generated inside quadloop
                # B[...] = FI * u * v;            generated after quadloop
                # A[blockmap] += B[...];          generated after quadloop
                raise NotImplementedError("scaled block mode not implemented.")
                # (probably need mostly the same data as premultiplied, except no P table name or values)
                # NOTE(review): the assignment below is unreachable after the raise.
                block_is_piecewise = False

            elif block_mode in ("partial", "full", "safe"):
                # Translate indices to piecewise context if necessary.
                # The block stays piecewise only if the factor is piecewise,
                # no weight is expected and every argument table is piecewise.
                block_is_piecewise = factor_is_piecewise and not expect_weight
                ma_data = []
                for i, ma in enumerate(ma_indices):
                    if trs[i].is_piecewise:
                        ma_index = piecewise_modified_argument_indices[
                            modified_arguments[ma]]
                    else:
                        block_is_piecewise = False
                        ma_index = ma
                    ma_data.append(ma_data_t(ma_index, trs[i]))

                block_is_transposed = False  # FIXME: Handle transposes for these block types

                if block_mode == "partial":
                    # Add to contributions:
                    # P[i] = sum_q weight * f * u[i];  generated inside quadloop
                    # B[i,j] = P[i] * v[j];            generated after quadloop (where v is the piecewise ma)
                    # A[blockmap] += B[...];           generated after quadloop

                    # Find first piecewise index TODO: Is last better? just reverse range here
                    # NOTE(review): piecewise_ma_index stays unbound if no
                    # trs[i].is_piecewise is true; this presumably cannot
                    # happen because "partial" requires a ttype in
                    # piecewise_ttypes -- confirm that is_piecewise matches
                    # that membership test.
                    for i in range(rank):
                        if trs[i].is_piecewise:
                            piecewise_ma_index = i
                            break
                    assert rank == 2
                    not_piecewise_ma_index = 1 - piecewise_ma_index
                    block_unames = (unames[not_piecewise_ma_index], )
                    blockdata = partial_block_data_t(
                        block_mode, ttypes, factor_index, factor_is_piecewise,
                        block_unames, block_restrictions, block_is_transposed,
                        tuple(ma_data), piecewise_ma_index)
                elif block_mode in ("full", "safe"):
                    # Add to contributions:
                    # B[i] = sum_q weight * f * u[i] * v[j];  generated inside quadloop
                    # A[blockmap] += B[i];                    generated after quadloop

                    block_unames = unames
                    blockdata = full_block_data_t(
                        block_mode, ttypes, factor_index, factor_is_piecewise,
                        block_unames, block_restrictions, block_is_transposed,
                        tuple(ma_data))
            else:
                error("Invalid block_mode %s" % (block_mode, ))

            if block_is_piecewise:
                # Insert in piecewise expr_ir
                ir["piecewise_ir"]["block_contributions"][blockmap].append(
                    blockdata)
            else:
                # Insert in varying expr_ir for this quadrature loop
                block_contributions[blockmap].append(blockdata)

        # Figure out which table names are referenced in unstructured partition
        active_table_names = set()
        for i, mt in zip(modified_terminal_indices, modified_terminals):
            tr = mt_unique_table_reference.get(mt)
            if tr is not None and FV_active[i]:
                active_table_names.add(tr.name)

        # Figure out which table names are referenced in blocks
        # (both the varying blocks of this case and all piecewise blocks
        # accumulated so far)
        for blockmap, contributions in chain(
                block_contributions.items(),
                ir["piecewise_ir"]["block_contributions"].items()):
            for blockdata in contributions:
                if blockdata.block_mode in ("preintegrated", "premultiplied"):
                    active_table_names.add(blockdata.name)
                elif blockdata.block_mode in ("partial", "full", "safe"):
                    for mad in blockdata.ma_data:
                        active_table_names.add(mad.tabledata.name)

        # Record all table types before dropping tables
        ir["unique_table_types"].update(unique_table_types)

        # Drop tables not referenced from modified terminals
        # and tables of zeros and ones
        # (tables of type "quadrature" are dropped as well)
        unused_ttypes = ("zeros", "ones", "quadrature")
        keep_table_names = set()
        for name in active_table_names:
            ttype = ir["unique_table_types"][name]
            if ttype not in unused_ttypes:
                # Names coming from earlier cases (via the shared piecewise
                # blocks) need not be present in this case's unique_tables
                if name in unique_tables:
                    keep_table_names.add(name)
        unique_tables = {
            name: unique_tables[name]
            for name in keep_table_names
        }

        # Add to global set of all tables
        # (a name that already exists must refer to matching values)
        for name, table in unique_tables.items():
            tbl = ir["unique_tables"].get(name)
            if tbl is not None and not numpy.allclose(
                    tbl, table, rtol=p["table_rtol"], atol=p["table_atol"]):
                error("Table values mismatch with same name.")
        ir["unique_tables"].update(unique_tables)

        # Analyse active terminals to check what we'll need to generate code for
        active_mts = []
        for i, mt in zip(modified_terminal_indices, modified_terminals):
            if FV_active[i]:
                active_mts.append(mt)

        # Figure out if we need to access CellCoordinate to
        # avoid generating quadrature point table otherwise
        if integral_type == "cell":
            need_points = any(
                isinstance(mt.terminal, CellCoordinate) for mt in active_mts)
        elif integral_type in facet_integral_types:
            need_points = any(
                isinstance(mt.terminal, FacetCoordinate) for mt in active_mts)
        elif integral_type in custom_integral_types:
            need_points = True  # TODO: Always?
        else:
            need_points = False

        # Figure out if we need to access QuadratureWeight to
        # avoid generating quadrature point table otherwise
        #need_weights = any(isinstance(mt.terminal, QuadratureWeight)
        #                   for mt in active_mts)

        # Count blocks of each mode
        # (only the varying blocks of this case are counted)
        block_modes = defaultdict(int)
        for blockmap, contributions in block_contributions.items():
            for blockdata in contributions:
                block_modes[blockdata.block_mode] += 1
        # Debug output
        summary = "\n".join(" %d\t%s" % (count, mode)
                            for mode, count in sorted(block_modes.items()))
        debug("Blocks of each mode: \n" + summary)

        # If there are any blocks other than preintegrated we need weights
        if expect_weight and any(mode != "preintegrated"
                                 for mode in block_modes):
            need_weights = True
        elif integral_type in custom_integral_types:
            need_weights = True  # TODO: Always?
        else:
            need_weights = False

        # Build IR dict for the given expressions
        expr_ir = {}

        # (array) FV-index -> UFL subexpression
        expr_ir["V"] = FV

        # (array) V indices for each input expression component in flattened order
        expr_ir["V_targets"] = FV_targets

        ### Result of factorization:
        # (array) MA-index -> modified argument in analysed form
        # (each entry was replaced by analyse_modified_terminal above)
        expr_ir["modified_arguments"] = modified_arguments

        # (dict) tuple(MA-indices) -> FV-index of monomial factor
        #expr_ir["argument_factorization"] = argument_factorization

        # (dict) blockmap -> list of block data for the varying blocks
        expr_ir["block_contributions"] = block_contributions

        ### Modified terminals
        # (array) list of FV-indices to modified terminals
        #expr_ir["modified_terminal_indices"] = modified_terminal_indices

        # Dependency structure of graph:
        # (CRSArray) FV-index -> direct dependency FV-index list
        #expr_ir["dependencies"] = FV_deps

        # (CRSArray) FV-index -> direct dependee FV-index list
        #expr_ir["inverse_dependencies"] = inv_FV_deps

        # Metadata about each vertex
        #expr_ir["active"] = FV_active        # (array) FV-index -> bool
        #expr_ir["V_piecewise"] = FV_piecewise  # (array) FV-index -> bool
        expr_ir["V_varying"] = FV_varying  # (array) FV-index -> bool
        expr_ir["V_mts"] = FV_mts  # (array) FV-index -> analysed mt or None

        # Store mapping from modified terminal object to
        # table data, this is used in integralgenerator
        expr_ir["mt_tabledata"] = mt_unique_table_reference

        # To emit quadrature rules only if needed
        expr_ir["need_points"] = need_points
        expr_ir["need_weights"] = need_weights

        # Store final ir for this num_points
        ir["varying_irs"][num_points] = expr_ir

    return ir