示例#1
0
def make_common_subexpression(field, prefix=None):
    """Wrap *field* in :class:`CommonSubexpression` nodes.

    Scalars are wrapped directly; object arrays are wrapped entry by entry,
    with ``prefix`` extended by the underscore-joined index of each entry.
    Zero entries (as judged by ``is_zero``) are passed through as plain 0.
    """
    from pytools.obj_array import log_shape
    from hedge.tools import is_zero
    from pymbolic.primitives import CommonSubexpression

    ls = log_shape(field)
    if ls == ():
        # Scalar case: wrap (or pass through) the single expression.
        if is_zero(field):
            return 0
        return CommonSubexpression(field, prefix)

    from pytools import indices_in_shape
    result = numpy.zeros(ls, dtype=object)

    for idx in indices_in_shape(ls):
        entry = field[idx]
        if is_zero(entry):
            result[idx] = 0
            continue

        if prefix is None:
            entry_prefix = None
        else:
            entry_prefix = prefix + "_".join(str(ax) for ax in idx)

        result[idx] = CommonSubexpression(entry, entry_prefix)

    return result
示例#2
0
    def map_quad_int_faces_grid_upsampler(self, op, expr):
        """Interpolate the evaluated *expr* onto the interior-face
        quadrature grid on the GPU.

        One element-local interpolation matrix is applied per element
        group; prepared matrices are cached on the executor, keyed by
        (element group, operator, dtype).
        """
        field = self.rec(expr)
        discr = self.executor.discr

        from hedge.tools import is_zero
        if is_zero(field):
            # Interpolating a zero field yields zero; skip all GPU work.
            return 0

        quad_info = discr.get_cuda_quadrature_info(op.quadrature_tag)

        # NOTE(review): 'self.discr' is used here while 'discr' above is
        # 'self.executor.discr' -- presumably the same object; confirm.
        result = self.discr._empty_gpuarray(quad_info.int_face_vector_size,
                                            dtype=field.dtype)

        for eg in self.discr.element_groups:
            eg_quad_info = discr.get_cuda_elgroup_quadrature_info(
                eg, op.quadrature_tag)
            # Image size: quadrature nodes per face times faces per element.
            kernel = discr.element_local_kernel(
                image_dofs_per_el=eg_quad_info.ldis_quad_info.face_node_count(
                ) * eg.local_discretization.face_count(),
                aligned_image_dofs_per_microblock=eg_quad_info.
                aligned_int_face_dofs_per_microblock)
            try:
                # Reuse a previously prepared matrix for this
                # (group, op, dtype) combination if one is cached.
                prepared_matrix = \
                        self.executor.elwise_linear_cache[eg, op, field.dtype]
            except KeyError:
                prepared_matrix = kernel.prepare_matrix(
                    eg_quad_info.ldis_quad_info.
                    volume_to_face_up_interpolation_matrix())

                self.executor.elwise_linear_cache[eg, op, field.dtype] = \
                        prepared_matrix

            kernel(field, prepared_matrix, out_vector=result)

        return result
示例#3
0
File: execute.py — Project: felipeh/hedge
    def map_quad_grid_upsampler(self, op, expr):
        """Interpolate the evaluated *expr* onto the volume quadrature grid.

        Applies one element-local interpolation matrix per element group on
        the GPU; prepared matrices are cached on the executor, keyed by
        (element group, operator, dtype).
        """
        field = self.rec(expr)
        discr = self.executor.discr

        from hedge.tools import is_zero
        if is_zero(field):
            # Interpolating a zero field yields zero; skip the GPU work.
            return 0

        quad_info = discr.get_cuda_quadrature_info(
                op.quadrature_tag)

        # NOTE(review): 'self.discr' here vs. 'self.executor.discr' above --
        # presumably the same object; confirm.
        result = self.discr._empty_gpuarray(
                quad_info.volume_vector_size,
                dtype=field.dtype)

        for eg in self.discr.element_groups:
            eg_quad_info = discr.get_cuda_elgroup_quadrature_info(
                    eg, op.quadrature_tag)
            kernel = discr.element_local_kernel(
                    image_dofs_per_el=eg_quad_info.ldis_quad_info.node_count(),
                    aligned_image_dofs_per_microblock
                    =eg_quad_info.aligned_dofs_per_microblock)
            try:
                # Reuse the prepared interpolation matrix if already cached.
                prepared_matrix = \
                        self.executor.elwise_linear_cache[eg, op, field.dtype]
            except KeyError:
                prepared_matrix = kernel.prepare_matrix(
                        eg_quad_info.ldis_quad_info.volume_up_interpolation_matrix())

                self.executor.elwise_linear_cache[eg, op, field.dtype] = \
                        prepared_matrix

            kernel(field, prepared_matrix, out_vector=result)

        return result
示例#4
0
File: execute.py — Project: felipeh/hedge
    def map_elementwise_linear(self, op, expr):
        """Apply the elementwise-linear operator *op* to the evaluated *expr*.

        A zero operand short-circuits to 0.  Per element group, the prepared
        operator matrix is looked up in (or added to) the executor's cache,
        keyed by (group, operator, dtype), then applied via the element-local
        kernel.
        """
        field = self.rec(expr)

        from hedge.tools import is_zero
        if is_zero(field):
            return 0

        kernel = self.executor.discr.element_local_kernel()
        # FIXME: wouldn't volume_empty suffice?
        result = self.discr.volume_zeros(dtype=field.dtype)

        cache = self.executor.elwise_linear_cache
        for eg in self.discr.element_groups:
            cache_key = (eg, op, field.dtype)
            if cache_key in cache:
                prepared_matrix = cache[cache_key]
            else:
                prepared_matrix = kernel.prepare_matrix(op.matrix(eg))
                assert op.coefficients(eg) is None, \
                        "per-element scaling of elementwise linear ops is no " \
                        "longer supported"
                cache[cache_key] = prepared_matrix

            kernel(field, prepared_matrix, out_vector=result)

        return result
示例#5
0
        def bind_one(subexpr):
            """Bind *subexpr* to this operator, passing zeros through as-is."""
            if not is_zero(subexpr):
                from hedge.optemplate.primitives import OperatorBinding
                return OperatorBinding(self, subexpr)
            return subexpr
示例#6
0
    def map_elementwise_linear(self, op, expr):
        """Evaluate *expr*, then apply the elementwise-linear operator *op*.

        Zero operands return 0 immediately.  Prepared operator matrices are
        cached on the executor under (element group, operator, dtype).
        """
        vec = self.rec(expr)

        from hedge.tools import is_zero
        if is_zero(vec):
            # Nothing to do for an all-zero operand.
            return 0

        kernel = self.executor.discr.element_local_kernel()
        # FIXME: wouldn't volume_empty suffice?
        out = self.discr.volume_zeros(dtype=vec.dtype)

        for group in self.discr.element_groups:
            key = (group, op, vec.dtype)
            try:
                mat = self.executor.elwise_linear_cache[key]
            except KeyError:
                mat = kernel.prepare_matrix(op.matrix(group))
                assert op.coefficients(group) is None, \
                        "per-element scaling of elementwise linear ops is no " \
                        "longer supported"
                self.executor.elwise_linear_cache[key] = mat

            kernel(vec, mat, out_vector=out)

        return out
示例#7
0
    def map_field_component(self, expr):
        """Map a flux FieldComponent to the variable naming its field value.

        Interior components read from the interior field expression
        (prefix "a"), exterior ones from the exterior field expression
        (prefix "b").  Zero components collapse to plain 0.
        """
        if expr.is_interior:
            prefix, f_expr = "a", self.int_field_expr
        else:
            prefix, f_expr = "b", self.ext_field_expr

        from hedge.tools import is_obj_array, is_zero
        from pymbolic import var

        if is_obj_array(f_expr):
            f_expr = f_expr[expr.index]
        else:
            # Scalar fields only have a single (zeroth) component.
            assert expr.index == 0, repr(f_expr)

        if is_zero(f_expr):
            return 0
        return var("val_%s_field%d" % (prefix, self.dep_to_index[f_expr]))
示例#8
0
    def map_field_component(self, expr):
        """Resolve a flux FieldComponent to its generated variable name.

        Prefix "a" denotes the interior side, "b" the exterior side.
        A component whose field expression is zero becomes the constant 0.
        """
        interior = expr.is_interior
        prefix = "a" if interior else "b"
        f_expr = self.int_field_expr if interior else self.ext_field_expr

        from hedge.tools import is_obj_array, is_zero
        from pymbolic import var

        if is_obj_array(f_expr):
            f_expr = f_expr[expr.index]
        else:
            # A non-array field expression has exactly one component.
            assert expr.index == 0, repr(f_expr)

        if is_zero(f_expr):
            return 0
        return var("val_%s_field%d" % (prefix, self.dep_to_index[f_expr]))
示例#9
0
def get_flux_dependencies(flux, field, bdry="all"):
    """Yield the nonzero (sub)expressions of *field* that *flux* reads.

    :param flux: a flux expression whose FieldComponent dependencies are
        gathered with FluxDependencyMapper.
    :param field: a BoundaryPair, or a plain (possibly object-array) field
        expression.
    :param bdry: one of "all", "int", "ext" -- which side of a BoundaryPair
        to report dependencies for; ignored for non-BoundaryPair fields.
    """
    from hedge.flux import FluxDependencyMapper, FieldComponent
    in_fields = list(FluxDependencyMapper(
        include_calls=False)(flux))

    # check that all in_fields are FieldComponent instances
    assert not [in_field
        for in_field in in_fields
        if not isinstance(in_field, FieldComponent)]

    def maybe_index(fld, index):
        # Pick entry *index* from an object array; pass scalars through.
        # Fixed: this previously read the closed-over loop variable
        # 'inf.index' instead of its own 'index' parameter, which only
        # worked by accident because every call site passes inf.index.
        from hedge.tools import is_obj_array
        if is_obj_array(fld):
            return fld[index]
        else:
            return fld

    from hedge.tools import is_zero
    from hedge.optemplate import BoundaryPair
    if isinstance(field, BoundaryPair):
        for inf in in_fields:
            if inf.is_interior:
                if bdry in ["all", "int"]:
                    value = maybe_index(field.field, inf.index)

                    if not is_zero(value):
                        yield value
            else:
                if bdry in ["all", "ext"]:
                    value = maybe_index(field.bfield, inf.index)

                    if not is_zero(value):
                        yield value
    else:
        for inf in in_fields:
            value = maybe_index(field, inf.index)
            if not is_zero(value):
                yield value
示例#10
0
def get_flux_dependencies(flux, field, bdry="all"):
    """Yield the nonzero (sub)expressions of *field* that *flux* reads.

    :param flux: a flux expression whose FieldComponent dependencies are
        gathered with FluxDependencyMapper.
    :param field: a BoundaryPair, or a plain (possibly object-array) field
        expression.
    :param bdry: one of "all", "int", "ext" -- which side of a BoundaryPair
        to report dependencies for; ignored for non-BoundaryPair fields.
    """
    from hedge.flux import FluxDependencyMapper, FieldComponent
    in_fields = list(FluxDependencyMapper(include_calls=False)(flux))

    # check that all in_fields are FieldComponent instances
    assert not [
        in_field
        for in_field in in_fields if not isinstance(in_field, FieldComponent)
    ]

    def maybe_index(fld, index):
        # Pick entry *index* from an object array; pass scalars through.
        # Fixed: this previously read the closed-over loop variable
        # 'inf.index' instead of its own 'index' parameter, which only
        # worked by accident because every call site passes inf.index.
        from hedge.tools import is_obj_array
        if is_obj_array(fld):
            return fld[index]
        else:
            return fld

    from hedge.tools import is_zero
    from hedge.optemplate import BoundaryPair
    if isinstance(field, BoundaryPair):
        for inf in in_fields:
            if inf.is_interior:
                if bdry in ["all", "int"]:
                    value = maybe_index(field.field, inf.index)

                    if not is_zero(value):
                        yield value
            else:
                if bdry in ["all", "ext"]:
                    value = maybe_index(field.bfield, inf.index)

                    if not is_zero(value):
                        yield value
    else:
        for inf in in_fields:
            value = maybe_index(field, inf.index)
            if not is_zero(value):
                yield value
示例#11
0
File: execute.py — Project: felipeh/hedge
    def map_quad_bdry_grid_upsampler(self, op, expr):
        """Interpolate evaluated boundary data in *expr* onto the boundary
        quadrature grid on the GPU.

        Prepared face-interpolation matrices are cached on the executor,
        keyed by (element group, operator, dtype).
        """
        field = self.rec(expr)
        discr = self.executor.discr

        from hedge.tools import is_zero
        if is_zero(field):
            # Interpolating a zero field yields zero; skip all GPU work.
            return 0

        quad_info = discr.get_cuda_quadrature_info(
                op.quadrature_tag)

        result = self.discr._empty_gpuarray(
                quad_info.face_storage_info.aligned_boundary_dof_count,
                dtype=field.dtype)

        for eg in self.discr.element_groups:
            eqi = discr.get_cuda_elgroup_quadrature_info(
                    eg, op.quadrature_tag)
            # Preimage sizes are taken from the nodal face storage
            # (discr.face_storage_info), image sizes from the quadrature
            # face storage (quad_info.face_storage_info) -- the kernel maps
            # nodal boundary data to quadrature boundary data.
            kernel = discr.element_local_kernel(
                    aligned_preimage_dofs_per_microblock
                    =discr.face_storage_info.aligned_boundary_dofs_per_face,

                    preimage_dofs_per_el
                    =eg.local_discretization.face_node_count(),

                    aligned_image_dofs_per_microblock
                    =quad_info.face_storage_info.aligned_boundary_dofs_per_face,

                    image_dofs_per_el
                    =eqi.ldis_quad_info.face_node_count(),

                    # NOTE(review): one element per microblock; the microblock
                    # count is inferred from total/per-face dof counts.
                    elements_per_microblock=1,
                    microblock_count
                    =quad_info.face_storage_info.aligned_boundary_dof_count//
                    quad_info.face_storage_info.aligned_boundary_dofs_per_face)
            try:
                # Reuse a previously prepared matrix for this
                # (group, op, dtype) combination if one is cached.
                prepared_matrix = \
                        self.executor.elwise_linear_cache[eg, op, field.dtype]
            except KeyError:
                prepared_matrix = kernel.prepare_matrix(
                        eqi.ldis_quad_info
                        .face_up_interpolation_matrix())

                self.executor.elwise_linear_cache[eg, op, field.dtype] = \
                        prepared_matrix

            kernel(field, prepared_matrix, out_vector=result)

        return result
示例#12
0
    def __add__(self, update):
        """Advance this state by *update* = (dx, dp, ddep).

        A zero update returns ``self`` unchanged; otherwise a new
        TimesteppablePicState wrapping the advanced state is returned.
        """
        from hedge.tools import is_zero
        if is_zero(update):
            return self

        from pyrticle.tools import NumberShiftableVector

        #from pytools import typedump
        dx, dp, ddep = update
        new_state = self.method.advance_state(
                self.state,
                NumberShiftableVector.unwrap(dx),
                NumberShiftableVector.unwrap(dp),
                ddep)

        return TimesteppablePicState(self.method, new_state)
示例#13
0
File: cloud.py — Project: gimac/pyrticle
    def __add__(self, update):
        """Advance this state by *update* = (dx, dp, ddep).

        A zero update returns ``self`` unchanged; otherwise a new
        TimesteppablePicState wrapping the advanced state is returned.
        """
        from hedge.tools import is_zero
        if is_zero(update):
            return self

        from pyrticle.tools import NumberShiftableVector

        #from pytools import typedump
        # Unwrap the position and momentum increments before advancing.
        dx, dp, ddep = update
        dx = NumberShiftableVector.unwrap(dx)
        dp = NumberShiftableVector.unwrap(dp)

        new_state = self.method.advance_state(self.state, dx, dp, ddep)

        return TimesteppablePicState(self.method, new_state)
示例#14
0
文件: em.py 项目: allansnielsen/hedge
    def incident_bc(self, w=None):
        """Flux terms for incident boundary conditions.

        Returns an all-zero object array when there is no incident-field
        data, otherwise the negated incident-field expression wrapped in a
        common subexpression.
        """
        # NOTE: Untested for inhomogeneous materials, but would usually be
        # physically meaningless anyway (are there exceptions to this?)

        e, h = self.split_eh(self.field_placeholder(w))
        if not self.fixed_material:
            from warnings import warn
            if self.incident_tag != hedge.mesh.TAG_NONE:
                warn("Incident boundary conditions assume homogeneous"
                     " background material, results may be unphysical")

        from hedge.tools import count_subset
        fld_cnt = count_subset(self.get_eh_subset())

        from hedge.tools import is_zero
        # NOTE(review): incident_bc_data appears to be a plain callable
        # attribute rather than a bound method -- 'self' is passed
        # explicitly; confirm against where it is assigned.
        incident_bc_data = self.incident_bc_data(self, e, h)
        if is_zero(incident_bc_data):
            return make_obj_array([0]*fld_cnt)
        else:
            return cse(-incident_bc_data)
示例#15
0
    def incident_bc(self, w=None):
        """Flux terms for incident boundary conditions."""
        # NOTE: Untested for inhomogeneous materials, but would usually be
        # physically meaningless anyway (are there exceptions to this?)
        e, h = self.split_eh(self.field_placeholder(w))

        if not self.fixed_material:
            from warnings import warn
            if self.incident_tag != hedge.mesh.TAG_NONE:
                warn("Incident boundary conditions assume homogeneous"
                     " background material, results may be unphysical")

        from hedge.tools import count_subset, is_zero
        fld_cnt = count_subset(self.get_eh_subset())

        # NOTE(review): incident_bc_data is presumably a plain callable
        # attribute (not a bound method); 'self' is passed explicitly.
        incident_bc_data = self.incident_bc_data(self, e, h)
        if is_zero(incident_bc_data):
            # No incident field: one zero per E/H component in use.
            return make_obj_array([0] * fld_cnt)
        return cse(-incident_bc_data)
示例#16
0
    def __call__(self, operators, field):
        """Differentiate *field* along all reference axes at once, then
        return the axes requested by *operators* (in order)."""
        # pick a "representative operator" -- any one of the batch will do
        # for fetching the shared matrices and index ranges
        rep_op = operators[0]

        result = [self.discr.volume_zeros(dtype=field.dtype)
                  for _ in range(self.discr.dimensions)]

        from hedge.tools import is_zero
        if not is_zero(field):
            from pytools import to_uncomplex_dtype
            uncomplex_dtype = to_uncomplex_dtype(field.dtype)

            for eg in self.discr.element_groups:
                matrices = rep_op.matrices(eg)
                args = [rep_op.preimage_ranges(eg), eg.ranges, field]
                args += [m.astype(uncomplex_dtype) for m in matrices]
                args += result

                diff_routine = self.make_diff(eg, field.dtype,
                        matrices[0].shape)
                diff_routine(*args)

        return [result[op.rst_axis] for op in operators]
示例#17
0
    def finalize_multi_assign(self, names, exprs, do_not_return, priority):
        """Choose the instruction type for a finished multi-assignment.

        Zero-constant or zero-flop assignments become plain Assign nodes;
        everything else becomes a VectorExprAssign.

        :raises RuntimeError: if a zero constant was aggregated together
            with other expressions.
        """
        from pytools import any
        from hedge.tools import is_zero

        has_zero_assignees = any(is_zero(expr) for expr in exprs)
        if has_zero_assignees and len(exprs) > 1:
            raise RuntimeError("found aggregated zero constant assignment")

        from hedge.optemplate import FlopCounter
        flop_count = sum(FlopCounter()(expr) for expr in exprs)

        if not has_zero_assignees and flop_count != 0:
            return VectorExprAssign(names=names,
                                    exprs=exprs,
                                    do_not_return=do_not_return,
                                    dep_mapper_factory=self.dep_mapper_factory,
                                    priority=priority)
        return Assign(names,
                      exprs,
                      priority=priority,
                      dep_mapper_factory=self.dep_mapper_factory)
示例#18
0
File: diff.py — Project: gimac/hedge
    def __call__(self, operators, field):
        """Differentiate *field* along all reference axes at once, then
        return the axes requested by *operators* (in order).

        A zero field skips all per-group work and returns the zero vectors.
        """
        # pick a "representative operator"
        rep_op = operators[0]

        result = [self.discr.volume_zeros(dtype=field.dtype) for i in range(self.discr.dimensions)]
        from hedge.tools import is_zero

        if not is_zero(field):
            for eg in self.discr.element_groups:
                from pytools import to_uncomplex_dtype

                # The differentiation routine works on the uncomplex dtype.
                uncomplex_dtype = to_uncomplex_dtype(field.dtype)
                matrices = rep_op.matrices(eg)
                # Inputs first, then one matrix per axis, then the output
                # vectors (filled in place by diff_routine).
                args = (
                    [rep_op.preimage_ranges(eg), eg.ranges, field]
                    + [m.astype(uncomplex_dtype) for m in matrices]
                    + result
                )

                diff_routine = self.make_diff(eg, field.dtype, matrices[0].shape)
                diff_routine(*args)

        return [result[op.rst_axis] for op in operators]
示例#19
0
    def finalize_multi_assign(self, names, exprs, do_not_return, priority):
        """Choose the instruction type for a finished multi-assignment.

        Zero-constant or zero-flop assignments become plain Assign nodes;
        everything else becomes a VectorExprAssign.

        :raises RuntimeError: if a zero constant was aggregated together
            with other expressions.
        """
        from pytools import any
        from hedge.tools import is_zero

        has_zero_assignees = any(is_zero(expr) for expr in exprs)
        if has_zero_assignees:
            if len(exprs) > 1:
                raise RuntimeError("found aggregated zero constant assignment")

        from hedge.optemplate import FlopCounter

        flop_count = sum(FlopCounter()(expr) for expr in exprs)

        if has_zero_assignees or flop_count == 0:
            return Assign(names, exprs, priority=priority, dep_mapper_factory=self.dep_mapper_factory)
        else:
            return VectorExprAssign(
                names=names,
                exprs=exprs,
                do_not_return=do_not_return,
                dep_mapper_factory=self.dep_mapper_factory,
                priority=priority,
            )
示例#20
0
    def map_operator_binding(self, expr):
        """Rewrite a flux applied to a BoundaryPair so its boundary field
        becomes directly flux-evaluable.

        Bindings that are not a flux over a BoundaryPair pass through
        unchanged.  Otherwise: (1) the maximal flux-evaluable subexpressions
        of the boundary field are found and registered as numbered volume /
        boundary field components; (2) the rewritten boundary field is
        substituted into the flux expression; the result is a new flux
        operator binding over a rebuilt BoundaryPair, or 0 if the
        substituted flux vanishes.
        """
        from hedge.optemplate.operators import FluxOperatorBase
        from hedge.optemplate.primitives import BoundaryPair
        from hedge.flux import FluxSubstitutionMapper, FieldComponent

        if not (isinstance(expr.op, FluxOperatorBase)
                and isinstance(expr.field, BoundaryPair)):
            return IdentityMapper.map_operator_binding(self, expr)

        bpair = expr.field
        vol_field = bpair.field
        bdry_field = bpair.bfield
        flux = expr.op.flux

        bdry_dependencies = DependencyMapper(
            include_calls="descend_args",
            include_operator_bindings=True)(bdry_field)

        vol_dependencies = DependencyMapper(
            include_operator_bindings=True)(vol_field)

        # A variable may feed either the volume or the boundary side of the
        # flux, but not both.
        vol_bdry_intersection = bdry_dependencies & vol_dependencies
        if vol_bdry_intersection:
            raise RuntimeError(
                "Variables are being used as both "
                "boundary and volume quantities: %s" %
                ", ".join(str(v) for v in vol_bdry_intersection))

        # Step 1: Find maximal flux-evaluable subexpression of boundary field
        # in given BoundaryPair.

        class MaxBoundaryFluxEvaluableExpressionFinder(IdentityMapper,
                                                       OperatorReducerMixin):
            # Maps the boundary field to an expression built from numbered
            # FieldComponents, registering each volume/boundary subexpression
            # it encounters along the way.  Closes over 'bpair' above for
            # boundary-tag consistency checks.

            def __init__(self, vol_expr_list, expensive_bdry_op_detector):
                self.vol_expr_list = vol_expr_list
                self.vol_expr_to_idx = dict(
                    (vol_expr, idx)
                    for idx, vol_expr in enumerate(vol_expr_list))

                self.bdry_expr_list = []
                self.bdry_expr_to_idx = {}

                self.expensive_bdry_op_detector = expensive_bdry_op_detector

            # {{{ expression registration
            def register_boundary_expr(self, expr):
                # Return the index of *expr* in the boundary expression list,
                # adding it if not seen before.
                try:
                    return self.bdry_expr_to_idx[expr]
                except KeyError:
                    idx = len(self.bdry_expr_to_idx)
                    self.bdry_expr_to_idx[expr] = idx
                    self.bdry_expr_list.append(expr)
                    return idx

            def register_volume_expr(self, expr):
                # Same as register_boundary_expr, but for volume expressions.
                try:
                    return self.vol_expr_to_idx[expr]
                except KeyError:
                    idx = len(self.vol_expr_to_idx)
                    self.vol_expr_to_idx[expr] = idx
                    self.vol_expr_list.append(expr)
                    return idx

            # }}}

            # {{{ map_xxx routines

            @memoize_method
            def map_common_subexpression(self, expr):
                # Here we need to decide whether this CSE should be turned into
                # a flux CSE or not. This is a good idea if the transformed
                # expression only contains "bare" volume or boundary
                # expressions.  However, as soon as an operator is applied
                # somewhere in the subexpression, the CSE should not be touched
                # in order to avoid redundant evaluation of that operator.
                #
                # Observe that at the time of this writing (Feb 2010), the only
                # operators that may occur in boundary expressions are
                # quadrature-related.

                has_expensive_operators = \
                        self.expensive_bdry_op_detector(expr.child)

                if has_expensive_operators:
                    return FieldComponent(self.register_boundary_expr(expr),
                                          is_interior=False)
                else:
                    return IdentityMapper.map_common_subexpression(self, expr)

            def map_normal(self, expr):
                raise RuntimeError(
                    "Your operator template contains a flux normal. "
                    "You may find this confusing, but you can't do that. "
                    "It turns out that you need to use "
                    "hedge.optemplate.make_normal() for normals in boundary "
                    "terms of operator templates.")

            def map_normal_component(self, expr):
                if expr.boundary_tag != bpair.tag:
                    raise RuntimeError(
                        "BoundaryNormalComponent and BoundaryPair "
                        "do not agree about boundary tag: %s vs %s" %
                        (expr.boundary_tag, bpair.tag))

                from hedge.flux import Normal
                return Normal(expr.axis)

            def map_variable(self, expr):
                # Bare variables in the boundary field are boundary data.
                return FieldComponent(self.register_boundary_expr(expr),
                                      is_interior=False)

            map_subscript = map_variable

            def map_operator_binding(self, expr):
                from hedge.optemplate import (BoundarizeOperator,
                                              FluxExchangeOperator,
                                              QuadratureGridUpsampler,
                                              QuadratureBoundaryGridUpsampler)

                if isinstance(expr.op, BoundarizeOperator):
                    if expr.op.tag != bpair.tag:
                        raise RuntimeError(
                            "BoundarizeOperator and BoundaryPair "
                            "do not agree about boundary tag: %s vs %s" %
                            (expr.op.tag, bpair.tag))

                    # Boundarized volume data counts as interior.
                    return FieldComponent(self.register_volume_expr(
                        expr.field),
                                          is_interior=True)

                elif isinstance(expr.op, FluxExchangeOperator):
                    from hedge.mesh import TAG_RANK_BOUNDARY
                    op_tag = TAG_RANK_BOUNDARY(expr.op.rank)
                    if bpair.tag != op_tag:
                        raise RuntimeError(
                            "BoundarizeOperator and "
                            "FluxExchangeOperator do not agree about "
                            "boundary tag: %s vs %s" % (op_tag, bpair.tag))
                    return FieldComponent(self.register_boundary_expr(expr),
                                          is_interior=False)

                elif isinstance(expr.op, QuadratureBoundaryGridUpsampler):
                    if bpair.tag != expr.op.boundary_tag:
                        raise RuntimeError(
                            "BoundarizeOperator "
                            "and QuadratureBoundaryGridUpsampler "
                            "do not agree about boundary tag: %s vs %s" %
                            (expr.op.boundary_tag, bpair.tag))
                    return FieldComponent(self.register_boundary_expr(expr),
                                          is_interior=False)

                elif isinstance(expr.op, QuadratureGridUpsampler):
                    # We're invoked before operator specialization, so we may
                    # see these instead of QuadratureBoundaryGridUpsampler.
                    return FieldComponent(self.register_boundary_expr(expr),
                                          is_interior=False)

                else:
                    raise RuntimeError(
                        "Found '%s' in a boundary term. "
                        "To the best of my knowledge, no hedge operator applies "
                        "directly to boundary data, so this is likely in error."
                        % expr.op)

            def map_flux_exchange(self, expr):
                return FieldComponent(self.register_boundary_expr(expr),
                                      is_interior=False)

            # }}}

        from hedge.tools import is_obj_array
        if not is_obj_array(vol_field):
            vol_field = [vol_field]

        mbfeef = MaxBoundaryFluxEvaluableExpressionFinder(
            list(vol_field), self.expensive_bdry_op_detector)
        #from hedge.optemplate.tools import pretty
        #print pretty(bdry_field)
        #raw_input("YO")
        new_bdry_field = mbfeef(bdry_field)

        # Step II: Substitute the new_bdry_field into the flux.
        def sub_bdry_into_flux(expr):
            # Replace exterior FieldComponents in the flux with the matching
            # piece of the rewritten boundary field; None leaves the node
            # untouched.
            if isinstance(expr, FieldComponent) and not expr.is_interior:
                if expr.index == 0 and not is_obj_array(bdry_field):
                    return new_bdry_field
                else:
                    return new_bdry_field[expr.index]
            else:
                return None

        new_flux = FluxSubstitutionMapper(sub_bdry_into_flux)(flux)

        from hedge.tools import is_zero, make_obj_array
        if is_zero(new_flux):
            # The whole flux dropped out; no operator application needed.
            return 0
        else:
            # Rebuild the flux operator around the substituted flux, keeping
            # its remaining constructor arguments, and apply it to the
            # recursively-processed volume/boundary expression lists.
            return type(expr.op)(new_flux, *expr.op.__getinitargs__()[1:])(
                BoundaryPair(
                    make_obj_array([self.rec(e)
                                    for e in mbfeef.vol_expr_list]),
                    make_obj_array(
                        [self.rec(e) for e in mbfeef.bdry_expr_list]),
                    bpair.tag))
示例#21
0
File: compiler.py — Project: felipeh/hedge
    def aggregate_assignments(self, instructions, result):
        from pymbolic.primitives import Variable

        # aggregation helpers -------------------------------------------------
        def get_complete_origins_set(insn, skip_levels=0):
            if skip_levels < 0:
                skip_levels = 0

            result = set()
            for dep in insn.get_dependencies():
                if isinstance(dep, Variable):
                    dep_origin = origins_map.get(dep.name, None)
                    if dep_origin is not None:
                        if skip_levels <= 0:
                            result.add(dep_origin)
                        result |= get_complete_origins_set(
                                dep_origin, skip_levels-1)

            return result

        var_assignees_cache = {}
        def get_var_assignees(insn):
            try:
                return var_assignees_cache[insn]
            except KeyError:
                result = set(Variable(assignee)
                        for assignee in insn.get_assignees())
                var_assignees_cache[insn] = result
                return result

        def aggregate_two_assignments(ass_1, ass_2):
            names = ass_1.names + ass_2.names

            from pymbolic.primitives import Variable
            deps = (ass_1.get_dependencies() | ass_2.get_dependencies()) \
                    - set(Variable(name) for name in names)

            return Assign(
                    names=names, exprs=ass_1.exprs + ass_2.exprs,
                    _dependencies=deps,
                    dep_mapper_factory=self.dep_mapper_factory,
                    priority=max(ass_1.priority, ass_2.priority))

        # main aggregation pass -----------------------------------------------
        origins_map = dict(
                    (assignee, insn)
                    for insn in instructions
                    for assignee in insn.get_assignees())

        from pytools import partition
        unprocessed_assigns, other_insns = partition(
                lambda insn: isinstance(insn, Assign),
                instructions)

        # filter out zero-flop-count assigns--no need to bother with those
        processed_assigns, unprocessed_assigns = partition(
                lambda ass: ass.flop_count() == 0,
                unprocessed_assigns)

        # filter out zero assignments
        from pytools import any
        from hedge.tools import is_zero

        i = 0

        while i < len(unprocessed_assigns):
            my_assign = unprocessed_assigns[i]
            if any(is_zero(expr) for expr in my_assign.exprs):
                processed_assigns.append(unprocessed_assigns.pop())
            else:
                i += 1

        # greedy aggregation
        while unprocessed_assigns:
            my_assign = unprocessed_assigns.pop()

            my_deps = my_assign.get_dependencies()
            my_assignees = get_var_assignees(my_assign)

            agg_candidates = []
            for i, other_assign in enumerate(unprocessed_assigns):
                other_deps = other_assign.get_dependencies()
                other_assignees = get_var_assignees(other_assign)

                if ((my_deps & other_deps
                        or my_deps & other_assignees
                        or other_deps & my_assignees)
                        and my_assign.priority == other_assign.priority):
                    agg_candidates.append((i, other_assign))

            did_work = False

            if agg_candidates:
                my_indirect_origins = get_complete_origins_set(
                        my_assign, skip_levels=1)

                for other_assign_index, other_assign in agg_candidates:
                    if self.max_vectors_in_batch_expr is not None:
                        new_assignee_count = len(
                                set(my_assign.get_assignees())
                                | set(other_assign.get_assignees()))
                        new_dep_count = len(
                                my_assign.get_dependencies(
                                    each_vector=True)
                                | other_assign.get_dependencies(
                                    each_vector=True))

                        if (new_assignee_count + new_dep_count \
                                > self.max_vectors_in_batch_expr):
                            continue

                    other_indirect_origins = get_complete_origins_set(
                            other_assign, skip_levels=1)

                    if (my_assign not in other_indirect_origins and
                            other_assign not in my_indirect_origins):
                        did_work = True

                        # aggregate the two assignments
                        new_assignment = aggregate_two_assignments(
                                my_assign, other_assign)
                        del unprocessed_assigns[other_assign_index]
                        unprocessed_assigns.append(new_assignment)
                        for assignee in new_assignment.get_assignees():
                            origins_map[assignee] = new_assignment

                        break

            if not did_work:
                processed_assigns.append(my_assign)

        externally_used_names = set(
                expr
                for insn in processed_assigns + other_insns
                for expr in insn.get_dependencies())

        from hedge.tools import is_obj_array
        if is_obj_array(result):
            externally_used_names |= set(expr for expr in result)
        else:
            externally_used_names |= set([result])

        def schedule_and_finalize_assignment(ass):
            """Topologically order the sub-assignments inside the aggregate
            assignment *ass* and hand the ordered result to
            ``finalize_multi_assign``.

            Raises :class:`RuntimeError` if the intra-assignment
            dependencies contain a cycle.
            """
            dep_mapper = self.dep_mapper_factory()

            names_exprs = zip(ass.names, ass.exprs)

            # only dependencies among names assigned *within this very
            # assignment* matter for ordering; everything else is assumed
            # to be available already
            my_assignees = set(name for name, expr in names_exprs)
            names_exprs_deps = [
                    (name, expr,
                        set(dep.name for dep in dep_mapper(expr) if
                            isinstance(dep, Variable)) & my_assignees)
                    for name, expr in names_exprs]

            ordered_names_exprs = []
            available_names = set()

            # repeatedly peel off every entry whose dependencies are all
            # satisfied by what has been scheduled so far
            while names_exprs_deps:
                schedulable = []

                i = 0
                while i < len(names_exprs_deps):
                    name, expr, deps = names_exprs_deps[i]

                    unsatisfied_deps = deps - available_names

                    if not unsatisfied_deps:
                        schedulable.append((str(expr), name, expr))
                        del names_exprs_deps[i]
                    else:
                        i += 1

                # make sure these come out in a constant order
                schedulable.sort()

                if schedulable:
                    for key, name, expr in schedulable:
                        ordered_names_exprs.append((name, expr))
                        available_names.add(name)
                else:
                    # entries remain but nothing is schedulable: a cycle
                    raise RuntimeError("aggregation resulted in an "
                            "impossible assignment")

            # names that no other instruction (and not the final result)
            # reads need not be returned from the generated code
            return self.finalize_multi_assign(
                    names=[name for name, expr in ordered_names_exprs],
                    exprs=[expr for name, expr in ordered_names_exprs],
                    do_not_return=[Variable(name) not in externally_used_names
                        for name, expr in ordered_names_exprs],
                    priority=ass.priority)

        return [schedule_and_finalize_assignment(ass)
            for ass in processed_assigns] + other_insns
示例#22
0
 def nb_bdry_permute(fld):
     # Reorder neighbor-boundary data into local index order; a symbolic
     # scalar zero ("no data") passes through unchanged.
     return 0 if is_zero(fld) else fld[from_nb_indices]
示例#23
0
 def bind_one(subexpr):
     # A zero operand stays zero--binding the operator to it would be
     # pointless work.
     if is_zero(subexpr):
         return subexpr

     from hedge.optemplate.primitives import OperatorBinding
     return OperatorBinding(self, subexpr)
示例#24
0
    def __call__(self, eval_dependency, lift_plan):
        """Launch the CUDA flux-gather kernel for this operator's fluxes.

        :param eval_dependency: callable mapping a dependency expression
            to its evaluated (GPU) field.
        :param lift_plan: execution plan of the downstream lift
            (matrix-multiply) stage; determines output buffer shapes.
        :returns: a list of GPU arrays, one per flux, holding the
            gathered fluxes on element faces.
        """
        discr = self.discr
        fplan = self.plan
        given = fplan.given
        # exactly one element group is supported by this kernel
        elgroup, = discr.element_groups

        # one output buffer per flux, shaped as the preimage of the
        # subsequent lift matrix multiplication
        all_fluxes_on_faces = [
            gpuarray.empty(given.matmul_preimage_shape(lift_plan),
                           dtype=given.float_type,
                           allocator=discr.pool.allocate)
            for i in range(len(self.fluxes))
        ]

        fdata = self.flux_face_data_block(elgroup)
        ilist_data = self.index_list_data()

        block, gather, texref_map = self.get_kernel(fdata,
                                                    ilist_data,
                                                    for_benchmark=False)

        # bind every dependency field to its texture reference
        for dep_expr in self.all_deps:
            dep_field = eval_dependency(dep_expr)

            from hedge.tools import is_zero
            if is_zero(dep_field):
                # symbolic zeros must be materialized as actual
                # zero-filled GPU arrays before texture binding
                if dep_expr in self.dep_to_tag:
                    dep_field = discr.boundary_zeros(self.dep_to_tag[dep_expr])
                else:
                    dep_field = discr.volume_zeros()

            assert dep_field.dtype == given.float_type, "Wrong types: %s: %s, %s: %s" % (
                dep_expr, dep_field.dtype, given, given.float_type)
            dep_field.bind_to_texref_ext(texref_map[dep_expr],
                                         allow_double_hack=True)

        # scratch buffer the kernel may scribble into when debugging
        if set(["cuda_flux", "cuda_debugbuf"]) <= discr.debug:
            debugbuf = gpuarray.zeros((10000, ), dtype=given.float_type)
        else:
            from hedge.backends.cuda.tools import FakeGPUArray
            debugbuf = FakeGPUArray()

        if discr.instrumented:
            # timed launch, plus bookkeeping of global-memory traffic
            discr.flux_gather_timer.add_timer_callable(
                gather.prepared_timed_call(
                    (len(discr.blocks), 1), block, debugbuf.gpudata,
                    fdata.device_memory,
                    *tuple(fof.gpudata for fof in all_fluxes_on_faces)))

            discr.gmem_bytes_gather.add(
                len(discr.blocks) * fdata.block_bytes + given.float_size() * (
                    # fetch
                    len(self.fluxes) * 2 * fdata.fp_count * fplan.dofs_per_face

                    # store
                    + len(discr.blocks) * len(self.fluxes) *
                    fplan.microblocks_per_block() *
                    fplan.aligned_face_dofs_per_microblock()))
        else:
            gather.prepared_call(
                (len(discr.blocks), 1), block, debugbuf.gpudata,
                fdata.device_memory,
                *tuple(fof.gpudata for fof in all_fluxes_on_faces))

        # interactive dump of the debug buffer
        if set(["cuda_flux", "cuda_debugbuf"]) <= discr.debug:
            from hedge.tools import get_rank, wait_for_keypress
            if get_rank(discr) == 0:
                copied_debugbuf = debugbuf.get()
                print "DEBUG", len(discr.blocks)
                numpy.set_printoptions(linewidth=130)
                #print numpy.reshape(copied_debugbuf, (32, 16))
                print copied_debugbuf[:50]

                #for i in range(len(discr.blocks)*6):
                #print i, copied_debugbuf[i*16:(i+1)*16]
                #print i, [x-10000 for x in sorted(copied_debugbuf[i*16:(i+1)*16]) if x != 0]

                wait_for_keypress(discr)

        # interactive structural dump of the gathered fluxes
        if "cuda_flux" in discr.debug:
            from hedge.tools import get_rank, wait_for_keypress
            if get_rank(discr) == 0:
                numpy.set_printoptions(linewidth=130,
                                       precision=2,
                                       threshold=10**6)
                if True:

                    cols = []
                    for k in range(len(all_fluxes_on_faces)):
                        my_fof = all_fluxes_on_faces[k].get()

                        def sstruc(a):
                            # one char per entry: 0 exact zero, - tiny,
                            # N NaN, * sentinel value 17, # anything else
                            result = ""
                            for i in a:
                                if i == 0:
                                    result += "0"
                                elif abs(i) < 1e-10:
                                    result += "-"
                                elif numpy.isnan(i):
                                    result += "N"
                                elif i == 17:
                                    result += "*"
                                else:
                                    result += "#"

                            return result

                        useful_sz = given.block_count \
                                * given.microblocks_per_block \
                                * lift_plan.aligned_preimage_dofs_per_microblock

                        my_col = []
                        i = 0
                        while i < useful_sz:
                            my_col.append(sstruc(my_fof[i:i + 16]))
                            i += 16

                        cols.append(my_col)

                    from pytools import Table
                    tbl = Table()
                    tbl.add_row(["num"] + range(len(cols)))
                    i = 0
                    for row in zip(*cols):
                        tbl.add_row((i, ) + row)
                        i += 1
                    print tbl
                else:
                    for i in range(len(all_fluxes_on_faces)):
                        print i
                        print all_fluxes_on_faces[i].get()

                wait_for_keypress(discr)
                #print "B", [la.norm(fof.get()) for fof in all_fluxes_on_faces]

        return all_fluxes_on_faces
示例#25
0
    def op_template(self, sensor_scaling=None, viscosity_only=False):
        """Assemble the symbolic operator template for this flow operator.

        :param sensor_scaling: appears unused in this method --
            TODO(review): confirm against callers.
        :param viscosity_only: if True, multiply the first-order
            (inviscid) part by zero, leaving only the remaining terms.
        :returns: a joined object array of per-component right-hand
            sides plus one extra field carrying the characteristic speed.
        """
        u = self.cse_u
        rho = self.cse_rho
        rho_u = self.rho_u
        p = self.p
        e = self.e

        # {{{ artificial diffusion
        def make_artificial_diffusion():
            # sensor-scaled diffusion of each gradient component; only
            # active in "diffusion" artificial-viscosity mode
            if self.artificial_viscosity_mode not in ["diffusion"]:
                return 0

            dq = self.grad_of_state()

            return make_obj_array([
                self.div(
                    to_vol_quad(self.sensor())*to_vol_quad(dq[i]),
                    to_int_face_quad(self.sensor())*to_int_face_quad(dq[i])) 
                for i in range(dq.shape[0])])
        # }}}

        # {{{ state setup

        volq_flux = self.flux(self.volq_state())
        faceq_flux = self.flux(self.faceq_state())

        from hedge.optemplate.primitives import CFunction
        sqrt = CFunction("sqrt")

        speed = self.characteristic_velocity_optemplate(self.state())

        # viscous handling only matters when mu is not identically zero
        has_viscosity = not is_zero(self.get_mu(self.state(), to_quad_op=None))

        # }}}

        # {{{ operator assembly -----------------------------------------------
        from hedge.flux.tools import make_lax_friedrichs_flux
        from hedge.optemplate.operators import InverseMassOperator

        from hedge.optemplate.tools import make_stiffness_t

        primitive_bcs_as_quad_conservative = dict(
                (tag, self.primitive_to_conservative(to_bdry_quad(bc)))
                for tag, bc in 
                self.get_primitive_boundary_conditions().iteritems())

        def get_bc_tuple(tag):
            # (tag, boundary state, boundary flux) triple for one boundary
            state = self.state()
            bc = make_obj_array([
                self.get_boundary_condition_for(tag, s_i) for s_i in state])
            return tag, bc, self.flux(bc)

        # weak-form first-order part with Lax-Friedrichs interface fluxes
        first_order_part = InverseMassOperator()(
                numpy.dot(make_stiffness_t(self.dimensions), volq_flux)
                - make_lax_friedrichs_flux(
                    wave_speed=cse(to_int_face_quad(speed), "emax_c"),

                    state=self.faceq_state(), fluxes=faceq_flux,
                    bdry_tags_states_and_fluxes=[
                        get_bc_tuple(tag) for tag in self.get_boundary_tags()],
                    strong=False))

        if viscosity_only:
            # keep the expression's shape, but zero its contribution
            first_order_part = 0*first_order_part

        result = join_fields(
                first_order_part 
                + self.make_second_order_part()
                + make_artificial_diffusion()
                + self.make_extra_terms(),
                 speed)

        if self.source is not None:
            result = result + join_fields(
                    make_sym_vector("source_vect", len(self.state())),
                    # extra field for speed
                    0)

        return result
示例#26
0
 def nb_bdry_permute(fld):
     # Reorder neighbor-boundary data into local index order; a symbolic
     # scalar zero ("no data") passes through unchanged.
     return 0 if is_zero(fld) else fld[from_nb_indices]
示例#27
0
    def op_template(self, sensor_scaling=None, viscosity_only=False):
        """Assemble the symbolic operator template for this flow operator.

        :param sensor_scaling: appears unused in this method --
            TODO(review): confirm against callers.
        :param viscosity_only: if True, multiply the first-order
            (inviscid) part by zero, leaving only the remaining terms.
        :returns: a joined object array of per-component right-hand
            sides plus one extra field carrying the characteristic speed.
        """
        u = self.cse_u
        rho = self.cse_rho
        rho_u = self.rho_u
        p = self.p
        e = self.e

        # {{{ artificial diffusion
        def make_artificial_diffusion():
            # sensor-scaled diffusion of each gradient component; only
            # active in "diffusion" artificial-viscosity mode
            if self.artificial_viscosity_mode not in ["diffusion"]:
                return 0

            dq = self.grad_of_state()

            return make_obj_array([
                self.div(
                    to_vol_quad(self.sensor()) * to_vol_quad(dq[i]),
                    to_int_face_quad(self.sensor()) * to_int_face_quad(dq[i]))
                for i in range(dq.shape[0])
            ])

        # }}}

        # {{{ state setup

        volq_flux = self.flux(self.volq_state())
        faceq_flux = self.flux(self.faceq_state())

        from hedge.optemplate.primitives import CFunction
        sqrt = CFunction("sqrt")

        speed = self.characteristic_velocity_optemplate(self.state())

        # viscous handling only matters when mu is not identically zero
        has_viscosity = not is_zero(self.get_mu(self.state(), to_quad_op=None))

        # }}}

        # {{{ operator assembly -----------------------------------------------
        from hedge.flux.tools import make_lax_friedrichs_flux
        from hedge.optemplate.operators import InverseMassOperator

        from hedge.optemplate.tools import make_stiffness_t

        primitive_bcs_as_quad_conservative = dict(
            (tag, self.primitive_to_conservative(to_bdry_quad(bc))) for tag, bc
            in self.get_primitive_boundary_conditions().iteritems())

        def get_bc_tuple(tag):
            # (tag, boundary state, boundary flux) triple for one boundary
            state = self.state()
            bc = make_obj_array(
                [self.get_boundary_condition_for(tag, s_i) for s_i in state])
            return tag, bc, self.flux(bc)

        # weak-form first-order part with Lax-Friedrichs interface fluxes
        first_order_part = InverseMassOperator()(
            numpy.dot(make_stiffness_t(self.dimensions), volq_flux) -
            make_lax_friedrichs_flux(
                wave_speed=cse(to_int_face_quad(speed), "emax_c"),
                state=self.faceq_state(),
                fluxes=faceq_flux,
                bdry_tags_states_and_fluxes=[
                    get_bc_tuple(tag) for tag in self.get_boundary_tags()
                ],
                strong=False))

        if viscosity_only:
            # keep the expression's shape, but zero its contribution
            first_order_part = 0 * first_order_part

        result = join_fields(
            first_order_part + self.make_second_order_part() +
            make_artificial_diffusion() + self.make_extra_terms(), speed)

        if self.source is not None:
            result = result + join_fields(
                make_sym_vector("source_vect", len(self.state())),
                # extra field for speed
                0)

        return result
示例#28
0
    def __call__(self, eval_dependency, lift_plan):
        """Launch the CUDA flux-gather kernel for this operator's fluxes.

        :param eval_dependency: callable mapping a dependency expression
            to its evaluated (GPU) field.
        :param lift_plan: execution plan of the downstream lift
            (matrix-multiply) stage; determines output buffer shapes.
        :returns: a list of GPU arrays, one per flux, holding the
            gathered fluxes on element faces.
        """
        discr = self.discr
        fplan = self.plan
        given = fplan.given
        # exactly one element group is supported by this kernel
        elgroup, = discr.element_groups

        # one output buffer per flux, shaped as the preimage of the
        # subsequent lift matrix multiplication
        all_fluxes_on_faces = [gpuarray.empty(
                given.matmul_preimage_shape(lift_plan),
                dtype=given.float_type,
                allocator=discr.pool.allocate)
                for i in range(len(self.fluxes))]

        fdata = self.flux_face_data_block(elgroup)
        ilist_data = self.index_list_data()

        block, gather, texref_map = self.get_kernel(fdata, ilist_data,
                for_benchmark=False)

        # bind every dependency field to its texture reference
        for dep_expr in self.all_deps:
            dep_field = eval_dependency(dep_expr)

            from hedge.tools import is_zero
            if is_zero(dep_field):
                # symbolic zeros must be materialized as actual
                # zero-filled GPU arrays before texture binding
                if dep_expr in self.dep_to_tag:
                    dep_field = discr.boundary_zeros(self.dep_to_tag[dep_expr])
                else:
                    dep_field = discr.volume_zeros()

            assert dep_field.dtype == given.float_type
            dep_field.bind_to_texref_ext(texref_map[dep_expr],
                    allow_double_hack=True)

        # scratch buffer the kernel may scribble into when debugging
        if set(["cuda_flux", "cuda_debugbuf"]) <= discr.debug:
            debugbuf = gpuarray.zeros((10000,), dtype=given.float_type)
        else:
            from hedge.backends.cuda.tools import FakeGPUArray
            debugbuf = FakeGPUArray()

        if discr.instrumented:
            # timed launch, plus bookkeeping of global-memory traffic
            discr.flux_gather_timer.add_timer_callable(gather.prepared_timed_call(
                    (len(discr.blocks), 1), block,
                    debugbuf.gpudata,
                    fdata.device_memory,
                    *tuple(fof.gpudata for fof in all_fluxes_on_faces)
                    ))

            discr.gmem_bytes_gather.add(
                    len(discr.blocks) * fdata.block_bytes
                    +
                    given.float_size()
                    * (
                        # fetch
                        len(self.fluxes)
                        * 2*fdata.fp_count
                        * fplan.dofs_per_face

                        # store
                        + len(discr.blocks)
                        * len(self.fluxes)
                        * fplan.microblocks_per_block()
                        * fplan.aligned_face_dofs_per_microblock()
                        ))
        else:
            gather.prepared_call(
                    (len(discr.blocks), 1), block,
                    debugbuf.gpudata,
                    fdata.device_memory,
                    *tuple(fof.gpudata for fof in all_fluxes_on_faces)
                    )

        # interactive dump of the debug buffer
        if set(["cuda_flux", "cuda_debugbuf"]) <= discr.debug:
            from hedge.tools import get_rank, wait_for_keypress
            if get_rank(discr) == 0:
                copied_debugbuf = debugbuf.get()
                print "DEBUG", len(discr.blocks)
                numpy.set_printoptions(linewidth=130)
                #print numpy.reshape(copied_debugbuf, (32, 16))
                print copied_debugbuf[:50]

                #for i in range(len(discr.blocks)*6):
                    #print i, copied_debugbuf[i*16:(i+1)*16]
                    #print i, [x-10000 for x in sorted(copied_debugbuf[i*16:(i+1)*16]) if x != 0]

                wait_for_keypress(discr)

        # interactive structural dump of the gathered fluxes
        if "cuda_flux" in discr.debug:
            from hedge.tools import get_rank, wait_for_keypress
            if get_rank(discr) == 0:
                numpy.set_printoptions(linewidth=130, precision=2, threshold=10**6)
                if True:

                    cols = []
                    for k in range(len(all_fluxes_on_faces)):
                        my_fof = all_fluxes_on_faces[k].get()
                        def sstruc(a):
                            # one char per entry: 0 exact zero, - tiny,
                            # N NaN, * sentinel value 17, # anything else
                            result = ""
                            for i in a:
                                if i == 0:
                                    result += "0"
                                elif abs(i) < 1e-10:
                                    result += "-"
                                elif numpy.isnan(i):
                                    result += "N"
                                elif i == 17:
                                    result += "*"
                                else:
                                    result += "#"

                            return result

                        useful_sz = given.block_count \
                                * given.microblocks_per_block \
                                * lift_plan.aligned_preimage_dofs_per_microblock

                        my_col = []
                        i = 0
                        while i < useful_sz:
                            my_col.append(sstruc(my_fof[i:i+16]))
                            i += 16

                        cols.append(my_col)

                    from pytools import Table
                    tbl = Table()
                    tbl.add_row(["num"]+range(len(cols)))
                    i = 0
                    for row in zip(*cols):
                        tbl.add_row((i,)+row)
                        i += 1
                    print tbl
                else:
                    for i in range(len(all_fluxes_on_faces)):
                        print i
                        print all_fluxes_on_faces[i].get()

                wait_for_keypress(discr)
                #print "B", [la.norm(fof.get()) for fof in all_fluxes_on_faces]

        return all_fluxes_on_faces
示例#29
0
    def map_operator_binding(self, expr):
        """Lower a flux operator applied to a ``BoundaryPair``.

        Non-flux bindings (or flux operators not applied to a
        BoundaryPair) are delegated to the base IdentityMapper.
        Otherwise, the maximal flux-evaluable subexpression of the
        boundary field is found, substituted into the flux, and a new
        flux operator binding over the registered volume/boundary
        expressions is returned. A flux that simplifies to zero is
        returned as the scalar 0.
        """
        from hedge.optemplate.operators import FluxOperatorBase
        from hedge.optemplate.primitives import BoundaryPair
        from hedge.flux import FluxSubstitutionMapper, FieldComponent

        if not (isinstance(expr.op, FluxOperatorBase)
                and isinstance(expr.field, BoundaryPair)):
            return IdentityMapper.map_operator_binding(self, expr)

        bpair = expr.field
        vol_field = bpair.field
        bdry_field = bpair.bfield
        flux = expr.op.flux

        bdry_dependencies = DependencyMapper(
                    include_calls="descend_args",
                    include_operator_bindings=True)(bdry_field)

        vol_dependencies = DependencyMapper(
                include_operator_bindings=True)(vol_field)

        # a variable may not serve as both a volume and a boundary
        # quantity within a single flux
        vol_bdry_intersection = bdry_dependencies & vol_dependencies
        if vol_bdry_intersection:
            raise RuntimeError("Variables are being used as both "
                    "boundary and volume quantities: %s"
                    % ", ".join(str(v) for v in vol_bdry_intersection))

        # Step 1: Find maximal flux-evaluable subexpression of boundary field
        # in given BoundaryPair.

        class MaxBoundaryFluxEvaluableExpressionFinder(
                IdentityMapper, OperatorReducerMixin):
            # Rewrites the boundary field into flux language
            # (FieldComponent/Normal), registering volume and boundary
            # subexpressions and referring to them by index.

            def __init__(self, vol_expr_list, expensive_bdry_op_detector):
                self.vol_expr_list = vol_expr_list
                self.vol_expr_to_idx = dict((vol_expr, idx)
                        for idx, vol_expr in enumerate(vol_expr_list))

                self.bdry_expr_list = []
                self.bdry_expr_to_idx = {}

                self.expensive_bdry_op_detector = expensive_bdry_op_detector

            # {{{ expression registration
            def register_boundary_expr(self, expr):
                # index of expr in the boundary list, adding it if new
                try:
                    return self.bdry_expr_to_idx[expr]
                except KeyError:
                    idx = len(self.bdry_expr_to_idx)
                    self.bdry_expr_to_idx[expr] = idx
                    self.bdry_expr_list.append(expr)
                    return idx

            def register_volume_expr(self, expr):
                # index of expr in the volume list, adding it if new
                try:
                    return self.vol_expr_to_idx[expr]
                except KeyError:
                    idx = len(self.vol_expr_to_idx)
                    self.vol_expr_to_idx[expr] = idx
                    self.vol_expr_list.append(expr)
                    return idx

            # }}}

            # {{{ map_xxx routines

            @memoize_method
            def map_common_subexpression(self, expr):
                # Here we need to decide whether this CSE should be turned into
                # a flux CSE or not. This is a good idea if the transformed
                # expression only contains "bare" volume or boundary
                # expressions.  However, as soon as an operator is applied
                # somewhere in the subexpression, the CSE should not be touched
                # in order to avoid redundant evaluation of that operator.
                #
                # Observe that at the time of this writing (Feb 2010), the only
                # operators that may occur in boundary expressions are
                # quadrature-related.

                has_expensive_operators = \
                        self.expensive_bdry_op_detector(expr.child)

                if has_expensive_operators:
                    return FieldComponent(
                            self.register_boundary_expr(expr),
                            is_interior=False)
                else:
                    return IdentityMapper.map_common_subexpression(self, expr)

            def map_normal(self, expr):
                raise RuntimeError("Your operator template contains a flux normal. "
                        "You may find this confusing, but you can't do that. "
                        "It turns out that you need to use "
                        "hedge.optemplate.make_normal() for normals in boundary "
                        "terms of operator templates.")

            def map_normal_component(self, expr):
                if expr.boundary_tag != bpair.tag:
                    raise RuntimeError("BoundaryNormalComponent and BoundaryPair "
                            "do not agree about boundary tag: %s vs %s"
                            % (expr.boundary_tag, bpair.tag))

                from hedge.flux import Normal
                return Normal(expr.axis)

            def map_variable(self, expr):
                # bare variables in a boundary field are boundary data
                return FieldComponent(
                        self.register_boundary_expr(expr),
                        is_interior=False)

            map_subscript = map_variable

            def map_operator_binding(self, expr):
                from hedge.optemplate import (BoundarizeOperator,
                        FluxExchangeOperator,
                        QuadratureGridUpsampler,
                        QuadratureBoundaryGridUpsampler)

                if isinstance(expr.op, BoundarizeOperator):
                    if expr.op.tag != bpair.tag:
                        raise RuntimeError("BoundarizeOperator and BoundaryPair "
                                "do not agree about boundary tag: %s vs %s"
                                % (expr.op.tag, bpair.tag))

                    # boundarized volume data counts as interior flux data
                    return FieldComponent(
                            self.register_volume_expr(expr.field),
                            is_interior=True)

                elif isinstance(expr.op, FluxExchangeOperator):
                    from hedge.mesh import TAG_RANK_BOUNDARY
                    op_tag = TAG_RANK_BOUNDARY(expr.op.rank)
                    if bpair.tag != op_tag:
                        raise RuntimeError("BoundarizeOperator and FluxExchangeOperator "
                                "do not agree about boundary tag: %s vs %s"
                                % (op_tag, bpair.tag))
                    return FieldComponent(
                            self.register_boundary_expr(expr),
                            is_interior=False)

                elif isinstance(expr.op, QuadratureBoundaryGridUpsampler):
                    if bpair.tag != expr.op.boundary_tag:
                        raise RuntimeError("BoundarizeOperator "
                                "and QuadratureBoundaryGridUpsampler "
                                "do not agree about boundary tag: %s vs %s"
                                % (expr.op.boundary_tag, bpair.tag))
                    return FieldComponent(
                            self.register_boundary_expr(expr),
                            is_interior=False)

                elif isinstance(expr.op, QuadratureGridUpsampler):
                    # We're invoked before operator specialization, so we may
                    # see these instead of QuadratureBoundaryGridUpsampler.
                    return FieldComponent(
                            self.register_boundary_expr(expr),
                            is_interior=False)

                else:
                    raise RuntimeError("Found '%s' in a boundary term. "
                            "To the best of my knowledge, no hedge operator applies "
                            "directly to boundary data, so this is likely in error."
                            % expr.op)

            def map_flux_exchange(self, expr):
                return FieldComponent(
                        self.register_boundary_expr(expr),
                        is_interior=False)
            # }}}

        from hedge.tools import is_obj_array
        if not is_obj_array(vol_field):
            vol_field = [vol_field]

        mbfeef = MaxBoundaryFluxEvaluableExpressionFinder(list(vol_field),
                self.expensive_bdry_op_detector)
        #from hedge.optemplate.tools import pretty_print_optemplate
        #print pretty_print_optemplate(bdry_field)
        #raw_input("YO")
        new_bdry_field = mbfeef(bdry_field)

        # Step II: Substitute the new_bdry_field into the flux.
        def sub_bdry_into_flux(expr):
            # replace boundary FieldComponents by the rewritten boundary
            # field; return None to leave everything else untouched
            if isinstance(expr, FieldComponent) and not expr.is_interior:
                if expr.index == 0 and not is_obj_array(bdry_field):
                    return new_bdry_field
                else:
                    return new_bdry_field[expr.index]
            else:
                return None

        new_flux = FluxSubstitutionMapper(sub_bdry_into_flux)(flux)

        from hedge.tools import is_zero, make_obj_array
        if is_zero(new_flux):
            return 0
        else:
            # rebuild the flux operator around the simplified flux, applied
            # to the recursively processed volume/boundary expression lists
            return type(expr.op)(new_flux, *expr.op.__getinitargs__()[1:])(
                    BoundaryPair(
                        make_obj_array([self.rec(e) for e in mbfeef.vol_expr_list]),
                        make_obj_array([self.rec(e) for e in mbfeef.bdry_expr_list]),
                        bpair.tag))
示例#30
0
    def aggregate_assignments(self, instructions, result):
        from pymbolic.primitives import Variable

        # {{{ aggregation helpers

        def get_complete_origins_set(insn, skip_levels=0):
            if skip_levels < 0:
                skip_levels = 0

            result = set()
            for dep in insn.get_dependencies():
                if isinstance(dep, Variable):
                    dep_origin = origins_map.get(dep.name, None)
                    if dep_origin is not None:
                        if skip_levels <= 0:
                            result.add(dep_origin)
                        result |= get_complete_origins_set(
                            dep_origin, skip_levels - 1)

            return result

        var_assignees_cache = {}

        def get_var_assignees(insn):
            try:
                return var_assignees_cache[insn]
            except KeyError:
                result = set(
                    Variable(assignee) for assignee in insn.get_assignees())
                var_assignees_cache[insn] = result
                return result

        def aggregate_two_assignments(ass_1, ass_2):
            names = ass_1.names + ass_2.names

            from pymbolic.primitives import Variable
            deps = (ass_1.get_dependencies() | ass_2.get_dependencies()) \
                    - set(Variable(name) for name in names)

            return Assign(names=names,
                          exprs=ass_1.exprs + ass_2.exprs,
                          _dependencies=deps,
                          dep_mapper_factory=self.dep_mapper_factory,
                          priority=max(ass_1.priority, ass_2.priority))

        # }}}

        # {{{ main aggregation pass

        origins_map = dict((assignee, insn) for insn in instructions
                           for assignee in insn.get_assignees())

        from pytools import partition
        unprocessed_assigns, other_insns = partition(
            lambda insn: isinstance(insn, Assign) and not insn.
            is_scalar_valued, instructions)

        # filter out zero-flop-count assigns--no need to bother with those
        processed_assigns, unprocessed_assigns = partition(
            lambda ass: ass.flop_count() == 0, unprocessed_assigns)

        # filter out zero assignments
        from pytools import any
        from hedge.tools import is_zero

        i = 0

        while i < len(unprocessed_assigns):
            my_assign = unprocessed_assigns[i]
            if any(is_zero(expr) for expr in my_assign.exprs):
                processed_assigns.append(unprocessed_assigns.pop())
            else:
                i += 1

        # greedy aggregation
        #
        # Repeatedly pull one assignment off the worklist and try to merge it
        # with another worklist entry that overlaps with it in data flow and
        # has the same priority. Merged results go back on the worklist so
        # they can be merged further; anything that cannot be merged moves to
        # processed_assigns.
        while unprocessed_assigns:
            my_assign = unprocessed_assigns.pop()

            my_deps = my_assign.get_dependencies()
            my_assignees = get_var_assignees(my_assign)

            agg_candidates = []
            for i, other_assign in enumerate(unprocessed_assigns):
                other_deps = other_assign.get_dependencies()
                other_assignees = get_var_assignees(other_assign)

                # Candidates must share inputs, or one must read what the
                # other writes--and priorities must match.
                if ((my_deps & other_deps or my_deps & other_assignees
                     or other_deps & my_assignees)
                        and my_assign.priority == other_assign.priority):
                    agg_candidates.append((i, other_assign))

            did_work = False

            if agg_candidates:
                # NOTE(review): get_complete_origins_set is defined outside
                # this view; presumably skip_levels=1 excludes the immediate
                # origins so that direct producer/consumer pairs may still be
                # merged--confirm against its definition.
                my_indirect_origins = get_complete_origins_set(my_assign,
                                                               skip_levels=1)

                for other_assign_index, other_assign in agg_candidates:
                    if self.max_vectors_in_batch_expr is not None:
                        # Respect the per-batch vector budget: distinct
                        # assignees plus per-vector dependencies of the
                        # would-be merged assignment must not exceed the cap.
                        new_assignee_count = len(
                            set(my_assign.get_assignees())
                            | set(other_assign.get_assignees()))
                        new_dep_count = len(
                            my_assign.get_dependencies(each_vector=True)
                            | other_assign.get_dependencies(each_vector=True))

                        if (new_assignee_count + new_dep_count >
                                self.max_vectors_in_batch_expr):
                            continue

                    other_indirect_origins = get_complete_origins_set(
                        other_assign, skip_levels=1)

                    # Merging is only safe if neither assignment indirectly
                    # originates from the other; otherwise the merge would
                    # introduce a circular dependency.
                    if (my_assign not in other_indirect_origins
                            and other_assign not in my_indirect_origins):
                        did_work = True

                        # aggregate the two assignments
                        new_assignment = aggregate_two_assignments(
                            my_assign, other_assign)
                        del unprocessed_assigns[other_assign_index]
                        unprocessed_assigns.append(new_assignment)
                        # Re-point the origins of all merged assignees at the
                        # new combined instruction.
                        for assignee in new_assignment.get_assignees():
                            origins_map[assignee] = new_assignment

                        break

            if not did_work:
                processed_assigns.append(my_assign)

        # Gather every name still consumed by the surviving instructions,
        # plus the overall result--these values must actually be returned.
        externally_used_names = set()
        for insn in processed_assigns + other_insns:
            externally_used_names.update(insn.get_dependencies())

        from hedge.tools import is_obj_array
        if is_obj_array(result):
            externally_used_names.update(result)
        else:
            externally_used_names.add(result)

        def schedule_and_finalize_assignment(ass):
            """Topologically order the sub-assignments of *ass* so every
            internal dependency is computed before it is used, then hand the
            ordered batch to finalize_multi_assign.

            Raises RuntimeError if the internal dependencies are circular.
            """
            dep_mapper = self.dep_mapper_factory()

            # Materialize the pairs: they are traversed twice below, and
            # under Python 3 a bare zip() iterator would be exhausted after
            # the first traversal, leaving names_exprs_deps empty.
            names_exprs = list(zip(ass.names, ass.exprs))

            # Names assigned within this batch--only dependencies on these
            # constrain the ordering; everything else is already available.
            my_assignees = set(name for name, expr in names_exprs)
            names_exprs_deps = [
                (name, expr,
                 set(dep.name
                     for dep in dep_mapper(expr) if isinstance(dep, Variable))
                 & my_assignees) for name, expr in names_exprs
            ]

            ordered_names_exprs = []
            available_names = set()

            # Repeatedly peel off everything whose intra-batch dependencies
            # are satisfied; if a round produces nothing while work remains,
            # the dependencies are circular.
            while names_exprs_deps:
                schedulable = []

                i = 0
                while i < len(names_exprs_deps):
                    name, expr, deps = names_exprs_deps[i]

                    unsatisfied_deps = deps - available_names

                    if not unsatisfied_deps:
                        # Keyed by str(expr) so the sort below is total.
                        schedulable.append((str(expr), name, expr))
                        del names_exprs_deps[i]
                    else:
                        i += 1

                # make sure these come out in a constant order
                schedulable.sort()

                if schedulable:
                    for key, name, expr in schedulable:
                        ordered_names_exprs.append((name, expr))
                        available_names.add(name)
                else:
                    raise RuntimeError("aggregation resulted in an "
                                       "impossible assignment")

            return self.finalize_multi_assign(
                names=[name for name, expr in ordered_names_exprs],
                exprs=[expr for name, expr in ordered_names_exprs],
                # Temporaries nobody outside this batch reads need not be
                # returned.
                do_not_return=[
                    Variable(name) not in externally_used_names
                    for name, expr in ordered_names_exprs
                ],
                priority=ass.priority)

        # Emit the scheduled, finalized multi-assignments followed by the
        # untouched non-aggregatable instructions.
        return [
            schedule_and_finalize_assignment(ass) for ass in processed_assigns
        ] + other_insns