Example #1
    def _initialize_return_values(self, kwargs):
        # Obtain symbol values from arguments and constants
        syms = dict()
        syms.update({k: v for k, v in kwargs.items() if k not in self.sdfg.arrays})
        syms.update(self.sdfg.constants)

        if self._initialized:
            if self._return_syms == syms:
                if not self._create_new_arrays:
                    return
                else:
                    self._create_new_arrays = False
                    # Use stored sizes to recreate arrays (fast path)
                    if self._return_arrays is None:
                        return
                    elif isinstance(self._return_arrays, tuple):
                        self._return_arrays = tuple(kwargs[desc[0]] if desc[0] in kwargs else self._create_array(*desc)
                                                    for desc in self._retarray_shapes)
                        return
                    else:  # Single array return value
                        desc = self._retarray_shapes[0]
                        arr = (kwargs[desc[0]] if desc[0] in kwargs else self._create_array(*desc))
                        self._return_arrays = arr
                        return

        self._return_syms = syms
        self._create_new_arrays = False

        # Initialize return values with numpy arrays
        self._retarray_shapes = []
        self._return_arrays = []
        for arrname, arr in sorted(self.sdfg.arrays.items()):
            if arrname.startswith('__return') and not arr.transient:
                if arrname in kwargs:
                    self._return_arrays.append(kwargs[arrname])
                    self._retarray_shapes.append((arrname, ))
                    continue

                if isinstance(arr, dt.Stream):
                    raise NotImplementedError('Return streams are unsupported')

                shape = tuple(symbolic.evaluate(s, syms) for s in arr.shape)
                dtype = arr.dtype.as_numpy_dtype()
                total_size = int(symbolic.evaluate(arr.total_size, syms))
                strides = tuple(symbolic.evaluate(s, syms) * arr.dtype.bytes for s in arr.strides)
                shape_desc = (arrname, dtype, arr.storage, shape, strides, total_size)
                self._retarray_shapes.append(shape_desc)

                # Create an array with the properties of the SDFG array
                arr = self._create_array(*shape_desc)
                self._return_arrays.append(arr)

        # Set up return_arrays field
        if len(self._return_arrays) == 0:
            self._return_arrays = None
        elif len(self._return_arrays) == 1:
            self._return_arrays = self._return_arrays[0]
        else:
            self._return_arrays = tuple(self._return_arrays)
Example #2
    def _initialize_return_values(self, kwargs):
        # Obtain symbol values from arguments and constants
        syms = dict()
        syms.update(
            {k: v
             for k, v in kwargs.items() if k not in self.sdfg.arrays})
        syms.update(self.sdfg.constants)

        if self._initialized:
            if self._return_syms == syms:
                return self._return_kwarrays

        self._return_syms = syms

        # Initialize return values with numpy arrays
        self._return_arrays = []
        self._return_kwarrays = {}
        for arrname, arr in sorted(self.sdfg.arrays.items()):
            if arrname.startswith('__return') and not arr.transient:
                if arrname in kwargs:
                    self._return_arrays.append(kwargs[arrname])
                    self._return_kwarrays[arrname] = kwargs[arrname]
                    continue

                if isinstance(arr, dt.Stream):
                    raise NotImplementedError('Return streams are unsupported')
                if arr.storage in [
                        dtypes.StorageType.GPU_Global,
                        dtypes.StorageType.FPGA_Global
                ]:
                    raise NotImplementedError('Non-host return values are '
                                              'unsupported')

                # Create an array with the properties of the SDFG array
                self._return_arrays.append(
                    np.ndarray([symbolic.evaluate(s, syms) for s in arr.shape],
                               arr.dtype.as_numpy_dtype(),
                               buffer=np.zeros(
                                   [symbolic.evaluate(arr.total_size, syms)],
                                   arr.dtype.as_numpy_dtype()),
                               strides=[
                                   symbolic.evaluate(s, syms) * arr.dtype.bytes
                                   for s in arr.strides
                               ]))
                self._return_kwarrays[arrname] = self._return_arrays[-1]

        # Set up return_arrays field
        if len(self._return_arrays) == 0:
            self._return_arrays = None
        elif len(self._return_arrays) == 1:
            self._return_arrays = self._return_arrays[0]
        else:
            self._return_arrays = tuple(self._return_arrays)

        return self._return_kwarrays
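
Note: the np.ndarray constructor above builds each return array as a view over a pre-zeroed flat buffer, with the evaluated symbolic strides given in bytes. A minimal standalone sketch of that allocation pattern, with hypothetical concrete values standing in for the evaluated shape, total size, and strides:

    import numpy as np

    # Hypothetical values standing in for the evaluated symbolic expressions
    shape, total_size = (4, 5), 20
    itemsize = np.dtype(np.float64).itemsize
    strides = (5 * itemsize, 1 * itemsize)  # numpy strides are in bytes

    # Allocate a flat zeroed buffer, then view it with the desired layout
    buf = np.zeros(total_size, np.float64)
    arr = np.ndarray(shape, np.float64, buffer=buf, strides=strides)
    assert arr.shape == (4, 5) and arr.strides == strides
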
Example #3
 def generate_rtl_inputs_outputs(self, sdfg, tasklet):
     # construct input / output module header
     inputs = list()
     for inp in tasklet.in_connectors:
         # add vector index
         idx_str = ""
         # catch symbolic (compile time variables)
         check_issymbolic([
             tasklet.in_connectors[inp].veclen,
             tasklet.in_connectors[inp].bytes
         ], sdfg)
         # extract parameters
         vec_len = int(
             symbolic.evaluate(tasklet.in_connectors[inp].veclen,
                               sdfg.constants))
         total_size = int(
             symbolic.evaluate(tasklet.in_connectors[inp].bytes,
                               sdfg.constants))
         # generate vector representation
         if vec_len > 1:
             idx_str = "[{}:0]".format(vec_len - 1)
         # add element index
         idx_str += "[{}:0]".format(int(total_size / vec_len) * 8 - 1)
         # generate padded string and add to list
         inputs.append(", input{padding}{idx_str} {name}".format(
             padding=" " * (17 - len(idx_str)), idx_str=idx_str, name=inp))
     outputs = list()
     for outp in tasklet.out_connectors:
         # add vector index
         idx_str = ""
         # catch symbolic (compile time variables)
         check_issymbolic([
             tasklet.out_connectors[outp].veclen,
             tasklet.out_connectors[outp].bytes
         ], sdfg)
         # extract parameters
         vec_len = int(
             symbolic.evaluate(tasklet.out_connectors[outp].veclen,
                               sdfg.constants))
         total_size = int(
             symbolic.evaluate(tasklet.out_connectors[outp].bytes,
                               sdfg.constants))
         # generate vector representation
         if vec_len > 1:
             idx_str = "[{}:0]".format(vec_len - 1)
         # add element index
         idx_str += "[{}:0]".format(int(total_size / vec_len) * 8 - 1)
         # generate padded string and add to list
         outputs.append(", output reg{padding}{idx_str} {name}".format(
             padding=" " * (12 - len(idx_str)), idx_str=idx_str, name=outp))
     return inputs, outputs
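
Note: both loops share the same index-string arithmetic: a connector carrying total_size bytes in vec_len lanes becomes vec_len elements of (total_size / vec_len) * 8 bits each, declared as [bits-1:0]. A small sketch with hypothetical parameter values:

    # Hypothetical evaluated connector parameters
    vec_len, total_size = 4, 16  # 16 bytes split into 4 lanes
    idx_str = "[{}:0]".format(vec_len - 1) if vec_len > 1 else ""
    idx_str += "[{}:0]".format((total_size // vec_len) * 8 - 1)
    print(idx_str)  # [3:0][31:0] -> four 32-bit elements
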
Example #4
 def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
     map_entry = graph.nodes()[candidate[MapUnroll._map_entry]]
     # Must be top-level map
     if graph.scope_dict()[map_entry] is not None:
         return False
     # All map ranges must be constant
     try:
         for begin, end, step in map_entry.map.range:
             symbolic.evaluate(begin, sdfg.constants)
             symbolic.evaluate(end, sdfg.constants)
             symbolic.evaluate(step, sdfg.constants)
     except TypeError:
         return False
     return True
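
Note: the try/except works because symbolic.evaluate raises TypeError when the expression still contains free symbols after substituting the known constants. A minimal sketch of that behavior (assuming DaCe is importable):

    from dace import symbolic

    expr = symbolic.pystr_to_symbolic('N + 1')
    print(symbolic.evaluate(expr, {'N': 4}))  # 5 -> fully constant
    try:
        symbolic.evaluate(expr, {})  # N has no known value
    except TypeError:
        print('range is not a compile-time constant')
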
Example #5
File: rtl.py Project: am-ivanov/dace
 def copy_memory(self, sdfg: sdfg.SDFG, dfg: state.StateSubgraphView,
                 state_id: int, src_node: nodes.Node, dst_node: nodes.Node,
                 edge: graph.MultiConnectorEdge,
                 function_stream: prettycode.CodeIOStream,
                 callsite_stream: prettycode.CodeIOStream):
     """
         Generate input/output memory copies from the array references to local variables (i.e. for the tasklet code).
     """
     if isinstance(edge.src, nodes.AccessNode) and isinstance(
             edge.dst, nodes.Tasklet):  # handle AccessNode->Tasklet
         if isinstance(dst_node.in_connectors[edge.dst_conn],
                       dtypes.pointer):  # pointer accessor
             line: str = "{} {} = &{}[0];".format(
                 dst_node.in_connectors[edge.dst_conn].ctype, edge.dst_conn,
                 edge.src.data)
         elif isinstance(dst_node.in_connectors[edge.dst_conn],
                         dtypes.vector):  # vector accessor
             line: str = "{} {} = *({} *)(&{}[0]);".format(
                 dst_node.in_connectors[edge.dst_conn].ctype, edge.dst_conn,
                 dst_node.in_connectors[edge.dst_conn].ctype, edge.src.data)
         else:  # scalar accessor
             arr = sdfg.arrays[edge.data.data]
             if isinstance(arr, data.Array):
                 line: str = "{}* {} = &{}[0];".format(
                     dst_node.in_connectors[edge.dst_conn].ctype,
                     edge.dst_conn, edge.src.data)
             elif isinstance(arr, data.Scalar):
                 line: str = "{} {} = {};".format(
                     dst_node.in_connectors[edge.dst_conn].ctype,
                     edge.dst_conn, edge.src.data)
     elif isinstance(edge.src, nodes.MapEntry) and isinstance(
             edge.dst, nodes.Tasklet):
         rtl_name = self.unique_name(edge.dst, sdfg.nodes()[state_id], sdfg)
         self.n_unrolled[rtl_name] = symbolic.evaluate(
             edge.src.map.range[0][1] + 1, sdfg.constants)
         line: str = f'{dst_node.in_connectors[edge.dst_conn]} {edge.dst_conn} = &{edge.data.data}[{edge.src.map.params[0]}*{edge.data.volume}];'
     else:
         raise RuntimeError(
             "Not handling copy_memory case of type {} -> {}.".format(
                 type(edge.src), type(edge.dst)))
     # write accessor to file
     callsite_stream.write(line)
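
Note: for reference, the scalar-accessor branches above emit C lines of the following shape (hypothetical connector, ctype, and array names):

    ctype, conn, data = 'float', 'a', 'A'
    print("{}* {} = &{}[0];".format(ctype, conn, data))  # data.Array case
    print("{} {} = {};".format(ctype, conn, data))       # data.Scalar case
    # -> float* a = &A[0];
    # -> float a = A;
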
Example #6
def move_small_arrays_to_stack(sdfg: SDFG) -> None:
    """
    Set all Default storage types that are constant sized and less than
    the auto-tile size to the stack (as StorageType.Register).
    :param sdfg: The SDFG to operate on.
    :note: Operates in-place on the SDFG.
    """
    converted = 0
    tile_size = config.Config.get('optimizer', 'autotile_size')
    for sd, aname, array in sdfg.arrays_recursive():
        if isinstance(array, dt.Stream):
            continue
        if (array.transient and array.storage == dtypes.StorageType.Default
                and array.lifetime == dtypes.AllocationLifetime.Scope):
            if not symbolic.issymbolic(array.total_size, sd.constants):
                eval_size = symbolic.evaluate(array.total_size, sd.constants)
                if eval_size <= tile_size:
                    array.storage = dtypes.StorageType.Register
                    converted += 1

    if config.Config.get_bool('debugprint') and converted > 0:
        print(f'Statically allocating {converted} transient arrays')
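
Note: the issymbolic guard ensures evaluate is only called once total_size is fully determined by the known constants. A small sketch of that check (assuming DaCe is importable; N is a hypothetical symbol):

    from dace import symbolic

    N = symbolic.symbol('N')
    size = 4 * N
    print(symbolic.issymbolic(size, {}))        # True: N is free
    print(symbolic.issymbolic(size, {'N': 8}))  # False: N is a known constant
    print(symbolic.evaluate(size, {'N': 8}))    # 32
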
Example #7
    def apply(self, sdfg):

        from dace.transformation.dataflow import TrivialMapElimination

        state = sdfg.nodes()[self.state_id]
        map_entry = state.nodes()[self.subgraph[MapUnroll._map_entry]]
        map_exit = state.exit_node(map_entry)

        # Collect all nodes in this weakly connected component
        subgraph = sdutil.weakly_connected_component(state, map_entry)

        # Save nested SDFGs to JSON, then deserialize them for every copy we
        # need to make
        nested_sdfgs = {}
        for node in subgraph:
            if isinstance(node, nodes.NestedSDFG):
                nested_sdfgs[node.sdfg] = node.sdfg.to_json()

        # Check for local memories that need to be replicated
        local_memories = [
            name for name in sdutil.local_transients(
                sdfg, subgraph, entry_node=map_entry, include_nested=True)
            if not isinstance(sdfg.arrays[name], dt.Stream)
            and not isinstance(sdfg.arrays[name], dt.View)
        ]

        params = map_entry.map.params
        ranges = map_entry.map.range.ranges
        constant_ranges = []
        for r in ranges:
            begin = symbolic.evaluate(r[0], sdfg.constants)
            end = symbolic.evaluate(r[1], sdfg.constants)
            step = symbolic.evaluate(r[2], sdfg.constants)
            end += step  # Make non-inclusive
            constant_ranges.append(range(begin, end, step))
        index_tuples = itertools.product(*constant_ranges)
        for t in index_tuples:
            suffix = "_" + "_".join(map(str, t))
            node_to_unrolled = {}
            # Copy all nodes
            for node in subgraph:
                if isinstance(node, nodes.NestedSDFG):
                    # Avoid deep-copying the nested SDFG
                    nsdfg = node.sdfg
                    # Don't copy the nested SDFG, as we will do this separately
                    node.sdfg = None
                    unrolled_node = copy.deepcopy(node)
                    node.sdfg = nsdfg
                    # Deserialize into a new SDFG specific to this copy
                    nsdfg_json = nested_sdfgs[nsdfg]
                    name = nsdfg_json["attributes"]["name"]
                    nsdfg_json["attributes"]["name"] += suffix
                    unrolled_nsdfg = SDFG.from_json(nsdfg_json)
                    nsdfg_json["attributes"]["name"] = name  # Reinstate
                    # Set all the references
                    unrolled_nsdfg.parent = state
                    unrolled_nsdfg.parent_sdfg = sdfg
                    unrolled_nsdfg.update_sdfg_list([])
                    unrolled_node.sdfg = unrolled_nsdfg
                    unrolled_nsdfg.parent_nsdfg_node = unrolled_node
                else:
                    unrolled_node = copy.deepcopy(node)
                    if node == map_entry:
                        # Fix the map bounds to only this iteration
                        unrolled_node.map.range = [(i, i, 1) for i in t]
                    if (isinstance(node, nodes.AccessNode)
                            and node.data in local_memories):
                        # If this is a local memory only used in this subgraph,
                        # we need to replicate it for each new subgraph
                        unrolled_name = node.data + suffix
                        if unrolled_name not in sdfg.arrays:
                            unrolled_desc = copy.deepcopy(
                                sdfg.arrays[node.data])
                            sdfg.add_datadesc(unrolled_name, unrolled_desc)
                        unrolled_node.data = unrolled_name
                state.add_node(unrolled_node)
                node_to_unrolled[node] = unrolled_node  # Remember mapping
            # Copy all edges
            for src, src_conn, dst, dst_conn, memlet in subgraph.edges():
                src = node_to_unrolled[src]
                dst = node_to_unrolled[dst]
                memlet = copy.deepcopy(memlet)
                if memlet.data in local_memories:
                    memlet.data = memlet.data + suffix
                state.add_edge(src, src_conn, dst, dst_conn, memlet)
            # Eliminate the now trivial map
            TrivialMapElimination.apply_to(
                sdfg,
                verify=False,
                annotate=False,
                save=False,
                _map_entry=node_to_unrolled[map_entry])

        # Now we can delete the original subgraph. This implicitly also
        # removes memlets between nodes
        state.remove_nodes_from(subgraph)

        # If we added a bunch of new nested SDFGs, reset the internal list
        if len(nested_sdfgs) > 0:
            sdfg.reset_sdfg_list()

        # Remove local memories that were replicated
        for mem in local_memories:
            sdfg.remove_data(mem)
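
Note: the unrolled copies are enumerated by the Cartesian product of the constant (inclusive) map ranges, and each index tuple becomes a name suffix. A toy sketch with hypothetical ranges:

    import itertools

    # Hypothetical constant ranges as (begin, end inclusive, step)
    ranges = [(0, 1, 1), (0, 2, 1)]
    constant_ranges = [range(b, e + s, s) for b, e, s in ranges]
    for t in itertools.product(*constant_ranges):
        suffix = "_" + "_".join(map(str, t))
        print(suffix)  # _0_0, _0_1, _0_2, _1_0, _1_1, _1_2
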
Example #8
    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]
        subgraph = self.subgraph_view(sdfg)
        map_entries = helpers.get_outermost_scope_maps(sdfg, graph, subgraph)

        result = StencilTiling.topology(sdfg, graph, map_entries)
        (children_dict, parent_dict, sink_maps) = result

        # next up, calculate inferred ranges for each map
        # for each map entry, this contains a tuple of dicts:
        # each of those maps from data_name of the array to
        # inferred outer ranges. An inferred outer range is created
        # by taking the union of ranges of inner subsets corresponding
        # to that data and substituting this subset by the min / max of the
        # parametrized map boundaries
        # finally, from these outer ranges we can easily calculate
        # strides and tile sizes required for every map
        inferred_ranges = defaultdict(dict)

        # create array of reverse topologically sorted map entries
        # to iterate over
        topo_reversed = []
        queue = set(sink_maps.copy())
        while len(queue) > 0:
            element = next(e for e in queue
                           if not children_dict[e] - set(topo_reversed))
            topo_reversed.append(element)
            queue.remove(element)
            for parent in parent_dict[element]:
                queue.add(parent)

        # main loop
        # first get coverage dicts for each map entry
        # for each map, contains a tuple of two dicts
        # each of those two maps from data name to outer range
        coverage = {}
        for map_entry in map_entries:
            coverage[map_entry] = StencilTiling.coverage_dicts(
                sdfg, graph, map_entry, outer_range=True)

        # we have a mapping from data name to outer range
        # however we want a mapping from map parameters to outer ranges
        # for this we need to find out how all array dimensions map to
        # outer ranges

        variable_mapping = defaultdict(list)
        for map_entry in topo_reversed:
            map = map_entry.map

            # first find out variable mapping
            for e in itertools.chain(
                    graph.out_edges(map_entry),
                    graph.in_edges(graph.exit_node(map_entry))):
                mapping = []
                for dim in e.data.subset:
                    syms = set()
                    for d in dim:
                        syms |= symbolic.symlist(d).keys()
                    if len(syms) > 1:
                        raise NotImplementedError(
                            "One incoming or outgoing stencil subset is indexed "
                            "by multiple map parameters. "
                            "This is not supported yet.")
                    try:
                        mapping.append(syms.pop())
                    except KeyError:
                        # just append None if there is no map symbol in it.
                        # we don't care for now.
                        mapping.append(None)

                if e.data.data in variable_mapping:
                    # assert that the mapping is the same everywhere,
                    # else we might run into problems
                    assert variable_mapping[e.data.data] == mapping
                else:
                    variable_mapping[e.data.data] = mapping

            # now do mapping data name -> outer range
            # and from that infer mapping variable -> outer range
            local_ranges = {dn: None for dn in coverage[map_entry][1].keys()}
            for data_name, cov in coverage[map_entry][1].items():
                local_ranges[data_name] = subsets.union(
                    local_ranges[data_name], cov)
                # now look at succeeding (child) maps
                # and union those subsets -> could be larger with stencil indent
                for child_map in children_dict[map_entry]:
                    if data_name in coverage[child_map][0]:
                        local_ranges[data_name] = subsets.union(
                            local_ranges[data_name],
                            coverage[child_map][0][data_name])

            # final assignment: combine local_ranges and variable_mapping
            # together into inferred_ranges
            inferred_ranges[map_entry] = {p: None for p in map.params}
            for data_name, ranges in local_ranges.items():
                for param, r in zip(variable_mapping[data_name], ranges):
                    # create new range from this subset and assign
                    rng = subsets.Range((r, ))
                    if param:
                        inferred_ranges[map_entry][param] = subsets.union(
                            inferred_ranges[map_entry][param], rng)

        # get parameters -- should all be the same
        params = next(iter(map_entries)).map.params.copy()
        # define reference range as inferred range of one of the sink maps
        self.reference_range = inferred_ranges[next(iter(sink_maps))]
        if self.debug:
            print("StencilTiling::Reference Range", self.reference_range)
        # next up, search for the ranges that don't change
        invariant_dims = []
        for idx, p in enumerate(params):
            different = False
            if self.reference_range[p] is None:
                invariant_dims.append(idx)
                warnings.warn(
                    f"StencilTiling::No Stencil pattern detected for parameter {p}"
                )
                continue
            for m in map_entries:
                if inferred_ranges[m][p] != self.reference_range[p]:
                    different = True
                    break
            if not different:
                invariant_dims.append(idx)
                warnings.warn(
                    f"StencilTiling::No Stencil pattern detected for parameter {p}"
                )

        # during stripmining, we will create new outer map entries
        # for easy access
        self._outer_entries = set()
        # with inferred_ranges constructed, we can begin to strip mine
        for map_entry in map_entries:
            # Retrieve map entry and exit nodes.
            map = map_entry.map

            stripmine_subgraph = {
                StripMining._map_entry: graph.nodes().index(map_entry)
            }

            sdfg_id = sdfg.sdfg_id
            last_map_entry = None
            original_schedule = map_entry.schedule
            self.tile_sizes = []
            self.tile_offset_lower = []
            self.tile_offset_upper = []

            # strip mining each dimension where necessary
            removed_maps = 0
            for dim_idx, param in enumerate(map_entry.map.params):
                # get current_node tile size
                if dim_idx >= len(self.strides):
                    tile_stride = symbolic.pystr_to_symbolic(self.strides[-1])
                else:
                    tile_stride = symbolic.pystr_to_symbolic(
                        self.strides[dim_idx])

                trivial = False

                if dim_idx in invariant_dims:
                    self.tile_sizes.append(tile_stride)
                    self.tile_offset_lower.append(0)
                    self.tile_offset_upper.append(0)
                else:
                    target_range_current = inferred_ranges[map_entry][param]
                    reference_range_current = self.reference_range[param]

                    min_diff = symbolic.SymExpr(reference_range_current.min_element()[0] \
                                    - target_range_current.min_element()[0])
                    max_diff = symbolic.SymExpr(target_range_current.max_element()[0] \
                                    - reference_range_current.max_element()[0])

                    try:
                        min_diff = symbolic.evaluate(min_diff, {})
                        max_diff = symbolic.evaluate(max_diff, {})
                    except TypeError:
                        raise RuntimeError("Symbolic evaluation of map "
                                           "ranges failed. Please check "
                                           "that your parameters match.")

                    self.tile_sizes.append(tile_stride + max_diff + min_diff)
                    self.tile_offset_lower.append(
                        symbolic.pystr_to_symbolic(str(min_diff)))
                    self.tile_offset_upper.append(
                        symbolic.pystr_to_symbolic(str(max_diff)))

                # get calculated parameters
                tile_size = self.tile_sizes[-1]

                dim_idx -= removed_maps
                # If map or tile sizes are trivial, skip strip-mining map dimension
                # special cases:
                # if tile size is trivial AND we have an invariant dimension, skip
                if tile_size == map.range.size()[dim_idx] and (
                        dim_idx + removed_maps) in invariant_dims:
                    continue

                # trivial map: we just continue
                if map.range.size()[dim_idx] in [0, 1]:
                    continue

                if tile_size == 1 and tile_stride == 1 and (
                        dim_idx + removed_maps) in invariant_dims:
                    trivial = True
                    removed_maps += 1

                # indent all map ranges accordingly and then perform
                # strip mining on these. Offset inner maps accordingly afterwards

                range_tuple = (map.range[dim_idx][0] +
                               self.tile_offset_lower[-1],
                               map.range[dim_idx][1] -
                               self.tile_offset_upper[-1],
                               map.range[dim_idx][2])
                map.range[dim_idx] = range_tuple
                stripmine = StripMining(sdfg_id, self.state_id,
                                        stripmine_subgraph, 0)

                stripmine.tiling_type = 'ceilrange'
                stripmine.dim_idx = dim_idx
                stripmine.new_dim_prefix = self.prefix if not trivial else ''
                # use tile_stride for both -- we will extend
                # the inner tiles later
                stripmine.tile_size = str(tile_stride)
                stripmine.tile_stride = str(tile_stride)
                outer_map = stripmine.apply(sdfg)
                outer_map.schedule = original_schedule

                # apply to the new map the schedule of the original one
                map_entry.schedule = self.schedule

                # if tile stride is 1, we can make a nice simplification by just
                # taking the overapproximated inner range as inner range
                # this eliminates the min/max in the range which
                # enables loop unrolling
                if tile_stride == 1:
                    map_entry.range[dim_idx] = tuple(
                        symbolic.SymExpr(el._approx_expr) if isinstance(
                            el, symbolic.SymExpr) else el
                        for el in map_entry.range[dim_idx])

                # in map_entry: enlarge tiles by upper and lower offset
                # doing it this way and not via stripmine strides ensures
                # that the max gets changed as well
                old_range = map_entry.range[dim_idx]
                map_entry.range[dim_idx] = ((old_range[0] -
                                             self.tile_offset_lower[-1]),
                                            (old_range[1] +
                                             self.tile_offset_upper[-1]),
                                            old_range[2])

                # We have to propagate here for correct outer volume and subset sizes
                _propagate_node(graph, map_entry)
                _propagate_node(graph, graph.exit_node(map_entry))

                # usual tiling pipeline
                if last_map_entry:
                    new_map_entry = graph.in_edges(map_entry)[0].src
                    mapcollapse_subgraph = {
                        MapCollapse._outer_map_entry:
                        graph.node_id(last_map_entry),
                        MapCollapse._inner_map_entry:
                        graph.node_id(new_map_entry)
                    }
                    mapcollapse = MapCollapse(sdfg_id, self.state_id,
                                              mapcollapse_subgraph, 0)
                    mapcollapse.apply(sdfg)
                last_map_entry = graph.in_edges(map_entry)[0].src
            # add last instance of map entries to _outer_entries
            if last_map_entry:
                self._outer_entries.add(last_map_entry)

            # Map Unroll Feature: only unroll if conditions are met:
            # Only unroll if at least one of the inner map ranges is strictly larger than 1
            # Only unroll if strides all are one
            if self.unroll_loops and all(s == 1 for s in self.strides) and any(
                    s not in [0, 1] for s in map_entry.range.size()):
                l = len(map_entry.params)
                if l > 1:
                    subgraph = {
                        MapExpansion.map_entry: graph.nodes().index(map_entry)
                    }
                    trafo_expansion = MapExpansion(sdfg.sdfg_id,
                                                   sdfg.nodes().index(graph),
                                                   subgraph, 0)
                    trafo_expansion.apply(sdfg)
                maps = [map_entry]
                for _ in range(l - 1):
                    map_entry = graph.out_edges(map_entry)[0].dst
                    maps.append(map_entry)

                for map in reversed(maps):
                    # MapToForLoop
                    subgraph = {
                        MapToForLoop._map_entry: graph.nodes().index(map)
                    }
                    trafo_for_loop = MapToForLoop(sdfg.sdfg_id,
                                                  sdfg.nodes().index(graph),
                                                  subgraph, 0)
                    trafo_for_loop.apply(sdfg)
                    nsdfg = trafo_for_loop.nsdfg

                    # LoopUnroll

                    guard = trafo_for_loop.guard
                    end = trafo_for_loop.after_state
                    begin = next(e.dst for e in nsdfg.out_edges(guard)
                                 if e.dst != end)

                    subgraph = {
                        DetectLoop._loop_guard: nsdfg.nodes().index(guard),
                        DetectLoop._loop_begin: nsdfg.nodes().index(begin),
                        DetectLoop._exit_state: nsdfg.nodes().index(end)
                    }
                    transformation = LoopUnroll(0, 0, subgraph, 0)
                    transformation.apply(nsdfg)
            elif self.unroll_loops:
                warnings.warn(
                    "Did not unroll loops. Either all ranges are equal to "
                    "one or range difference is symbolic.")

        self._outer_entries = list(self._outer_entries)
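
Note: the queue loop near the top builds a reverse topological order over the map DAG: a map is emitted only after all of its children, then its parents are enqueued. A toy sketch with a hypothetical chain of maps 'A' -> 'B' -> 'C' (C being the sink):

    children_dict = {'A': {'B'}, 'B': {'C'}, 'C': set()}
    parent_dict = {'A': set(), 'B': {'A'}, 'C': {'B'}}
    topo_reversed, queue = [], {'C'}
    while queue:
        element = next(e for e in queue
                       if not children_dict[e] - set(topo_reversed))
        topo_reversed.append(element)
        queue.remove(element)
        queue |= parent_dict[element]
    print(topo_reversed)  # ['C', 'B', 'A']
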
Example #9
File: rtl.py Project: sscholbe/dace
    def unparse_tasklet(self, sdfg: sdfg.SDFG, dfg: state.StateSubgraphView,
                        state_id: int, node: nodes.Node,
                        function_stream: prettycode.CodeIOStream,
                        callsite_stream: prettycode.CodeIOStream):

        # extract data
        state = sdfg.nodes()[state_id]
        tasklet = node

        # construct variables paths
        unique_name: str = "{}_{}_{}_{}".format(tasklet.name, sdfg.sdfg_id,
                                                sdfg.node_id(state),
                                                state.node_id(tasklet))

        # Collect all of the input and output connectors into buses and scalars
        buses = {}
        scalars = {}
        for edge in state.in_edges(tasklet):
            arr = sdfg.arrays[edge.src.data]
            # catch symbolic (compile time variables)
            check_issymbolic([
                tasklet.in_connectors[edge.dst_conn].veclen,
                tasklet.in_connectors[edge.dst_conn].bytes
            ], sdfg)

            # extract parameters
            vec_len = int(
                symbolic.evaluate(tasklet.in_connectors[edge.dst_conn].veclen,
                                  sdfg.constants))
            total_size = int(
                symbolic.evaluate(tasklet.in_connectors[edge.dst_conn].bytes,
                                  sdfg.constants))
            if isinstance(arr, data.Array):
                if self.hardware_target:
                    raise NotImplementedError(
                        'Array input for hardware* not implemented')
                else:
                    buses[edge.dst_conn] = (False, total_size, vec_len)
            elif isinstance(arr, data.Stream):
                buses[edge.dst_conn] = (False, total_size, vec_len)
            elif isinstance(arr, data.Scalar):
                scalars[edge.dst_conn] = (False, total_size * 8)

        for edge in state.out_edges(tasklet):
            arr = sdfg.arrays[edge.dst.data]
            # catch symbolic (compile time variables)
            check_issymbolic([
                tasklet.out_connectors[edge.src_conn].veclen,
                tasklet.out_connectors[edge.src_conn].bytes
            ], sdfg)

            # extract parameters
            vec_len = int(
                symbolic.evaluate(tasklet.out_connectors[edge.src_conn].veclen,
                                  sdfg.constants))
            total_size = int(
                symbolic.evaluate(tasklet.out_connectors[edge.src_conn].bytes,
                                  sdfg.constants))
            if isinstance(arr, data.Array):
                if self.hardware_target:
                    raise NotImplementedError(
                        'Array output for hardware* not implemented')
                else:
                    buses[edge.src_conn] = (True, total_size, vec_len)
            elif isinstance(arr, data.Stream):
                buses[edge.src_conn] = (True, total_size, vec_len)
            elif isinstance(arr, data.Scalar):
                print('Scalar output not implemented')

        # generate system verilog module components
        parameter_string: str = self.generate_rtl_parameters(sdfg.constants)
        inputs, outputs = self.generate_rtl_inputs_outputs(buses, scalars)

        # create rtl code object (that is later written to file)
        self.code_objects.append(
            codeobject.CodeObject(
                name="{}".format(unique_name),
                code=RTLCodeGen.RTL_HEADER.format(name=unique_name,
                                                  parameters=parameter_string,
                                                  inputs="\n".join(inputs),
                                                  outputs="\n".join(outputs)) +
                tasklet.code.code + RTLCodeGen.RTL_FOOTER,
                language="sv",
                target=RTLCodeGen,
                title="rtl",
                target_type="{}".format(unique_name),
                additional_compiler_kwargs="",
                linkable=True,
                environments=None))

        if self.hardware_target:
            if self.vendor == 'xilinx':
                rtllib_config = {
                    "name": unique_name,
                    "buses": {
                        name: ('m_axis' if is_output else 's_axis', vec_len)
                        for name, (is_output, _, vec_len) in buses.items()
                    },
                    "params": {
                        "scalars": {
                            name: total_size
                            for name, (_, total_size) in scalars.items()
                        },
                        "memory": {}
                    },
                    "ip_cores": tasklet.ip_cores if isinstance(
                        tasklet, nodes.RTLTasklet) else {},
                }

                self.code_objects.append(
                    codeobject.CodeObject(name=f"{unique_name}_control",
                                          code=rtllib_control(rtllib_config),
                                          language="v",
                                          target=RTLCodeGen,
                                          title="rtl",
                                          target_type="{}".format(unique_name),
                                          additional_compiler_kwargs="",
                                          linkable=True,
                                          environments=None))

                self.code_objects.append(
                    codeobject.CodeObject(name=f"{unique_name}_top",
                                          code=rtllib_top(rtllib_config),
                                          language="v",
                                          target=RTLCodeGen,
                                          title="rtl",
                                          target_type="{}".format(unique_name),
                                          additional_compiler_kwargs="",
                                          linkable=True,
                                          environments=None))

                self.code_objects.append(
                    codeobject.CodeObject(name=f"{unique_name}_package",
                                          code=rtllib_package(rtllib_config),
                                          language="tcl",
                                          target=RTLCodeGen,
                                          title="rtl",
                                          target_type="scripts",
                                          additional_compiler_kwargs="",
                                          linkable=True,
                                          environments=None))

                self.code_objects.append(
                    codeobject.CodeObject(name=f"{unique_name}_synth",
                                          code=rtllib_synth(rtllib_config),
                                          language="tcl",
                                          target=RTLCodeGen,
                                          title="rtl",
                                          target_type="scripts",
                                          additional_compiler_kwargs="",
                                          linkable=True,
                                          environments=None))
            else:  # self.vendor != "xilinx"
                raise NotImplementedError(
                    'Only RTL codegen for Xilinx is implemented')
        else:  # not hardware_target
            # generate verilator simulation cpp code components
            inputs, outputs = self.generate_cpp_inputs_outputs(tasklet)
            valid_zeros, ready_zeros = self.generate_cpp_zero_inits(tasklet)
            vector_init = self.generate_cpp_vector_init(tasklet)
            num_elements = self.generate_cpp_num_elements(tasklet)
            internal_state_str, internal_state_var = self.generate_cpp_internal_state(
                tasklet)
            read_input_hs = self.generate_input_hs(tasklet)
            feed_elements = self.generate_feeding(tasklet, inputs)
            in_ptrs, out_ptrs = self.generate_ptrs(tasklet)
            export_elements = self.generate_exporting(tasklet, outputs)
            write_output_hs = self.generate_write_output_hs(tasklet)
            hs_flags = self.generate_hs_flags(tasklet)
            input_hs_toggle = self.generate_input_hs_toggle(tasklet)
            output_hs_toggle = self.generate_output_hs_toggle(tasklet)
            running_condition = self.generate_running_condition(tasklet)

            # add header code to stream
            if not self.cpp_general_header_added:
                sdfg.append_global_code(
                    cpp_code=RTLCodeGen.CPP_GENERAL_HEADER_TEMPLATE.format(
                        debug_include="// generic includes\n#include <iostream>"
                        if self.verilator_debug else ""))
                self.cpp_general_header_added = True
            sdfg.append_global_code(
                cpp_code=RTLCodeGen.CPP_MODEL_HEADER_TEMPLATE.format(
                    name=unique_name))

            # add main cpp code to stream
            callsite_stream.write(contents=RTLCodeGen.CPP_MAIN_TEMPLATE.format(
                name=unique_name,
                inputs=inputs,
                outputs=outputs,
                num_elements=str.join('\n', num_elements),
                vector_init=vector_init,
                valid_zeros=str.join('\n', valid_zeros),
                ready_zeros=str.join('\n', ready_zeros),
                read_input_hs=str.join('\n', read_input_hs),
                feed_elements=str.join('\n', feed_elements),
                in_ptrs=str.join('\n', in_ptrs),
                out_ptrs=str.join('\n', out_ptrs),
                export_elements=str.join('\n', export_elements),
                write_output_hs=str.join('\n', write_output_hs),
                hs_flags=str.join('\n', hs_flags),
                input_hs_toggle=str.join('\n', input_hs_toggle),
                output_hs_toggle=str.join('\n', output_hs_toggle),
                running_condition=str.join(' && ', running_condition),
                internal_state_str=internal_state_str,
                internal_state_var=internal_state_var,
                debug_sim_start="std::cout << \"SIM {name} START\" << std::endl;"
                if self.verilator_debug else "",
                debug_internal_state="""
// report internal state
VL_PRINTF("[t=%lu] ap_aclk=%u ap_areset=%u valid_i=%u ready_i=%u valid_o=%u ready_o=%u \\n",
    main_time, model->ap_aclk, model->ap_areset,
    model->valid_i, model->ready_i, model->valid_o, model->ready_o);
VL_PRINTF("{internal_state_str}\\n", {internal_state_var});
std::cout << std::flush;
""".format(internal_state_str=internal_state_str,
            internal_state_var=internal_state_var)
                if self.verilator_debug else "",
                debug_sim_end="std::cout << \"SIM {name} END\" << std::endl;"
                if self.verilator_debug else ""),
                                  sdfg=sdfg,
                                  state_id=state_id,
                                  node_id=node)
Example #10
    def apply(self, sdfg):
        # Obtain loop information
        guard: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._loop_guard])
        begin: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._loop_begin])
        after_state: sd.SDFGState = sdfg.node(
            self.subgraph[DetectLoop._exit_state])

        # Obtain iteration variable, range, and stride
        guard_inedges = sdfg.in_edges(guard)
        condition_edge = sdfg.edges_between(guard, begin)[0]
        itervar = list(guard_inedges[0].data.assignments.keys())[0]
        condition = condition_edge.data.condition_sympy()
        rng = LoopUnroll._loop_range(itervar, guard_inedges, condition)

        # Loop must be unrollable
        if self.count == 0 and any(
                symbolic.issymbolic(r, sdfg.constants) for r in rng):
            raise ValueError('Loop cannot be fully unrolled, size is symbolic')
        if self.count != 0:
            raise NotImplementedError  # TODO(later)

        # Find the state prior to the loop
        if rng[0] == symbolic.pystr_to_symbolic(
                guard_inedges[0].data.assignments[itervar]):
            before_state: sd.SDFGState = guard_inedges[0].src
            last_state: sd.SDFGState = guard_inedges[1].src
        else:
            before_state: sd.SDFGState = guard_inedges[1].src
            last_state: sd.SDFGState = guard_inedges[0].src

        # Get loop states
        loop_states = list(
            sdutil.dfs_conditional(sdfg,
                                   sources=[begin],
                                   condition=lambda _, child: child != guard))
        first_id = loop_states.index(begin)
        last_id = loop_states.index(last_state)
        loop_subgraph = gr.SubgraphView(sdfg, loop_states)

        # Evaluate the real values of the loop
        start, end, stride = (symbolic.evaluate(r, sdfg.constants)
                              for r in rng)

        # Create states for loop subgraph
        unrolled_states = []
        for i in range(start, end + 1, stride):
            # Instantiate loop states with iterate value
            new_states = self.instantiate_loop(sdfg, loop_states,
                                               loop_subgraph, itervar, i)

            # Connect iterations with unconditional edges
            if len(unrolled_states) > 0:
                sdfg.add_edge(unrolled_states[-1][1], new_states[first_id],
                              sd.InterstateEdge())

            unrolled_states.append((new_states[first_id], new_states[last_id]))

        # Connect new states to before and after states without conditions
        if unrolled_states:
            sdfg.add_edge(before_state, unrolled_states[0][0],
                          sd.InterstateEdge())
            sdfg.add_edge(unrolled_states[-1][1], after_state,
                          sd.InterstateEdge())

        # Remove old states from SDFG
        sdfg.remove_nodes_from([guard] + loop_states)
Example #11
    def apply(self, sdfg):
        # Obtain loop information
        guard: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._loop_guard])
        begin: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._loop_begin])
        after_state: sd.SDFGState = sdfg.node(
            self.subgraph[DetectLoop._exit_state])

        # Obtain iteration variable, range, and stride
        guard_inedges = sdfg.in_edges(guard)
        condition_edge = sdfg.edges_between(guard, begin)[0]
        itervar = list(guard_inedges[0].data.assignments.keys())[0]
        condition = condition_edge.data.condition_sympy()
        rng = LoopUnroll._loop_range(itervar, guard_inedges, condition)

        # Loop must be unrollable
        if self.count == 0 and any(
                symbolic.issymbolic(r, sdfg.constants) for r in rng):
            raise ValueError('Loop cannot be fully unrolled, size is symbolic')
        if self.count != 0:
            raise NotImplementedError  # TODO(later)

        # Find the state prior to the loop
        if rng[0] == symbolic.pystr_to_symbolic(
                guard_inedges[0].data.assignments[itervar]):
            before_state: sd.SDFGState = guard_inedges[0].src
            last_state: sd.SDFGState = guard_inedges[1].src
        else:
            before_state: sd.SDFGState = guard_inedges[1].src
            last_state: sd.SDFGState = guard_inedges[0].src

        # Get loop states
        loop_states = list(
            sdutil.dfs_topological_sort(
                sdfg,
                sources=[begin],
                condition=lambda _, child: child != guard))
        first_id = loop_states.index(begin)
        last_id = loop_states.index(last_state)
        loop_subgraph = gr.SubgraphView(sdfg, loop_states)

        # Evaluate the real values of the loop
        start, end, stride = (symbolic.evaluate(r, sdfg.constants)
                              for r in rng)

        # Create states for loop subgraph
        unrolled_states = []
        for i in range(start, end + 1, stride):
            # Copying states via to/from JSON is faster than deepcopy (which
            # would also copy the parent SDFG)
            new_states = [
                sd.SDFGState.from_json(s.to_json(), context={'sdfg': sdfg})
                for s in loop_states
            ]

            # Replace iterate with value in each state
            for state in new_states:
                state.set_label(state.label + '_%s_%d' % (itervar, i))
                state.replace(itervar, i)

            # Add subgraph to original SDFG
            for edge in loop_subgraph.edges():
                src = new_states[loop_states.index(edge.src)]
                dst = new_states[loop_states.index(edge.dst)]

                # Replace conditions in subgraph edges
                data: sd.InterstateEdge = copy.deepcopy(edge.data)
                if data.condition:
                    ASTFindReplace({itervar: str(i)}).visit(data.condition)

                sdfg.add_edge(src, dst, data)

            # Connect iterations with unconditional edges
            if len(unrolled_states) > 0:
                sdfg.add_edge(unrolled_states[-1][1], new_states[first_id],
                              sd.InterstateEdge())

            unrolled_states.append((new_states[first_id], new_states[last_id]))

        # Connect new states to before and after states without conditions
        if unrolled_states:
            sdfg.add_edge(before_state, unrolled_states[0][0],
                          sd.InterstateEdge())
            sdfg.add_edge(unrolled_states[-1][1], after_state,
                          sd.InterstateEdge())

        # Remove old states from SDFG
        sdfg.remove_nodes_from([guard] + loop_states)
Example #12
    def apply(self, sdfg):
        # Obtain loop information
        guard: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._loop_guard])
        begin: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._loop_begin])
        after_state: sd.SDFGState = sdfg.node(
            self.subgraph[DetectLoop._exit_state])

        # Obtain iteration variable, range, and stride, together with the last
        # state(s) before the loop and the last loop state.
        itervar, rng, loop_struct = find_for_loop(sdfg, guard, begin)

        # Loop must be fully unrollable for now.
        if self.count != 0:
            raise NotImplementedError  # TODO(later)

        # Get loop states
        loop_states = list(
            sdutil.dfs_conditional(sdfg,
                                   sources=[begin],
                                   condition=lambda _, child: child != guard))
        first_id = loop_states.index(begin)
        last_state = loop_struct[1]
        last_id = loop_states.index(last_state)
        loop_subgraph = gr.SubgraphView(sdfg, loop_states)

        try:
            start, end, stride = (r for r in rng)
            stride = symbolic.evaluate(stride, sdfg.constants)
            loop_diff = int(symbolic.evaluate(end - start + 1, sdfg.constants))
            is_symbolic = any([symbolic.issymbolic(r) for r in rng[:2]])
        except TypeError:
            raise TypeError('Loop difference and strides cannot be symbolic.')
        # Create states for loop subgraph
        unrolled_states = []

        for i in range(0, loop_diff, stride):
            current_index = start + i
            # Instantiate loop states with iterate value
            new_states = self.instantiate_loop(sdfg, loop_states,
                                               loop_subgraph, itervar,
                                               current_index,
                                               str(i) if is_symbolic else None)

            # Connect iterations with unconditional edges
            if len(unrolled_states) > 0:
                sdfg.add_edge(unrolled_states[-1][1], new_states[first_id],
                              sd.InterstateEdge())

            unrolled_states.append((new_states[first_id], new_states[last_id]))

        # Get any assignments that might be on the edge to the after state
        after_assignments = (sdfg.edges_between(
            guard, after_state)[0].data.assignments)

        # Connect new states to before and after states without conditions
        if unrolled_states:
            before_states = loop_struct[0]
            for before_state in before_states:
                sdfg.add_edge(before_state, unrolled_states[0][0],
                              sd.InterstateEdge())
            sdfg.add_edge(unrolled_states[-1][1], after_state,
                          sd.InterstateEdge(assignments=after_assignments))

        # Remove old states from SDFG
        sdfg.remove_nodes_from([guard] + loop_states)
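
Note: unlike Examples #10 and #11, which iterate range(start, end + 1, stride) directly, this version normalizes the iteration to start at zero and offsets each index by start, which also works when start is symbolic. For concrete bounds the two enumerations agree, as a quick check with hypothetical values shows:

    start, end, stride = 2, 10, 2
    loop_diff = int(end - start + 1)
    assert [start + i for i in range(0, loop_diff, stride)] == \
           list(range(start, end + 1, stride))  # [2, 4, 6, 8, 10]
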
Example #13
    def _initialize_return_values(self, kwargs):
        # Obtain symbol values from arguments and constants
        syms = dict()
        syms.update(
            {k: v
             for k, v in kwargs.items() if k not in self.sdfg.arrays})
        syms.update(self.sdfg.constants)

        if self._initialized:
            if self._return_syms == syms:
                return self._return_kwarrays

        self._return_syms = syms

        # Initialize return values with numpy arrays
        self._return_arrays = []
        self._return_kwarrays = {}
        for arrname, arr in sorted(self.sdfg.arrays.items()):
            if arrname.startswith('__return') and not arr.transient:
                if arrname in kwargs:
                    self._return_arrays.append(kwargs[arrname])
                    self._return_kwarrays[arrname] = kwargs[arrname]
                    continue

                if isinstance(arr, dt.Stream):
                    raise NotImplementedError('Return streams are unsupported')

                ndarray = np.ndarray
                zeros = np.zeros

                if arr.storage is dtypes.StorageType.GPU_Global:
                    try:
                        import cupy

                        # Set allocator to GPU
                        def ndarray(*args, buffer=None, **kwargs):
                            if buffer is not None:
                                buffer = buffer.data
                            return cupy.ndarray(*args, memptr=buffer, **kwargs)

                        zeros = cupy.zeros
                    except (ImportError, ModuleNotFoundError):
                        raise NotImplementedError('GPU return values are '
                                                  'unsupported if cupy is not '
                                                  'installed')
                if arr.storage is dtypes.StorageType.FPGA_Global:
                    raise NotImplementedError('FPGA return values are '
                                              'unsupported')

                # Create an array with the properties of the SDFG array
                self._return_arrays.append(
                    ndarray([symbolic.evaluate(s, syms) for s in arr.shape],
                            arr.dtype.as_numpy_dtype(),
                            buffer=zeros(
                                [symbolic.evaluate(arr.total_size, syms)],
                                arr.dtype.as_numpy_dtype()),
                            strides=[
                                symbolic.evaluate(s, syms) * arr.dtype.bytes
                                for s in arr.strides
                            ]))
                self._return_kwarrays[arrname] = self._return_arrays[-1]

        # Set up return_arrays field
        if len(self._return_arrays) == 0:
            self._return_arrays = None
        elif len(self._return_arrays) == 1:
            self._return_arrays = self._return_arrays[0]
        else:
            self._return_arrays = tuple(self._return_arrays)

        return self._return_kwarrays