Example #1
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        map_entry = graph.nodes()[candidate[MPITransformMap._map_entry]]

        # Check if the map is one-dimensional
        if map_entry.map.range.dims() != 1:
            return False

        # We cannot transform a map which is already of schedule type MPI
        if map_entry.map.schedule == dtypes.ScheduleType.MPI:
            return False

        # We cannot transform a map which is already inside a MPI map, or in
        # another device
        schedule_whitelist = [
            dtypes.ScheduleType.Default, dtypes.ScheduleType.Sequential
        ]
        sdict = graph.scope_dict()
        parent = sdict[map_entry]
        while parent is not None:
            if parent.map.schedule not in schedule_whitelist:
                return False
            parent = sdict[parent]

        # Dynamic map ranges not supported (will allocate dynamic memory)
        if has_dynamic_map_inputs(graph, map_entry):
            return False

        # MPI schedules currently do not support WCR
        map_exit = graph.exit_node(map_entry)
        if any(e.data.wcr for e in graph.out_edges(map_exit)):
            return False

        return True
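
A minimal sketch (not part of the example above) of how such a check is exercised, assuming the standard DaCe Python frontend and that MPITransformMap is importable from dace.transformation.dataflow; apply_transformations() evaluates can_be_applied() on every pattern match and only applies those that return True:

import dace
from dace.transformation.dataflow import MPITransformMap  # import path assumed

N = dace.symbol('N')

@dace.program
def axpy(A: dace.float64[N], B: dace.float64[N]):
    for i in dace.map[0:N]:
        B[i] = 2.0 * A[i] + B[i]

sdfg = axpy.to_sdfg()
# The one-dimensional, default-scheduled map above should satisfy the checks in
# can_be_applied(); the return value is the number of applications performed.
applied = sdfg.apply_transformations(MPITransformMap)
print(f"MPITransformMap applied {applied} time(s)")
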
Example #2
    def can_be_applied(graph: SDFGState,
                       candidate,
                       expr_index,
                       sdfg,
                       strict=False):
        map_entry = graph.nodes()[candidate[GPUMultiTransformMap._map_entry]]

        # Check if there is more than one GPU available:
        if (Config.get("compiler", "cuda", "max_number_gpus") < 2):
            return False

        # Dynamic map ranges not supported
        if has_dynamic_map_inputs(graph, map_entry):
            return False

        # Only accept maps with a default schedule
        schedule_whitelist = [dtypes.ScheduleType.Default]
        sdict = graph.scope_dict()
        parent = sdict[map_entry]
        while parent is not None:
            if parent.map.schedule not in schedule_whitelist:
                return False
            parent = sdict[parent]

        # Library nodes inside the scope are not supported
        scope_subgraph = graph.scope_subgraph(map_entry)
        for node in scope_subgraph.nodes():
            if isinstance(node, nodes.LibraryNode):
                return False

        # Custom reductions cannot have an accumulate transient, as the
        # reduction would have to be split between the incoming memlet of the
        # accumulate transient and the outgoing memlet. Forgoing a GPU-local
        # accumulate transient only works for small data volumes.
        map_exit = graph.exit_node(map_entry)
        for edge in graph.out_edges(map_exit):
            if edge.data.wcr is not None and operations.detect_reduction_type(
                    edge.data.wcr) == dtypes.ReductionType.Custom:
                return False

        storage_whitelist = [
            dtypes.StorageType.Default,
            dtypes.StorageType.CPU_Pinned,
            dtypes.StorageType.CPU_Heap,
            dtypes.StorageType.GPU_Global,
        ]
        for node in graph.predecessors(map_entry):
            if not isinstance(node, nodes.AccessNode):
                return False
            if node.desc(graph).storage not in storage_whitelist:
                return False

        for node in graph.successors(map_exit):
            if not isinstance(node, nodes.AccessNode):
                return False
            if node.desc(graph).storage not in storage_whitelist:
                return False

        return True
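
The guard on the number of GPUs above reads the DaCe configuration; a small sketch (illustrative values, using the same dace.Config entry as the snippet) of inspecting and overriding that setting before attempting the transformation:

import dace

ngpus = dace.Config.get("compiler", "cuda", "max_number_gpus")
if ngpus < 2:
    # GPUMultiTransformMap refuses to match with fewer than two GPUs configured;
    # the value set here is illustrative and should reflect the actual machine.
    dace.Config.set("compiler", "cuda", "max_number_gpus", value=2)
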
Example #3
    def can_be_applied(self, graph, expr_index, sdfg, permissive=False):
        if expr_index == 0:
            map_entry = self.map_entry
            candidate_map = map_entry.map

            # Disallow GPUTransform on nested maps unless in permissive mode
            if not permissive:
                if graph.entry_node(map_entry) is not None:
                    return False

            # Map schedules that are disallowed to transform to GPUs
            if (candidate_map.schedule == dtypes.ScheduleType.MPI
                    or candidate_map.schedule == dtypes.ScheduleType.GPU_Device
                    or candidate_map.schedule == dtypes.ScheduleType.GPU_ThreadBlock
                    or candidate_map.schedule == dtypes.ScheduleType.Sequential):
                return False

            # Dynamic map ranges cannot become kernels
            if sd.has_dynamic_map_inputs(graph, map_entry):
                return False

            # Recursively check parent for GPU schedules
            sdict = graph.scope_dict()
            current_node = map_entry
            while current_node is not None:
                if (current_node.map.schedule == dtypes.ScheduleType.GPU_Device
                        or current_node.map.schedule == dtypes.ScheduleType.GPU_ThreadBlock):
                    return False
                current_node = sdict[current_node]

            # Ensure that map does not include internal arrays that are
            # allocated on non-default space
            subgraph = graph.scope_subgraph(map_entry)
            for node in subgraph.nodes():
                if (isinstance(node, nodes.AccessNode) and node.desc(sdfg).storage != dtypes.StorageType.Default
                        and node.desc(sdfg).storage != dtypes.StorageType.Register):
                    return False

            # If one of the outputs is a stream, do not match
            map_exit = graph.exit_node(map_entry)
            for edge in graph.out_edges(map_exit):
                dst = graph.memlet_path(edge)[-1].dst
                if (isinstance(dst, nodes.AccessNode) and isinstance(sdfg.arrays[dst.data], data.Stream)):
                    return False

            return True
        elif expr_index == 1:
            reduce = self.reduce

            # Recursively check parent for GPU schedules
            sdict = graph.scope_dict()
            current_node = sdict[reduce]
            while current_node is not None:
                if (current_node.map.schedule == dtypes.ScheduleType.GPU_Device
                        or current_node.map.schedule == dtypes.ScheduleType.GPU_ThreadBlock):
                    return False
                current_node = sdict[current_node]

            return True
Example #4
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        if expr_index == 0:
            map_entry = graph.nodes()[candidate[GPUTransformMap._map_entry]]
            candidate_map = map_entry.map

            # Map schedules that are disallowed to transform to GPUs
            if (candidate_map.schedule in [dtypes.ScheduleType.MPI] +
                    dtypes.GPU_SCHEDULES):
                return False
            if sd.is_devicelevel(sdfg, graph, map_entry):
                return False

            # Dynamic map ranges cannot become kernels
            if sd.has_dynamic_map_inputs(graph, map_entry):
                return False

            # Ensure that map does not include internal arrays that are
            # allocated on non-default space
            subgraph = graph.scope_subgraph(map_entry)
            for node in subgraph.nodes():
                if (isinstance(node, nodes.AccessNode) and
                        node.desc(sdfg).storage != dtypes.StorageType.Default
                        and node.desc(sdfg).storage !=
                        dtypes.StorageType.Register):
                    return False

            # If one of the outputs is a stream, do not match
            map_exit = graph.exit_nodes(map_entry)[0]
            for edge in graph.out_edges(map_exit):
                dst = graph.memlet_path(edge)[-1].dst
                if (isinstance(dst, nodes.AccessNode)
                        and isinstance(sdfg.arrays[dst.data], data.Stream)):
                    return False

            return True
        elif expr_index == 1:
            reduce = graph.nodes()[candidate[GPUTransformMap._reduce]]

            # Map schedules that are disallowed to transform to GPUs
            if (reduce.schedule in [dtypes.ScheduleType.MPI] +
                    dtypes.GPU_SCHEDULES):
                return False
            if sd.is_devicelevel(sdfg, graph, reduce):
                return False

            return True
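
Both GPUTransformMap variants above are pattern-matched before application; a brief sketch (assuming an already-loaded sdfg object and that GPUTransformMap is importable from dace.transformation.dataflow) that enumerates the surviving matches with the same Optimizer API used in the script at the bottom of this page:

from dace.transformation.optimizer import Optimizer
from dace.transformation.dataflow import GPUTransformMap  # import path assumed

opt = Optimizer(sdfg)  # 'sdfg' is any loaded dace.SDFG
# get_pattern_matches() only yields matches whose can_be_applied() succeeded,
# so this directly exercises the checks shown above.
matches = list(opt.get_pattern_matches(patterns=[GPUTransformMap]))
print(f"{len(matches)} GPUTransformMap match(es) found")
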
Example #5
File: nestk.py  Project: spcl/stencilflow
    def can_be_applied(graph: dace.SDFGState,
                       candidate: Dict[Any, int],
                       expr_index: int,
                       sdfg: dace.SDFG,
                       strict=False):
        map_entry: nodes.MapEntry = graph.node(candidate[NestK._map_entry])
        stencil: Stencil = graph.node(candidate[NestK._stencil])

        if len(map_entry.map.params) != 1:
            return False
        if sd.has_dynamic_map_inputs(graph, map_entry):
            return False
        pname = map_entry.map.params[0]  # Usually "k"
        dim_index = None

        for edge in graph.out_edges(map_entry):
            if edge.dst != stencil:
                return False

        for edge in graph.all_edges(stencil):
            if edge.data.data is None:  # Empty memlet
                continue
            # TODO: Use bitmap to verify lower-dimensional arrays
            if len(edge.data.subset) == 3:
                for i, rng in enumerate(edge.data.subset.ndrange()):
                    for r in rng:
                        if pname in map(str, r.free_symbols):
                            if dim_index is not None and dim_index != i:
                                # k dimension must match in all memlets
                                return False
                            if str(r) != pname:
                                if symbolic.issymbolic(
                                        r - symbolic.symbol(pname),
                                        sdfg.constants):
                                    warnings.warn('k expression is nontrivial')
                            dim_index = i

        # No nesting dimension found
        if dim_index is None:
            return False

        # Ensure the stencil shape is 1 for the found dimension
        if stencil.shape[dim_index] != 1:
            return False

        return True
Example #6
    def can_be_applied(self, graph, expr_index, sdfg, permissive=False):
        map_node = self.map_entry
        nsdfg_node = None

        # If the map is dynamic-ranged, the resulting border arrays would be
        # dynamically sized
        if sd.has_dynamic_map_inputs(graph, map_node):
            return False

        if expr_index == 0:  # Map with subgraph
            subgraphs = [
                graph.scope_subgraph(map_node,
                                     include_entry=False,
                                     include_exit=False)
            ]
        else:  # Map with nested SDFG
            nsdfg_node = self.nested_sdfg
            # Make sure there are no other internal nodes in the map
            if len(set(e.dst for e in graph.out_edges(map_node))) > 1:
                return False
            subgraphs = list(nsdfg_node.sdfg.nodes())

        # Test subgraphs
        border_arrays = set()
        total_components = []
        for sg in subgraphs:
            components = self._components(sg)
            snodes = sg.nodes()
            # Test that the subgraphs have more than one computational component
            if expr_index == 0 and len(snodes) > 0 and len(components) <= 1:
                return False

            # Test that the components are connected by transients that are not
            # used anywhere else
            border_arrays |= self._border_arrays(
                nsdfg_node.sdfg if expr_index == 1 else sdfg,
                sg if expr_index == 1 else graph, sg)
            total_components.append(components)

            # In nested SDFGs and subgraphs, ensure none of the border
            # values are non-transients
            for array in border_arrays:
                if expr_index == 0:
                    ndesc = sdfg.arrays[array]
                else:
                    ndesc = nsdfg_node.sdfg.arrays[array]

                if ndesc.transient is False:
                    return False

            # In subgraphs, make sure transients are not used/allocated
            # in other scopes or states
            if expr_index == 0:
                # Find all nodes not in subgraph
                not_subgraph = set(
                    n.data for n in graph.nodes()
                    if n not in snodes and isinstance(n, nodes.AccessNode))
                not_subgraph.update(
                    set(n.data for s in sdfg.nodes() if s != graph
                        for n in s.nodes() if isinstance(n, nodes.AccessNode)))

                for _, component_out in components:
                    for e in sg.out_edges(component_out):
                        if isinstance(e.dst, nodes.AccessNode):
                            if e.dst.data in not_subgraph:
                                return False

        # Fail if there are arrays inside the map that are not a direct
        # output of a computational component
        # TODO(later): Support this case? Ambiguous array sizes and memlets
        external_arrays = (
            border_arrays -
            self._internal_border_arrays(total_components, subgraphs))
        if len(external_arrays) > 0:
            return False

        return True
# Imports for this standalone snippet; exact module paths are assumed and may
# differ slightly between DaCe versions.
import dace
from tqdm import tqdm
from dace.sdfg.nodes import MapEntry, Tasklet
from dace.sdfg.utils import has_dynamic_map_inputs
from dace.transformation.optimizer import Optimizer
from dace.transformation.dataflow import Vectorization

# 'paths' is assumed to be a list of .sdfg file paths defined elsewhere.
max_free_symbols = 0
max_params = 0
for file in tqdm(paths):
    try:
        sdfg = dace.SDFG.from_file(file)
    except Exception:
        print("Not a valid SDFG at: " + str(file))
        continue
    opt = Optimizer(sdfg)
    vectorization_map_entry = [
        i.query_node(sdfg.sdfg_list[i.sdfg_id], i._map_entry)
        for i in opt.get_pattern_matches(patterns=[Vectorization])
    ]
    for node, state in sdfg.all_nodes_recursive():
        if isinstance(node, MapEntry):
            dic_training = {}
            if has_dynamic_map_inputs(state, node):
                continue
            tasklet = state.out_edges(node)[0].dst
            if not isinstance(tasklet, Tasklet):
                continue
            free_symbols = set()
            # Collect free symbols from the memlets entering and leaving the tasklet
            for edge in state.in_edges(tasklet) + state.out_edges(tasklet):
                for free_symbol in edge.data.free_symbols:
                    free_symbols.add(free_symbol)
            for free_symbol in node.free_symbols:
                free_symbols.add(free_symbol)
            dic_training["Free_symbols"] = free_symbols
            dic_training["Params"] = node.params
            max_free_symbols = max(max_free_symbols, len(free_symbols))
            max_params = max(max_params, len(node.params))
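
The loop above accumulates max_free_symbols and max_params but never reports them; a trivial follow-up (not part of the original snippet) to print the collected statistics:

print("max free symbols per tasklet:", max_free_symbols)
print("max map parameters:", max_params)
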