Exemplo n.º 1
0
    def unparse_tasklet(self, sdfg: SDFG, dfg: state.StateSubgraphView,
                        state_id: int, node: nodes.Node,
                        function_stream: CodeIOStream,
                        callsite_stream: CodeIOStream):
        """
            Emits the code of a tasklet: every statement of `node.code.code` is run
            through the SVE unparser and the produced text is written to
            `callsite_stream`, framed by two comment banners.

            :param sdfg: SDFG that contains the tasklet.
            :param dfg: Subgraph view handed through to the unparser.
            :param state_id: Index of the containing state in `sdfg.nodes()`.
            :param node: The tasklet whose code is unparsed.
            :param function_stream: Not used by this method.
            :param callsite_stream: Stream receiving the generated code.
        """
        parent_state: SDFGState = sdfg.nodes()[state_id]

        # Opening banner
        callsite_stream.write('\n///////////////////')
        callsite_stream.write('// Tasklet code ({})'.format(node.label))

        # Collect every symbol the unparser may need for type inference,
        # starting with what the state already knows at this node.
        known_symbols = parent_state.symbols_defined_at(node)

        # SDFG constants: use their dtype if they carry one, otherwise derive
        # a typeclass from the Python type of the value.
        constant_types = {}
        for const_name, const_value in sdfg.constants.items():
            if hasattr(const_value, 'dtype'):
                constant_types[const_name] = const_value.dtype
            else:
                constant_types[const_name] = dtypes.typeclass(type(const_value))
        known_symbols.update(constant_types)

        # Connectors of this node that are attached to an edge
        connector_types = {}
        for src, src_conn, dst, dst_conn, _ in parent_state.all_edges(node):
            if src == node and src_conn in src.out_connectors:
                connector_types[src_conn] = src.out_connectors[src_conn]
            elif dst == node and dst_conn in dst.in_connectors:
                connector_types[dst_conn] = dst.in_connectors[dst_conn]
        known_symbols.update(connector_types)

        # Memlet dictionary handed to the unparser; it is not filled in here
        # and the same object is shared by all statements.
        node_memlets = {}

        statements = node.code.code
        for original_stmt in statements:
            # Work on a copy so the tasklet's own AST is left untouched
            stmt_copy = copy.deepcopy(original_stmt)
            buffer = StringIO()
            dace.codegen.targets.sve.unparse.SVEUnparser(
                sdfg, dfg, self.current_map, self.cpu_codegen, stmt_copy,
                buffer, statements, node_memlets,
                util.get_loop_predicate(sdfg, dfg, node), self.counter_type,
                known_symbols, self.stream_associations,
                self.wcr_associations)
            callsite_stream.write(buffer.getvalue(), sdfg, state_id, node)

        # Closing banner
        callsite_stream.write('///////////////////\n\n')
Exemplo n.º 2
0
    def write_back(self, sdfg: SDFG, dfg: state.StateSubgraphView,
                   state_id: int, src_node: nodes.Node, dst_node: nodes.Node,
                   edge: graph.MultiConnectorEdge,
                   function_stream: CodeIOStream,
                   callsite_stream: CodeIOStream):
        """
            Generates the code that writes the output connector `edge.src_conn`
            of `src_node` back along `edge`.

            Three situations are handled:
              * WCR memlet: the register is reduced with an SVE reduction and
                the result is passed to the CPU codegen's write-and-resolve.
              * No WCR, destination is a Tasklet: plain assignment to the name
                stored in the memlet.
              * No WCR, vector connector: `svst1` for stride 1, otherwise
                `svst1_scatter_index`.

            :param function_stream: Not used by this method.
            :param callsite_stream: Stream receiving the generated code.
            :raises NotImplementedError: If `src_node` is not in an SVE scope,
                                         the connector type has no SVE type, or
                                         a non-vector is written without WCR.
            :raises util.NotSupportedError: If the reduction has no SVE
                                            equivalent or the memlet uses the
                                            innermost SVE loop parameter.
        """
        sve_scope = util.get_sve_scope(sdfg, dfg, src_node)
        if sve_scope is None:
            raise NotImplementedError('Not in an SVE scope')

        connector_type = src_node.out_connectors[edge.src_conn]
        if connector_type.type not in util.TYPE_TO_SVE:
            raise NotImplementedError(
                'Data type {} not supported'.format(connector_type.type))

        if edge.data.wcr is not None:
            # TODO: Check what are we WCR'ing in?

            # Only reductions with a known SVE instruction can be generated
            reduction = detect_reduction_type(edge.data.wcr)
            if reduction not in util.REDUCTION_TYPE_TO_SVE:
                raise util.NotSupportedError('Unsupported reduction in SVE')

            # SVE has no WCR stores, so a memlet depending on the innermost
            # SVE parameter would require unrolling the loop.
            if sve_scope.params[-1] in edge.data.free_symbols:
                raise util.NotSupportedError(
                    'SVE loop param used in WCR memlet')

            # Step 1: reduce the SVE register into a scalar
            reduce_call = (
                f'{util.REDUCTION_TYPE_TO_SVE[reduction]}'
                f'({util.get_loop_predicate(sdfg, dfg, src_node)}, {edge.src_conn})'
            )

            # NOTE(review): this prefix has pointer-cast syntax but is applied
            # to the reduced value - confirm that this is intended.
            if connector_type.type == np.int64:
                value_cast = '(long long*) '
            elif connector_type.type == np.uint64:
                value_cast = '(unsigned long long*) '
            else:
                value_cast = ''

            # Step 2: let the CPU codegen WCR the scalar into memory
            resolved = self.cpu_codegen.write_and_resolve_expr(
                sdfg,
                edge.data,
                edge.data.wcr_nonatomic,
                None,
                value_cast + reduce_call,
                dtype=connector_type.vtype)

            callsite_stream.write(resolved + ';')
            return

        # From here on no WCR is involved

        if isinstance(dst_node, dace.nodes.Tasklet):
            # Writing into a tasklet only assigns the shared register
            callsite_stream.write(
                '{} = {};'.format(edge.data.data, edge.src_conn))
            return

        if not isinstance(connector_type, dtypes.vector):
            raise NotImplementedError('Writeback into non-vector')

        # The vector register is stored directly; the stride decides between a
        # contiguous store and a scatter store.
        store_stride = self.get_load_stride(sdfg, dfg, src_node, edge.data)

        if connector_type.type == np.int64:
            pointer_cast = '(int64_t*) '
        elif connector_type.type == np.uint64:
            pointer_cast = '(uint64_t*) '
        else:
            pointer_cast = ''

        predicate = util.get_loop_predicate(sdfg, dfg, src_node)
        target_ptr = pointer_cast + cpp.cpp_ptr_expr(sdfg, edge.data,
                                                     DefinedType.Pointer)
        common_args = f'{predicate}, {target_ptr}'

        if store_stride == 1:
            store_stmt = f'svst1({common_args}, {edge.src_conn});'
        else:
            index_bits = util.get_base_type(connector_type).bytes * 8
            store_stmt = (
                f'svst1_scatter_index({common_args}, '
                f'svindex_s{index_bits}(0, {sym2cpp(store_stride)}), {edge.src_conn});'
            )

        callsite_stream.write(store_stmt)
Exemplo n.º 3
0
    def copy_memory(self, sdfg: SDFG, dfg: SDFGState, state_id: int,
                    src_node: nodes.Node, dst_node: nodes.Node,
                    edge: gr.MultiConnectorEdge[mm.Memlet],
                    function_stream: CodeIOStream,
                    callsite_stream: CodeIOStream):
        """
            Generates the code that copies data along `edge` into the input
            connector `edge.dst_conn` of `dst_node`.

            * Source is a Tasklet: the connector is declared with the C++ type
              registered for the memlet's data name and initialized from it.
            * Source is an AccessNode and the connector is a vector: an
              `svld1` (stride 1) or `svld1_gather_index` load is emitted and
              the connector is registered in the dispatcher's defined
              variables.
            * Any other connector type is delegated to the CPU code generator.

            :param function_stream: Only forwarded to the CPU code generator.
            :param callsite_stream: Stream receiving the generated code.
            :raises NotImplementedError: If `dst_node` is not in an SVE scope,
                                         the source is a Stream, or the vector
                                         connector type has no SVE type.
            :raises util.NotSupportedError: If the source is neither a Tasklet
                                            nor an AccessNode.
        """
        # We should always be in an SVE scope
        scope = util.get_sve_scope(sdfg, dfg, dst_node)
        if scope is None:
            raise NotImplementedError('Not in an SVE scope')

        in_conn = dst_node.in_connectors[edge.dst_conn]

        if isinstance(src_node, dace.nodes.Tasklet):
            # Copy from tasklet is just copying the shared register
            # Use defined_vars to get the C++ type of the shared register
            callsite_stream.write(
                f'{self.dispatcher.defined_vars.get(edge.data.data)[1]} {edge.dst_conn} = {edge.data.data};'
            )
            return

        if not isinstance(src_node, dace.nodes.AccessNode):
            raise util.NotSupportedError(
                'Copy neither from Tasklet nor AccessNode')

        src_desc = src_node.desc(sdfg)

        if isinstance(src_desc, dace.data.Stream):
            # A copy from a stream would have to trigger a vector pop. This is
            # not implemented: different amounts of data can be popped, and
            # limiting to the smallest amount would leave data unprocessed.
            raise NotImplementedError(
                'Copy from a Stream is not supported in an SVE scope')

        if isinstance(in_conn, dtypes.vector):
            # Copy into a vector, so we can use svld

            if in_conn.type not in util.TYPE_TO_SVE:
                raise NotImplementedError(
                    f'Data type {in_conn.type} not supported')

            self.dispatcher.defined_vars.add(edge.dst_conn, dtypes.vector,
                                             in_conn.ctype)

            # Determine the stride of the load and use a gather if applicable
            stride = self.get_load_stride(sdfg, dfg, dst_node, edge.data)

            # First part of the declaration is `type name`
            load_lhs = '{} {}'.format(util.TYPE_TO_SVE[in_conn.type],
                                      edge.dst_conn)

            # 64-bit integer pointers are cast to the fixed-width C types
            ptr_cast = ''
            if in_conn.type == np.int64:
                ptr_cast = '(int64_t*) '
            elif in_conn.type == np.uint64:
                ptr_cast = '(uint64_t*) '

            # Regular load and gather share the first arguments
            load_args = '{}, {}'.format(
                util.get_loop_predicate(sdfg, dfg, dst_node), ptr_cast +
                cpp.cpp_ptr_expr(sdfg, edge.data, DefinedType.Pointer))

            if stride == 1:
                callsite_stream.write('{} = svld1({});'.format(
                    load_lhs, load_args))
            else:
                callsite_stream.write(
                    '{} = svld1_gather_index({}, svindex_s{}(0, {}));'.format(
                        load_lhs, load_args,
                        util.get_base_type(in_conn).bytes * 8, sym2cpp(stride)))
        else:
            # Any other copy (e.g. pointer or scalar) is handled by the default CPU codegen
            self.cpu_codegen.copy_memory(sdfg, dfg, state_id, src_node,
                                         dst_node, edge, function_stream,
                                         callsite_stream)
Exemplo n.º 4
0
    def generate_writeback(self, sdfg: SDFG, state: SDFGState, map: nodes.Map,
                           edge: graph.MultiConnectorEdge[mm.Memlet], code: CodeIOStream):
        """
            Responsible for generating code for a writeback in a Tasklet, given the outgoing edge.
            This is mainly taking the temporary register and writing it back.

            Nothing is generated if the edge has no source connector, or if the
            destination is an AccessNode whose descriptor is neither an Array
            nor a Scalar.

            :param sdfg: SDFG containing the edge.
            :param state: State containing the edge; used to follow the memlet
                          path to the final destination node.
            :param map: Map passed to the memlet's `get_stride` for vector stores.
            :param edge: Outgoing edge of the tasklet.
            :param code: Stream receiving the generated code.
            :raises util.NotSupportedError: For unsupported source/destination
                                            type combinations or destination
                                            node kinds.
        """
        if edge.src_conn is None:
            return

        # The destination is the node at the very end of the memlet path
        dst_node = state.memlet_path(edge)[-1].dst

        src_type = edge.src.out_connectors[edge.src_conn]
        src_name = edge.src_conn

        if isinstance(dst_node, nodes.Tasklet):
            ##################
            # Code->Code edges
            dst_type = edge.dst.in_connectors[edge.dst_conn]

            if (util.is_vector(src_type) and util.is_vector(dst_type)) or (util.is_scalar(src_type)
                                                                           and util.is_scalar(dst_type)):
                # Simply write back to shared register
                code.write(f'{edge.data.data} = {src_name};')
            elif util.is_scalar(src_type) and util.is_vector(dst_type):
                # Scalar broadcast to shared vector register
                code.write(f'{edge.data.data} = svdup_{util.TYPE_TO_SVE_SUFFIX[dst_type.type]}({src_name});')
            else:
                raise util.NotSupportedError('Unsupported Code->Code edge')
        elif isinstance(dst_node, nodes.AccessNode):
            ##################
            # Write to AccessNode
            desc = dst_node.desc(sdfg)
            if isinstance(desc, data.Array):
                ##################
                # Write into Array
                if util.is_pointer(src_type):
                    raise util.NotSupportedError('Unsupported writeback')
                elif util.is_vector(src_type):
                    ##################
                    # Vector store into array (scatter if the stride is not 1)

                    stride = edge.data.get_stride(sdfg, map)

                    # long long fix
                    ptr_cast = ''
                    if src_type.type == np.int64:
                        ptr_cast = '(int64_t*) '
                    elif src_type.type == np.uint64:
                        ptr_cast = '(uint64_t*) '

                    store_args = '{}, {}'.format(
                        util.get_loop_predicate(sdfg, state, edge.src),
                        ptr_cast + cpp.cpp_ptr_expr(sdfg, edge.data, DefinedType.Pointer, codegen=self.frame),
                    )

                    if stride == 1:
                        code.write(f'svst1({store_args}, {src_name});')
                    else:
                        code.write(
                            f'svst1_scatter_index({store_args}, svindex_s{util.get_base_type(src_type).bytes * 8}(0, {sym2cpp(stride)}), {src_name});'
                        )
                else:
                    ##################
                    # Scalar write into array
                    code.write(f'{cpp.cpp_array_expr(sdfg, edge.data, codegen=self.frame)} = {src_name};')
            elif isinstance(desc, data.Scalar):
                ##################
                # Write into Scalar
                if util.is_pointer(src_type):
                    raise util.NotSupportedError('Unsupported writeback')
                elif util.is_vector(src_type):
                    if util.is_vector(desc.dtype):
                        ##################
                        # Vector write into vector Scalar access node
                        code.write(f'{edge.data.data} = {src_name};')
                    else:
                        raise util.NotSupportedError('Unsupported writeback')
                else:
                    if util.is_vector(desc.dtype):
                        ##################
                        # Scalar broadcast into vector AccessNode.
                        # The suffix table is keyed by the `.type` of a
                        # typeclass (as in the Code->Code broadcast above), not
                        # by the typeclass itself. The destination's element
                        # type is used so the result matches the register.
                        code.write(f'{edge.data.data} = svdup_{util.TYPE_TO_SVE_SUFFIX[desc.dtype.type]}({src_name});')
                    else:
                        ##################
                        # Scalar write into scalar AccessNode
                        code.write(f'{edge.data.data} = {src_name};')

        else:
            raise util.NotSupportedError('Only writeback to Tasklets and AccessNodes is supported')
Exemplo n.º 5
0
    def generate_read(self, sdfg: SDFG, state: SDFGState, map: nodes.Map, edge: graph.MultiConnectorEdge[mm.Memlet],
                      code: CodeIOStream):
        """
            Generates the declaration of the input connector `edge.dst_conn` of a
            Tasklet and initializes it from whatever the incoming edge reads.

            Nothing is generated if the edge has no destination connector, or if
            the source is an AccessNode whose descriptor is neither an Array nor
            a Scalar.

            :param sdfg: SDFG containing the edge.
            :param state: State containing the edge; used to follow the memlet
                          path back to the original source node.
            :param map: Map passed to the memlet's `get_stride` for vector loads.
            :param edge: Ingoing edge of the tasklet.
            :param code: Stream receiving the generated code.
            :raises util.NotSupportedError: For unsupported source/destination
                                            type combinations or source node
                                            kinds.
        """
        if edge.dst_conn is None:
            return

        origin = state.memlet_path(edge)[0].src
        conn_type = edge.dst.in_connectors[edge.dst_conn]
        conn_name = edge.dst_conn

        def read_shared_register(source_type, unsupported_message):
            # Declares the connector from the shared variable named by the
            # memlet, broadcasting it when a scalar feeds a vector connector.
            if util.is_vector(source_type) and util.is_vector(conn_type):
                # Directly read from shared vector register
                statement = '{} {} = {};'.format(util.TYPE_TO_SVE[conn_type.type], conn_name, edge.data.data)
            elif util.is_scalar(source_type) and util.is_scalar(conn_type):
                # Directly read from shared scalar register
                statement = '{} {} = {};'.format(conn_type, conn_name, edge.data.data)
            elif util.is_scalar(source_type) and util.is_vector(conn_type):
                # Scalar broadcast from shared scalar register
                statement = '{} {} = svdup_{}({});'.format(util.TYPE_TO_SVE[conn_type.type], conn_name,
                                                           util.TYPE_TO_SVE_SUFFIX[conn_type.type],
                                                           edge.data.data)
            else:
                raise util.NotSupportedError(unsupported_message)
            code.write(statement)

        if isinstance(origin, nodes.Tasklet):
            ##################
            # Code->Code edges
            read_shared_register(edge.src.out_connectors[edge.src_conn], 'Unsupported Code->Code edge')
            return

        if not isinstance(origin, nodes.AccessNode):
            raise util.NotSupportedError('Only copy from Tasklets and AccessNodes is supported')

        ##################
        # Read from AccessNode
        desc = origin.desc(sdfg)

        if isinstance(desc, data.Array):
            if util.is_pointer(conn_type):
                ##################
                # Pointer reference
                pointer_expr = cpp.cpp_ptr_expr(sdfg, edge.data, None, codegen=self.frame)
                code.write('{} {} = {};'.format(conn_type, conn_name, pointer_expr))
            elif util.is_vector(conn_type):
                ##################
                # Vector load (gather if the stride is not 1)
                load_stride = edge.data.get_stride(sdfg, map)

                # Left-hand side of the declaration: `type name`
                declaration = f'{util.TYPE_TO_SVE[conn_type.type]} {conn_name}'

                # 64-bit integer pointers are cast to the fixed-width C types
                if conn_type.type == np.int64:
                    pointer_cast = '(int64_t*) '
                elif conn_type.type == np.uint64:
                    pointer_cast = '(uint64_t*) '
                else:
                    pointer_cast = ''

                # Predicate and pointer are shared by regular load and gather
                predicate = util.get_loop_predicate(sdfg, state, edge.dst)
                source_ptr = pointer_cast + cpp.cpp_ptr_expr(
                    sdfg, edge.data, DefinedType.Pointer, codegen=self.frame)
                shared_args = f'{predicate}, {source_ptr}'

                if load_stride == 1:
                    code.write(f'{declaration} = svld1({shared_args});')
                else:
                    index_bits = util.get_base_type(conn_type).bytes * 8
                    code.write(f'{declaration} = svld1_gather_index({shared_args}, '
                               f'svindex_s{index_bits}(0, {sym2cpp(load_stride)}));')
            else:
                ##################
                # Scalar read from array
                element_expr = cpp.cpp_array_expr(sdfg, edge.data, codegen=self.frame)
                code.write('{} {} = {};'.format(conn_type, conn_name, element_expr))
        elif isinstance(desc, data.Scalar):
            # Refer to shared variable
            read_shared_register(desc.dtype, 'Unsupported Scalar->Code edge')