Exemplo n.º 1
0
def test_notbmap():
    """ Checks the default storage of a transient used directly inside a
        GPU device map, with no thread-block map around it: after inference
        the transient is expected to use register storage. """
    sdfg = dace.SDFG('default_storage_test_1')
    sdfg.add_array('A', [20], dace.float64, dace.StorageType.GPU_Global)
    sdfg.add_transient('tmp', [1], dace.float64)
    state = sdfg.add_state()

    # Graph nodes, created in dataflow order
    read_a = state.add_read('A')
    map_entry, map_exit = state.add_map(
        'kernel',
        {'i': '0:20'},
        dace.ScheduleType.GPU_Device,
    )
    tmp_node = state.add_access('tmp')
    tasklet = state.add_tasklet('add', {'a'}, {'b'}, 'b = a + 1')
    write_a = state.add_write('A')

    # A[i] -> tmp (through the map entry)
    load_memlet = dace.Memlet.simple('A', 'i')
    state.add_memlet_path(read_a, map_entry, tmp_node, memlet=load_memlet)

    # tmp[0] -> tasklet input
    tmp_memlet = dace.Memlet.simple('tmp', '0')
    state.add_memlet_path(tmp_node, tasklet, dst_conn='a', memlet=tmp_memlet)

    # tasklet output -> A[i] (through the map exit)
    store_memlet = dace.Memlet.simple('A', 'i')
    state.add_memlet_path(tasklet,
                          map_exit,
                          write_a,
                          src_conn='b',
                          memlet=store_memlet)

    set_default_schedule_and_storage_types(sdfg, None)

    inferred_storage = sdfg.arrays['tmp'].storage
    assert inferred_storage == dace.StorageType.Register
Exemplo n.º 2
0
def tbmap_sequential_test():
    """ Checks the default storage of a transient that sits between a GPU
        device map (wrapping a sequential map) and a GPU thread-block map:
        after inference the transient is expected to use GPU shared
        storage. """
    sdfg = dace.SDFG('default_storage_test_2')
    sdfg.add_array('A', [20, 32], dace.float64, dace.StorageType.GPU_Global)
    sdfg.add_transient('tmp', [1], dace.float64)
    state = sdfg.add_state()

    # Graph nodes: three nested maps (device > sequential > thread-block)
    read_a = state.add_read('A')
    kernel_entry, kernel_exit = state.add_map(
        'kernel',
        {'i': '0:20'},
        dace.ScheduleType.GPU_Device,
    )
    seq_entry, seq_exit = state.add_map(
        'seq',
        {'j': '0:1'},
        dace.ScheduleType.Sequential,
    )
    block_entry, block_exit = state.add_map(
        'block',
        {'ti': '0:32'},
        dace.ScheduleType.GPU_ThreadBlock,
    )
    tmp_node = state.add_access('tmp')
    tasklet = state.add_tasklet('add', {'a'}, {'b'}, 'b = a + 1')
    write_a = state.add_write('A')

    # A row -> tmp, entering the device and sequential maps
    row_memlet = dace.Memlet.simple('A', 'i+j, 0:32')
    state.add_memlet_path(read_a,
                          kernel_entry,
                          seq_entry,
                          tmp_node,
                          memlet=row_memlet)

    # tmp -> tasklet input, entering the thread-block map
    tmp_memlet = dace.Memlet.simple('tmp', '0, ti')
    state.add_memlet_path(tmp_node,
                          block_entry,
                          tasklet,
                          dst_conn='a',
                          memlet=tmp_memlet)

    # tasklet output -> A, leaving all three maps
    store_memlet = dace.Memlet.simple('A', 'i+j, ti')
    state.add_memlet_path(tasklet,
                          block_exit,
                          seq_exit,
                          kernel_exit,
                          write_a,
                          src_conn='b',
                          memlet=store_memlet)

    set_default_schedule_and_storage_types(sdfg, None)

    inferred_storage = sdfg.arrays['tmp'].storage
    assert inferred_storage == dace.StorageType.GPU_Shared
Exemplo n.º 3
0
def _test_determine_alloc(lifetime: dace.AllocationLifetime, unused: bool = False) -> tuple:
    """ Creates an SDFG playground for determining allocation.

        The outer SDFG has a map over ``i`` in ``0:N`` that contains a nested
        SDFG. The nested SDFG holds a GPU device map and two transients
        (``tmp`` in GPU global storage and ``tmp2`` in register storage).
        Every transient is created with the given allocation lifetime.

        :param lifetime: Allocation lifetime assigned to all transients.
        :param unused: Currently not read by this function; kept for
                       backward compatibility with existing callers.
        :return: A 2-tuple ``(sdfg, scopes)``, where ``scopes`` is the tuple
                 ``(sdfg, state, me, nsdfg, nstate, ime)``: outer SDFG, outer
                 state, outer map entry, nested SDFG, nested state and nested
                 map entry.
    """
    sdfg = dace.SDFG('lifetimetest')
    sdfg.add_array('A', [N], dace.float64)
    sdfg.add_array('B', [N], dace.float64)
    # Transient that is declared but never accessed in any state
    sdfg.add_transient('unused', [N], dace.float64, lifetime=lifetime)
    state = sdfg.add_state()
    me, mx = state.add_map('m', dict(i='0:N'))

    #########################################################################
    # Nested SDFG: A -> [GPU map] -> tmp -> tmp2 -> [GPU map exit] -> B (sum)
    nsdfg = dace.SDFG('nested')
    nsdfg.add_array('A', [N], dace.float64)
    nsdfg.add_array('B', [N], dace.float64)
    nsdfg.add_transient('tmp', [N], dace.float64, dace.StorageType.GPU_Global, lifetime=lifetime)
    nsdfg.add_transient('tmp2', [1], dace.float64, dace.StorageType.Register, lifetime=lifetime)
    nstate = nsdfg.add_state()
    ime, imx = nstate.add_map('m2', dict(i='0:20'), schedule=dace.ScheduleType.GPU_Device)
    t1 = nstate.add_access('tmp')
    t2 = nstate.add_access('tmp2')
    nstate.add_nedge(t1, t2, dace.Memlet('tmp[0]'))
    nstate.add_memlet_path(nstate.add_read('A'), ime, t1, memlet=dace.Memlet('A[i]'))
    nstate.add_memlet_path(t2, imx, nstate.add_write('B'), memlet=dace.Memlet('B[0]', wcr='lambda a,b: a+b'))
    #########################################################################
    # Place the nested SDFG inside the outer map and connect A and B fully
    nsdfg_node = state.add_nested_sdfg(nsdfg, None, {'A'}, {'B'})
    state.add_memlet_path(state.add_read('A'), me, nsdfg_node, dst_conn='A', memlet=dace.Memlet('A[0:N]'))
    state.add_memlet_path(nsdfg_node, mx, state.add_write('B'), src_conn='B', memlet=dace.Memlet('B[0:N]'))

    # Set default storage/schedule types in SDFG
    infer_types.set_default_schedule_and_storage_types(sdfg, None)

    return sdfg, (sdfg, state, me, nsdfg, nstate, ime)
Exemplo n.º 4
0
def test_schedule_inference_simple():
    """ Checks that, after default schedule inference on a program calling
        a nested program, the first map entry found has the CPU multicore
        schedule. """
    @dace.program
    def nested_call(A: dace.float64[3, 3]):
        return A + 1

    @dace.program
    def simple_schedule_inference(A: dace.float64[3, 3]):
        return nested_call(A)

    sdfg: dace.SDFG = simple_schedule_inference.to_sdfg(strict=False)

    # Inference passes first, state fusion afterwards
    infer_types.infer_connector_types(sdfg)
    infer_types.set_default_schedule_and_storage_types(sdfg, None)
    sdfg.apply_transformations_repeated(StateFusion)

    # Gather every map entry, including those in nested SDFGs
    map_entries = []
    for node, _ in sdfg.all_nodes_recursive():
        if isinstance(node, dace.nodes.MapEntry):
            map_entries.append(node)

    first_entry = map_entries[0]
    assert first_entry.schedule is dace.ScheduleType.CPU_Multicore
Exemplo n.º 5
0
    def apply(self, sdfg: SDFG):
        """ Inlines the matched nested SDFG node into the state containing it.

            Only the first state of the nested SDFG is read. The method copies
            the nested SDFG's global/init/exit code and constants into
            ``sdfg``, registers the nested transients in ``sdfg`` under new
            names, adds views for arrays that need reshaping, moves the nested
            nodes and edges into the outer state, reconnects them to the
            edges that used to enter/leave the nested SDFG node, and finally
            removes that node.

            :param sdfg: The SDFG that contains the state (``self.state_id``)
                         holding the nested SDFG node to inline.
        """
        state: SDFGState = sdfg.nodes()[self.state_id]
        nsdfg_node = state.nodes()[self.subgraph[InlineSDFG._nested_sdfg]]
        nsdfg: SDFG = nsdfg_node.sdfg
        # Only the first nested state is inlined
        nstate: SDFGState = nsdfg.nodes()[0]

        # A non-default schedule on the nested SDFG node is pushed into the
        # nested SDFG's defaults before its contents are moved out
        if nsdfg_node.schedule is not dtypes.ScheduleType.Default:
            infer_types.set_default_schedule_and_storage_types(
                nsdfg, nsdfg_node.schedule)

        # Scope (if any) surrounding the nested SDFG node in the outer state
        nsdfg_scope_entry = state.entry_node(nsdfg_node)
        nsdfg_scope_exit = (state.exit_node(nsdfg_scope_entry)
                            if nsdfg_scope_entry is not None else None)

        #######################################################
        # Collect and update top-level SDFG metadata

        # Global/init/exit code
        for loc, code in nsdfg.global_code.items():
            sdfg.append_global_code(code.code, loc)
        for loc, code in nsdfg.init_code.items():
            sdfg.append_init_code(code.code, loc)
        for loc, code in nsdfg.exit_code.items():
            sdfg.append_exit_code(code.code, loc)

        # Constants
        for cstname, cstval in nsdfg.constants.items():
            if cstname in sdfg.constants:
                # On a clash the outer value is kept; only a warning is issued
                if cstval != sdfg.constants[cstname]:
                    warnings.warn('Constant value mismatch for "%s" while '
                                  'inlining SDFG. Inner = %s != %s = outer' %
                                  (cstname, cstval, sdfg.constants[cstname]))
            else:
                sdfg.add_constant(cstname, cstval)

        # Find original source/destination edges (there is only one edge per
        # connector, according to match)
        # inputs/outputs: connector name -> outer edge
        # input_set/output_set: outer data name -> connector name
        inputs: Dict[str, MultiConnectorEdge] = {}
        outputs: Dict[str, MultiConnectorEdge] = {}
        input_set: Dict[str, str] = {}
        output_set: Dict[str, str] = {}
        for e in state.in_edges(nsdfg_node):
            inputs[e.dst_conn] = e
            input_set[e.data.data] = e.dst_conn
        for e in state.out_edges(nsdfg_node):
            outputs[e.src_conn] = e
            output_set[e.data.data] = e.src_conn

        # Access nodes that need to be reshaped
        reshapes: Set[str] = set()
        for aname, array in nsdfg.arrays.items():
            if array.transient:
                continue
            edge = None
            # Inner array has more dimensions than the outer memlet subset
            if aname in inputs:
                edge = inputs[aname]
                if len(array.shape) > len(edge.data.subset):
                    reshapes.add(aname)
                    continue
            if aname in outputs:
                edge = outputs[aname]
                if len(array.shape) > len(edge.data.subset):
                    reshapes.add(aname)
                    continue
            # Same dimensionality, but the stride check fails
            if edge is not None and not InlineSDFG._check_strides(
                    array.strides, sdfg.arrays[edge.data.data].strides,
                    edge.data, nsdfg_node):
                reshapes.add(aname)

        # Replace symbols using invocation symbol mapping
        # Two-step replacement (N -> __dacesym_N --> map[N]) to avoid clashes
        for symname, symvalue in nsdfg_node.symbol_mapping.items():
            if str(symname) != str(symvalue):
                nsdfg.replace(symname, '__dacesym_' + symname)
        for symname, symvalue in nsdfg_node.symbol_mapping.items():
            if str(symname) != str(symvalue):
                nsdfg.replace('__dacesym_' + symname, symvalue)

        # All transients become transients of the parent (if data already
        # exists, find new name)
        # Mapping from nested transient name to top-level name
        transients: Dict[str, str] = {}
        for node in nstate.nodes():
            if isinstance(node, nodes.AccessNode):
                datadesc = nsdfg.arrays[node.data]
                if node.data not in transients and datadesc.transient:
                    name = sdfg.add_datadesc('%s_%s' %
                                             (nsdfg.label, node.data),
                                             datadesc,
                                             find_new_name=True)
                    transients[node.data] = name

        # All transients of edges between code nodes are also added to parent
        for edge in nstate.edges():
            if (isinstance(edge.src, nodes.CodeNode)
                    and isinstance(edge.dst, nodes.CodeNode)):
                if edge.data.data is not None:
                    datadesc = nsdfg.arrays[edge.data.data]
                    if edge.data.data not in transients and datadesc.transient:
                        name = sdfg.add_datadesc('%s_%s' %
                                                 (nsdfg.label, edge.data.data),
                                                 datadesc,
                                                 find_new_name=True)
                        transients[edge.data.data] = name

        # Collect nodes to add to top-level graph
        # Maps a nested source/sink access node to the outer edge replacing it
        new_incoming_edges: Dict[nodes.Node, MultiConnectorEdge] = {}
        new_outgoing_edges: Dict[nodes.Node, MultiConnectorEdge] = {}

        source_accesses = set()
        sink_accesses = set()
        for node in nstate.source_nodes():
            if (isinstance(node, nodes.AccessNode)
                    and node.data not in transients
                    and node.data not in reshapes):
                new_incoming_edges[node] = inputs[node.data]
                source_accesses.add(node)
        for node in nstate.sink_nodes():
            if (isinstance(node, nodes.AccessNode)
                    and node.data not in transients
                    and node.data not in reshapes):
                new_outgoing_edges[node] = outputs[node.data]
                sink_accesses.add(node)

        #######################################################
        # Replace data on inlined SDFG nodes/edges

        # Replace data names with their top-level counterparts
        repldict = {}
        repldict.update(transients)
        repldict.update({
            k: v.data.data
            for k, v in itertools.chain(inputs.items(), outputs.items())
        })

        # Add views whenever reshapes are necessary
        for dname in reshapes:
            desc = nsdfg.arrays[dname]
            # To avoid potential confusion, rename protected __return keyword
            if dname.startswith('__return'):
                newname = f'{nsdfg.name}_ret{dname[8:]}'
            else:
                newname = dname
            newname, _ = sdfg.add_view(newname,
                                       desc.shape,
                                       desc.dtype,
                                       storage=desc.storage,
                                       strides=desc.strides,
                                       offset=desc.offset,
                                       debuginfo=desc.debuginfo,
                                       allow_conflicts=desc.allow_conflicts,
                                       total_size=desc.total_size,
                                       alignment=desc.alignment,
                                       may_alias=desc.may_alias,
                                       find_new_name=True)
            # Overrides the connector -> outer data entry set above
            repldict[dname] = newname

        for node in nstate.nodes():
            if isinstance(node, nodes.AccessNode) and node.data in repldict:
                node.data = repldict[node.data]
        for edge in nstate.edges():
            if edge.data.data in repldict:
                edge.data.data = repldict[edge.data.data]

        # Add extra access nodes for out/in view nodes
        # NOTE(review): node.data was renamed through repldict just above,
        # while `reshapes` and `outputs` are keyed by the original nested
        # names -- confirm these lookups behave as intended when a view
        # received a new name.
        for node in nstate.nodes():
            if isinstance(node, nodes.AccessNode) and node.data in reshapes:
                if nstate.in_degree(node) > 0 and nstate.out_degree(node) > 0:
                    # Such a node has to be in the output set
                    edge = outputs[node.data]

                    # Redirect outgoing edges through access node
                    out_edges = list(nstate.out_edges(node))
                    anode = nstate.add_access(edge.data.data)
                    vnode = nstate.add_access(node.data)
                    nstate.add_nedge(node, anode, edge.data)
                    nstate.add_nedge(anode, vnode, edge.data)
                    for e in out_edges:
                        nstate.remove_edge(e)
                        nstate.add_edge(vnode, e.src_conn, e.dst, e.dst_conn,
                                        e.data)

        #######################################################
        # Add nested SDFG into top-level SDFG

        # Add nested nodes into original state
        # (source/sink access nodes that map directly to outer edges are
        # left out; they are reconnected below)
        subgraph = SubgraphView(nstate, [
            n for n in nstate.nodes()
            if n not in (source_accesses | sink_accesses)
        ])
        state.add_nodes_from(subgraph.nodes())
        for edge in subgraph.edges():
            state.add_edge(edge.src, edge.src_conn, edge.dst, edge.dst_conn,
                           edge.data)

        #######################################################
        # Reconnect inlined SDFG

        # If a source/sink node is one of the inputs/outputs, reconnect it,
        # replacing memlets in outgoing/incoming paths
        modified_edges = set()
        modified_edges |= self._modify_memlet_path(new_incoming_edges, nstate,
                                                   state, True)
        modified_edges |= self._modify_memlet_path(new_outgoing_edges, nstate,
                                                   state, False)

        # Reshape: add connections to viewed data
        self._modify_reshape_data(reshapes, repldict, inputs, nstate, state,
                                  True)
        self._modify_reshape_data(reshapes, repldict, outputs, nstate, state,
                                  False)

        # Modify all other internal edges pertaining to input/output nodes
        for node in subgraph.nodes():
            if isinstance(node, nodes.AccessNode):
                if node.data in input_set or node.data in output_set:
                    if node.data in input_set:
                        outer_edge = inputs[input_set[node.data]]
                    else:
                        outer_edge = outputs[output_set[node.data]]

                    for edge in state.all_edges(node):
                        if (edge not in modified_edges
                                and edge.data.data == node.data):
                            for e in state.memlet_tree(edge):
                                if e.data.data == node.data:
                                    # Writes the edge's private `_data`
                                    # attribute directly
                                    e._data = helpers.unsqueeze_memlet(
                                        e.data, outer_edge.data)

        # If source/sink node is not connected to a source/destination access
        # node, and the nested SDFG is in a scope, connect to scope with empty
        # memlets
        if nsdfg_scope_entry is not None:
            for node in subgraph.nodes():
                if state.in_degree(node) == 0:
                    state.add_edge(nsdfg_scope_entry, None, node, None,
                                   Memlet())
                if state.out_degree(node) == 0:
                    state.add_edge(node, None, nsdfg_scope_exit, None,
                                   Memlet())

        # Replace nested SDFG parents with new SDFG
        for node in nstate.nodes():
            if isinstance(node, nodes.NestedSDFG):
                node.sdfg.parent = state
                node.sdfg.parent_sdfg = sdfg
                node.sdfg.parent_nsdfg_node = node

        # Remove all unused external inputs/output memlet paths, as well as
        # resulting isolated nodes
        # NOTE(review): the keys of inputs/outputs are connector names, while
        # source_accesses/sink_accesses hold access nodes, so these set
        # differences look like they never remove anything -- confirm intent.
        removed_in_edges = self._remove_edge_path(state,
                                                  inputs,
                                                  set(inputs.keys()) -
                                                  source_accesses,
                                                  reverse=True)
        removed_out_edges = self._remove_edge_path(state,
                                                   outputs,
                                                   set(outputs.keys()) -
                                                   sink_accesses,
                                                   reverse=False)

        # Re-add in/out edges to first/last nodes in subgraph
        # (topological order is taken from the nested state's private `_nx`
        # graph attribute)
        order = [
            x for x in nx.topological_sort(nstate._nx)
            if isinstance(x, nodes.AccessNode)
        ]
        for edge in removed_in_edges:
            # Find first access node that refers to this edge
            node = next(n for n in order if n.data == edge.data.data)
            state.add_edge(edge.src, edge.src_conn, node, edge.dst_conn,
                           edge.data)
        for edge in removed_out_edges:
            # Find last access node that refers to this edge
            node = next(n for n in reversed(order) if n.data == edge.data.data)
            state.add_edge(node, edge.src_conn, edge.dst, edge.dst_conn,
                           edge.data)

        #######################################################
        # Remove nested SDFG node
        state.remove_node(nsdfg_node)
Exemplo n.º 6
0
def generate_code(sdfg) -> List[CodeObject]:
    """ Produces the code objects (one per file to compile) for an SDFG.

        The SDFG is validated first. If the ``testing``/``serialization``
        configuration flag is set, it is also saved, reloaded and saved again,
        and code generation continues on the reloaded copy. Connector types
        and default schedule/storage types are inferred both before and after
        library node expansion.

        :param sdfg: The SDFG to generate code for.
        :return: List of code objects that correspond to files to compile.
        :raise RuntimeError: If the serialization test is enabled and the two
                             saved files differ.
    """
    # Refuse to generate code for an invalid SDFG
    sdfg.validate()

    if Config.get_bool('testing', 'serialization'):
        from dace.sdfg import SDFG
        import filecmp
        import shutil
        import tempfile
        with tempfile.TemporaryDirectory() as workdir:
            first_path = f'{workdir}/test.sdfg'
            second_path = f'{workdir}/test2.sdfg'

            # Round trip: save -> load -> save
            sdfg.save(first_path)
            reloaded = SDFG.from_file(first_path)
            reloaded.save(second_path)
            print('Testing SDFG serialization...')

            files_match = filecmp.cmp(first_path, second_path)
            if not files_match:
                # Move both files out of the temporary directory before
                # it is deleted
                shutil.move(first_path, "test.sdfg")
                shutil.move(second_path, "test2.sdfg")
                raise RuntimeError(
                    'SDFG serialization failed - files do not match')

        # Continue with the deserialized version
        # NOTE: From here on, modifications to `sdfg` (e.g., library node
        # expansion) are not reflected outside of this function.
        sdfg = reloaded

    # First inference pass: connector types, then default schedule/storage
    infer_types.infer_connector_types(sdfg)
    infer_types.set_default_schedule_and_storage_types(sdfg, None)

    # Recursively expand library nodes that have not yet been expanded
    sdfg.expand_library_nodes()

    # Second inference pass, covering whatever the expansion introduced
    infer_types.infer_connector_types(sdfg)
    infer_types.set_default_schedule_and_storage_types(sdfg, None)

    frame = framecode.DaCeCodeGenerator()

    # The CPU target is instantiated first (other code generators use it)
    # TODO: Refactor the parts used by other code generators out of CPU
    cpu_target_cls = cpu.CPUCodeGen
    for codegen_cls, props in target.TargetCodeGenerator.extensions().items():
        # A target registered under the name 'cpu' takes precedence
        if props['name'] == 'cpu':
            cpu_target_cls = codegen_cls
    targets = {'cpu': cpu_target_cls(frame, sdfg)}

    # Instantiate every remaining registered target
    remaining_targets = {
        props['name']: codegen_cls(frame, sdfg)
        for codegen_cls, props in
        target.TargetCodeGenerator.extensions().items()
        if props['name'] not in targets
    }
    targets.update(remaining_targets)

    # Record the provider class of every instrumentation type used in the SDFG
    provider_mapping = InstrumentationProvider.get_provider_mapping()
    instrumentation = frame._dispatcher.instrumentation
    instrumentation[dtypes.InstrumentationType.No_Instrumentation] = None
    for node, _ in sdfg.all_nodes_recursive():
        if hasattr(node, 'instrument'):
            itype = node.instrument
        elif hasattr(node, 'consume'):
            itype = node.consume.instrument
        elif hasattr(node, 'map'):
            itype = node.map.instrument
        else:
            continue
        instrumentation[itype] = provider_mapping[itype]
    if sdfg.instrument != dtypes.InstrumentationType.No_Instrumentation:
        instrumentation[sdfg.instrument] = provider_mapping[sdfg.instrument]

    # Turn the recorded provider classes into instances
    frame._dispatcher.instrumentation = {
        itype: (None if provider is None else provider())
        for itype, provider in instrumentation.items()
    }

    # Generate frame code (and the rest of the code)
    (global_code, frame_code, used_targets,
     used_environments) = frame.generate_code(sdfg, None)
    frame_object = CodeObject(sdfg.name,
                              global_code + frame_code,
                              'cpp',
                              cpu.CPUCodeGen,
                              'Frame',
                              environments=used_environments,
                              sdfg=sdfg)
    target_objects = [frame_object]

    # Code objects produced by each target that was actually used
    for tgt in used_targets:
        target_objects.extend(tgt.get_generated_codeobjects())

    # Header file for calling the SDFG
    call_header = CodeObject(sdfg.name,
                             generate_headers(sdfg),
                             'h',
                             cpu.CPUCodeGen,
                             'CallHeader',
                             target_type='../../include',
                             linkable=False)
    target_objects.append(call_header)

    # Code objects contributed by the used environments and their dependencies
    all_environments = dace.library.get_environments_and_dependencies(
        used_environments)
    for env in all_environments:
        if hasattr(env, "codeobjects"):
            target_objects.extend(env.codeobjects)

    # Dummy main function showing how to call the SDFG
    sample_main = CodeObject(sdfg.name + "_main",
                             generate_dummy(sdfg),
                             'cpp',
                             cpu.CPUCodeGen,
                             'SampleMain',
                             target_type='../../sample',
                             linkable=False)
    target_objects.append(sample_main)

    return target_objects
Exemplo n.º 7
0
    def apply(self, outer_state: SDFGState, sdfg: SDFG):
        nsdfg_node = self.nested_sdfg
        nsdfg: SDFG = nsdfg_node.sdfg

        if nsdfg_node.schedule is not dtypes.ScheduleType.Default:
            infer_types.set_default_schedule_and_storage_types(
                nsdfg, nsdfg_node.schedule)

        #######################################################
        # Collect and update top-level SDFG metadata

        # Global/init/exit code
        for loc, code in nsdfg.global_code.items():
            sdfg.append_global_code(code.code, loc)
        for loc, code in nsdfg.init_code.items():
            sdfg.append_init_code(code.code, loc)
        for loc, code in nsdfg.exit_code.items():
            sdfg.append_exit_code(code.code, loc)

        # Environments
        for nstate in nsdfg.nodes():
            for node in nstate.nodes():
                if isinstance(node, nodes.CodeNode):
                    node.environments |= nsdfg_node.environments

        # Constants
        for cstname, cstval in nsdfg.constants.items():
            if cstname in sdfg.constants:
                if cstval != sdfg.constants[cstname]:
                    warnings.warn('Constant value mismatch for "%s" while '
                                  'inlining SDFG. Inner = %s != %s = outer' %
                                  (cstname, cstval, sdfg.constants[cstname]))
            else:
                sdfg.add_constant(cstname, cstval)

        # Symbols
        outer_symbols = {str(k): v for k, v in sdfg.symbols.items()}
        for ise in sdfg.edges():
            outer_symbols.update(ise.data.new_symbols(sdfg, outer_symbols))

        # Find original source/destination edges (there is only one edge per
        # connector, according to match)
        inputs: Dict[str, MultiConnectorEdge] = {}
        outputs: Dict[str, MultiConnectorEdge] = {}
        input_set: Dict[str, str] = {}
        output_set: Dict[str, str] = {}
        for e in outer_state.in_edges(nsdfg_node):
            inputs[e.dst_conn] = e
            input_set[e.data.data] = e.dst_conn
        for e in outer_state.out_edges(nsdfg_node):
            outputs[e.src_conn] = e
            output_set[e.data.data] = e.src_conn

        # Replace symbols using invocation symbol mapping
        # Two-step replacement (N -> __dacesym_N --> map[N]) to avoid clashes
        symbolic.safe_replace(nsdfg_node.symbol_mapping, nsdfg.replace_dict)

        # Access nodes that need to be reshaped
        # reshapes: Set(str) = set()
        # for aname, array in nsdfg.arrays.items():
        #     if array.transient:
        #         continue
        #     edge = None
        #     if aname in inputs:
        #         edge = inputs[aname]
        #         if len(array.shape) > len(edge.data.subset):
        #             reshapes.add(aname)
        #             continue
        #     if aname in outputs:
        #         edge = outputs[aname]
        #         if len(array.shape) > len(edge.data.subset):
        #             reshapes.add(aname)
        #             continue
        #     if edge is not None and not InlineMultistateSDFG._check_strides(
        #             array.strides, sdfg.arrays[edge.data.data].strides,
        #             edge.data, nsdfg_node):
        #         reshapes.add(aname)

        # Mapping from nested transient name to top-level name
        transients: Dict[str, str] = {}

        # All transients become transients of the parent (if data already
        # exists, find new name)
        for nstate in nsdfg.nodes():
            for node in nstate.nodes():
                if isinstance(node, nodes.AccessNode):
                    datadesc = nsdfg.arrays[node.data]
                    if node.data not in transients and datadesc.transient:
                        new_name = node.data
                        if (new_name in sdfg.arrays
                                or new_name in outer_symbols
                                or new_name in sdfg.constants):
                            new_name = f'{nsdfg.label}_{node.data}'

                        name = sdfg.add_datadesc(new_name,
                                                 datadesc,
                                                 find_new_name=True)
                        transients[node.data] = name

            # All transients of edges between code nodes are also added to parent
            for edge in nstate.edges():
                if (isinstance(edge.src, nodes.CodeNode)
                        and isinstance(edge.dst, nodes.CodeNode)):
                    if edge.data.data is not None:
                        datadesc = nsdfg.arrays[edge.data.data]
                        if edge.data.data not in transients and datadesc.transient:
                            new_name = edge.data.data
                            if (new_name in sdfg.arrays
                                    or new_name in outer_symbols
                                    or new_name in sdfg.constants):
                                new_name = f'{nsdfg.label}_{edge.data.data}'

                            name = sdfg.add_datadesc(new_name,
                                                     datadesc,
                                                     find_new_name=True)
                            transients[edge.data.data] = name

        #######################################################
        # Replace data on inlined SDFG nodes/edges

        # Replace data names with their top-level counterparts
        repldict = {}
        repldict.update(transients)
        repldict.update({
            k: v.data.data
            for k, v in itertools.chain(inputs.items(), outputs.items())
        })

        symbolic.safe_replace(repldict,
                              lambda m: replace_datadesc_names(nsdfg, m),
                              value_as_string=True)

        # Add views whenever reshapes are necessary
        # for dname in reshapes:
        #     desc = nsdfg.arrays[dname]
        #     # To avoid potential confusion, rename protected __return keyword
        #     if dname.startswith('__return'):
        #         newname = f'{nsdfg.name}_ret{dname[8:]}'
        #     else:
        #         newname = dname
        #     newname, _ = sdfg.add_view(newname,
        #                                desc.shape,
        #                                desc.dtype,
        #                                storage=desc.storage,
        #                                strides=desc.strides,
        #                                offset=desc.offset,
        #                                debuginfo=desc.debuginfo,
        #                                allow_conflicts=desc.allow_conflicts,
        #                                total_size=desc.total_size,
        #                                alignment=desc.alignment,
        #                                may_alias=desc.may_alias,
        #                                find_new_name=True)
        #     repldict[dname] = newname

        # Add extra access nodes for out/in view nodes
        # inv_reshapes = {repldict[r]: r for r in reshapes}
        # for nstate in nsdfg.nodes():
        #     for node in nstate.nodes():
        #         if isinstance(node,
        #                       nodes.AccessNode) and node.data in inv_reshapes:
        #             if nstate.in_degree(node) > 0 and nstate.out_degree(
        #                     node) > 0:
        #                 # Such a node has to be in the output set
        #                 edge = outputs[inv_reshapes[node.data]]

        #                 # Redirect outgoing edges through access node
        #                 out_edges = list(nstate.out_edges(node))
        #                 anode = nstate.add_access(edge.data.data)
        #                 vnode = nstate.add_access(node.data)
        #                 nstate.add_nedge(node, anode, edge.data)
        #                 nstate.add_nedge(anode, vnode, edge.data)
        #                 for e in out_edges:
        #                     nstate.remove_edge(e)
        #                     nstate.add_edge(vnode, e.src_conn, e.dst,
        #                                     e.dst_conn, e.data)

        # Make unique names for states
        statenames = set(s.label for s in sdfg.nodes())
        for nstate in nsdfg.nodes():
            if nstate.label in statenames:
                newname = data.find_new_name(nstate.label, statenames)
                statenames.add(newname)
                nstate.set_label(newname)

        #######################################################
        # Collect and modify interstate edges as necessary

        outer_assignments = set()
        for e in sdfg.edges():
            outer_assignments |= e.data.assignments.keys()

        inner_assignments = set()
        for e in nsdfg.edges():
            inner_assignments |= e.data.assignments.keys()

        assignments_to_replace = inner_assignments & outer_assignments
        sym_replacements: Dict[str, str] = {}
        allnames = set(outer_symbols.keys()) | set(sdfg.arrays.keys())
        for assign in assignments_to_replace:
            newname = data.find_new_name(assign, allnames)
            allnames.add(newname)
            sym_replacements[assign] = newname
        nsdfg.replace_dict(sym_replacements)

        #######################################################
        # Add nested SDFG states into top-level SDFG

        outer_start_state = sdfg.start_state

        sdfg.add_nodes_from(nsdfg.nodes())
        for ise in nsdfg.edges():
            sdfg.add_edge(ise.src, ise.dst, ise.data)

        #######################################################
        # Reconnect inlined SDFG

        source = nsdfg.start_state
        sinks = nsdfg.sink_nodes()

        # Reconnect state machine
        for e in sdfg.in_edges(outer_state):
            sdfg.add_edge(e.src, source, e.data)
        for e in sdfg.out_edges(outer_state):
            for sink in sinks:
                sdfg.add_edge(sink, e.dst, e.data)

        # Modify start state as necessary
        if outer_start_state is outer_state:
            sdfg.start_state = sdfg.node_id(source)

        # TODO: Modify memlets by offsetting
        # If both source and sink nodes are inputs/outputs, reconnect once
        # edges_to_ignore = self._modify_access_to_access(new_incoming_edges,
        #                                                 nsdfg, nstate, state,
        #                                                 orig_data)

        # source_to_outer = {n: e.src for n, e in new_incoming_edges.items()}
        # sink_to_outer = {n: e.dst for n, e in new_outgoing_edges.items()}
        # # If a source/sink node is one of the inputs/outputs, reconnect it,
        # # replacing memlets in outgoing/incoming paths
        # modified_edges = set()
        # modified_edges |= self._modify_memlet_path(new_incoming_edges, nstate,
        #                                            state, sink_to_outer, True,
        #                                            edges_to_ignore)
        # modified_edges |= self._modify_memlet_path(new_outgoing_edges, nstate,
        #                                            state, source_to_outer,
        #                                            False, edges_to_ignore)

        # # Reshape: add connections to viewed data
        # self._modify_reshape_data(reshapes, repldict, inputs, nstate, state,
        #                           True)
        # self._modify_reshape_data(reshapes, repldict, outputs, nstate, state,
        #                           False)

        # Modify all other internal edges pertaining to input/output nodes
        # for nstate in nsdfg.nodes():
        #     for node in nstate.nodes():
        #         if isinstance(node, nodes.AccessNode):
        #             if node.data in input_set or node.data in output_set:
        #                 if node.data in input_set:
        #                     outer_edge = inputs[input_set[node.data]]
        #                 else:
        #                     outer_edge = outputs[output_set[node.data]]

        #                 for edge in state.all_edges(node):
        #                     if (edge not in modified_edges
        #                             and edge.data.data == node.data):
        #                         for e in state.memlet_tree(edge):
        #                             if e.data.data == node.data:
        #                                 e._data = helpers.unsqueeze_memlet(
        #                                     e.data, outer_edge.data)

        # Replace nested SDFG parents with new SDFG
        for nstate in nsdfg.nodes():
            nstate.parent = sdfg
            for node in nstate.nodes():
                if isinstance(node, nodes.NestedSDFG):
                    node.sdfg.parent_sdfg = sdfg
                    node.sdfg.parent_nsdfg_node = node

        #######################################################
        # Remove nested SDFG and state
        sdfg.remove_node(outer_state)

        return nsdfg.nodes()
Exemplo n.º 8
0
    def generate_code(
        self,
        sdfg: SDFG,
        schedule: Optional[dtypes.ScheduleType],
        sdfg_id: str = ""
    ) -> Tuple[str, str, Set[TargetCodeGenerator], Set[str]]:
        """ Generate frame code for a given SDFG, calling registered targets'
            code generation callbacks for them to generate their own code.
            :param sdfg: The SDFG to generate code for.
            :param schedule: The schedule the SDFG is currently located, or
                             None if the SDFG is top-level.
            :param sdfg_id: An optional string id given to the SDFG label
            :return: A 4-tuple of the generated global frame code, the
                     generated local frame code, the set of targets that
                     have been used in the generation of this SDFG, and
                     the set of environments that have been used.
            :raise RuntimeError: If a detected cycle does not have exactly
                                 one edge from its last to its first state,
                                 or if not every state of the SDFG was
                                 generated.
        """

        if len(sdfg_id) == 0 and sdfg.sdfg_id != 0:
            sdfg_id = '_%d' % sdfg.sdfg_id

        # NOTE(review): sdfg_label is not referenced again in this method
        sdfg_label = sdfg.name + sdfg_id

        global_stream = CodeIOStream()
        callsite_stream = CodeIOStream()

        # Set default storage/schedule types in SDFG
        set_default_schedule_and_storage_types(sdfg, schedule)

        # Only a top-level SDFG gets the header, function signature and
        # footer wrapped around its generated code (see the end of the method)
        is_top_level = sdfg.parent is None

        # Generate code
        ###########################

        # Invoke all instrumentation providers
        for instr in self._dispatcher.instrumentation.values():
            if instr is not None:
                instr.on_sdfg_begin(sdfg, callsite_stream, global_stream)

        # Allocate outer-level transients (once per data container, at the
        # first access node that refers to it)
        shared_transients = sdfg.shared_transients()
        allocated = set()
        for state in sdfg.nodes():
            for node in state.data_nodes():
                if (node.data in shared_transients
                        and node.data not in allocated):
                    self._dispatcher.dispatch_allocate(sdfg, state, None, node,
                                                       global_stream,
                                                       callsite_stream)
                    allocated.add(node.data)

        # Allocate inter-state variables
        global_symbols = copy.deepcopy(sdfg.symbols)
        interstate_symbols = {}
        for e in sdfg.edges():
            symbols = e.data.new_symbols(global_symbols)
            interstate_symbols.update(symbols)
            global_symbols.update(symbols)

        for isvarName, isvarType in interstate_symbols.items():
            # Skip symbols that have been declared as outer-level transients
            if isvarName in allocated:
                continue
            isvar = data.Scalar(isvarType)
            callsite_stream.write(
                '%s;\n' % (isvar.as_arg(with_types=True, name=isvarName)),
                sdfg)

        callsite_stream.write('\n', sdfg)

        states_topological = list(sdfg.topological_sort(sdfg.start_state))

        # {edge: [dace.edges.ControlFlow]}
        control_flow = {e: [] for e in sdfg.edges()}

        if dace.config.Config.get_bool('optimizer', 'detect_control_flow'):

            ####################################################################
            # Loop detection procedure

            # Position of every state in the topological order (first
            # occurrence), so that sorting does not rescan the list for
            # every key
            topo_index = {}
            for idx, topo_state in enumerate(states_topological):
                topo_index.setdefault(topo_state, idx)

            # Order the states of each cycle according to topological sort
            all_cycles = [
                sorted(c, key=topo_index.__getitem__)
                for c in sdfg.find_cycles()
            ]

            # Group cycles in terms of starting node, keeping the order in
            # which the cycles were found within each group
            cycles_by_start = {}
            for c in all_cycles:
                cycles_by_start.setdefault(c[0], []).append(c)

            # Visit each group exactly once, ordered by the position of its
            # starting node in the topological sort
            for first_node in sorted(cycles_by_start,
                                     key=topo_index.__getitem__):
                cycles = cycles_by_start[first_node]

                # Use arbitrary cycle to find the last node
                last_node = cycles[0][-1]

                if not first_node.is_empty():
                    # The entry node should not contain any computations
                    continue

                if not all(c[-1] == last_node for c in cycles):
                    # There are multiple back edges: not a for or while loop
                    continue

                previous_edge = [
                    e for e in sdfg.in_edges(first_node) if e.src != last_node
                ]
                if len(previous_edge) != 1:
                    # No single starting point: not a for or while
                    continue
                previous_edge = previous_edge[0]

                back_edge = sdfg.edges_between(last_node, first_node)
                if len(back_edge) != 1:
                    raise RuntimeError("Expected exactly one edge in cycle")
                back_edge = back_edge[0]

                # Build a set of all nodes in all cycles associated with this
                # set of start and end node
                internal_nodes = set().union(*cycles) - {first_node}

                exit_edge = [
                    e for e in sdfg.out_edges(first_node)
                    if e.dst not in internal_nodes | {first_node}
                ]
                if len(exit_edge) != 1:
                    # No single stopping condition: not a for or while
                    # (we don't support continue or break)
                    continue
                exit_edge = exit_edge[0]

                entry_edge = [
                    e for e in sdfg.out_edges(first_node) if e != exit_edge
                ]
                if len(entry_edge) != 1:
                    # No single starting condition: not a for or while
                    continue
                entry_edge = entry_edge[0]

                # Make sure this is not already annotated to be another construct
                if (len(control_flow[entry_edge]) != 0
                        or len(control_flow[back_edge]) != 0):
                    continue

                # Nested loops case I - previous edge of internal loop is a
                # loop-entry of an external loop (first state in a loop is
                # another loop)
                if (len(control_flow[previous_edge]) == 1 and isinstance(
                        control_flow[previous_edge][0], cflow.LoopEntry)):
                    # Nested loop, mark parent scope
                    loop_parent = control_flow[previous_edge][0].scope
                # Nested loops case II - exit edge of internal loop is a
                # back-edge of an external loop (last state in a loop is another
                # loop)
                elif (len(control_flow[exit_edge]) == 1 and isinstance(
                        control_flow[exit_edge][0], cflow.LoopBack)):
                    # Nested loop, mark parent scope
                    loop_parent = control_flow[exit_edge][0].scope
                elif (len(control_flow[exit_edge]) == 0
                      or len(control_flow[previous_edge]) == 0):
                    loop_parent = None
                else:
                    continue

                if entry_edge == back_edge:
                    # No entry check (we don't support do-loops)
                    # TODO: do we want to add some support for self-loops?
                    continue

                # Now we make sure that there is no other way to exit this
                # cycle, by checking that there's no reachable node *not*
                # included in any cycle between the first and last node.
                if any(len(set(c) - internal_nodes) > 1 for c in cycles):
                    continue

                # This is a loop! Generate the necessary annotation objects.
                loop_scope = cflow.LoopScope(internal_nodes)

                if ((len(previous_edge.data.assignments) > 0
                     or len(back_edge.data.assignments) > 0) and
                    (len(control_flow[previous_edge]) == 0 or
                     (len(control_flow[previous_edge]) == 1 and
                      control_flow[previous_edge][0].scope == loop_parent))):
                    # Generate assignment edge, if available
                    control_flow[previous_edge].append(
                        cflow.LoopAssignment(loop_scope, previous_edge))
                # Assign remaining control flow constructs
                control_flow[entry_edge].append(
                    cflow.LoopEntry(loop_scope, entry_edge))
                control_flow[exit_edge].append(
                    cflow.LoopExit(loop_scope, exit_edge))
                control_flow[back_edge].append(
                    cflow.LoopBack(loop_scope, back_edge))

            ###################################################################
            # If/then/else detection procedure

            candidates = [
                n for n in states_topological if sdfg.out_degree(n) == 2
            ]
            for candidate in candidates:

                # A valid if occurs when there are no reachable nodes for
                # either path that does not pass through a common dominator.
                # Note that `dominators` holds the result of networkx's
                # dominance_frontiers, computed with the candidate as start.
                dominators = nx.dominance.dominance_frontiers(
                    sdfg.nx, candidate)

                left_entry, right_entry = sdfg.out_edges(candidate)
                if (len(control_flow[left_entry]) > 0
                        or len(control_flow[right_entry]) > 0):
                    # Already assigned to a control flow construct
                    # TODO: carefully allow this in some cases
                    continue

                left, right = left_entry.dst, right_entry.dst
                dominator = dominators[left] & dominators[right]
                if len(dominator) != 1:
                    # There must be a single dominator across both branches,
                    # unless one of the nodes _is_ the next dominator
                    # if (len(dominator) == 0 and dominators[left] == {right}
                    #         or dominators[right] == {left}):
                    #     dominator = dominators[left] | dominators[right]
                    # else:
                    #     continue
                    continue
                dominator = next(iter(dominator))  # Exactly one dominator

                exit_edges = sdfg.in_edges(dominator)
                if len(exit_edges) != 2:
                    # There must be a single entry and a single exit. This
                    # could be relaxed in the future.
                    continue

                left_exit, right_exit = exit_edges
                if (len(control_flow[left_exit]) > 0
                        or len(control_flow[right_exit]) > 0):
                    # Already assigned to a control flow construct
                    # TODO: carefully allow this in some cases
                    continue

                # Now traverse from the source and verify that all possible paths
                # pass through the dominator
                left_nodes = sdfg.all_nodes_between(left, dominator)
                if left_nodes is None:
                    # Not all paths lead to the next dominator
                    continue
                right_nodes = sdfg.all_nodes_between(right, dominator)
                if right_nodes is None:
                    # Not all paths lead to the next dominator
                    continue

                # Make sure there is no overlap between left and right nodes
                if len(left_nodes & right_nodes) > 0:
                    continue

                # This is a valid if/then/else construct. Generate annotations
                if_then_else = cflow.IfThenElse(candidate, dominator)

                # Arbitrarily assign then/else to the two branches. If one edge
                # has no dominator but leads to the dominator, it means there's
                # only a then clause (and no else).
                if len(dominators[left]) == 1:
                    then_scope = cflow.IfThenScope(if_then_else, left_nodes)
                    else_scope = cflow.IfElseScope(if_then_else, right_nodes)
                    control_flow[left_entry].append(
                        cflow.IfEntry(then_scope, left_entry))
                    control_flow[left_exit].append(
                        cflow.IfExit(then_scope, left_exit))
                    control_flow[right_exit].append(
                        cflow.IfExit(else_scope, right_exit))
                    if len(dominators[right]) == 1:
                        # The other branch is a proper else clause
                        control_flow[right_entry].append(
                            cflow.IfEntry(else_scope, right_entry))
                else:
                    then_scope = cflow.IfThenScope(if_then_else, right_nodes)
                    else_scope = cflow.IfElseScope(if_then_else, left_nodes)
                    control_flow[right_entry].append(
                        cflow.IfEntry(then_scope, right_entry))
                    control_flow[right_exit].append(
                        cflow.IfExit(then_scope, right_exit))
                    control_flow[left_exit].append(
                        cflow.IfExit(else_scope, left_exit))

        #######################################################################
        # Generate actual program body

        states_generated = set()  # For sanity check
        generated_edges = set()
        self.generate_states(sdfg, "sdfg", control_flow,
                             global_stream, callsite_stream,
                             set(states_topological), states_generated,
                             generated_edges)

        #######################################################################

        # Sanity check
        if len(states_generated) != len(sdfg.nodes()):
            raise RuntimeError(
                "Not all states were generated in SDFG {}!"
                "\n  Generated: {}\n  Missing: {}".format(
                    sdfg.label, [s.label for s in states_generated],
                    [s.label for s in (set(sdfg.nodes()) - states_generated)]))

        # Deallocate transients (mirrors the allocation pass above)
        shared_transients = sdfg.shared_transients()
        deallocated = set()
        for state in sdfg.nodes():
            for node in state.data_nodes():
                if (node.data in shared_transients
                        and node.data not in deallocated):
                    self._dispatcher.dispatch_deallocate(
                        sdfg, state, None, node, global_stream,
                        callsite_stream)
                    deallocated.add(node.data)

        # Now that we have all the information about dependencies, generate
        # header and footer
        if is_top_level:
            header_stream = CodeIOStream()
            header_global_stream = CodeIOStream()
            footer_stream = CodeIOStream()
            footer_global_stream = CodeIOStream()
            self.generate_header(sdfg, self._dispatcher.used_environments,
                                 header_global_stream, header_stream)

            # Open program function
            function_signature = 'void __program_%s_internal(%s)\n{\n' % (
                sdfg.name, sdfg.signature())

            self.generate_footer(sdfg, self._dispatcher.used_environments,
                                 footer_global_stream, footer_stream)

            header_global_stream.write(global_stream.getvalue())
            header_global_stream.write(footer_global_stream.getvalue())
            generated_header = header_global_stream.getvalue()

            all_code = CodeIOStream()
            all_code.write(function_signature)
            all_code.write(header_stream.getvalue())
            all_code.write(callsite_stream.getvalue())
            all_code.write(footer_stream.getvalue())
            generated_code = all_code.getvalue()
        else:
            generated_header = global_stream.getvalue()
            generated_code = callsite_stream.getvalue()

        # Clean up generated code
        gotos = set(re.findall(r'goto (.*);', generated_code))
        # The pattern is not anchored at the end of the line, so every line
        # whose first non-blank character is a semicolon is dropped
        semicolon_line = re.compile(r'^\s*;\s*')
        label_line = re.compile(
            r'^\s*([a-zA-Z_][a-zA-Z_0-9]*):\s*[;]?\s*////.*$')
        kept_lines = []
        for line in generated_code.split('\n'):
            # Empty line with semicolon
            if semicolon_line.match(line):
                continue
            # Label that might be unused: drop it if no goto refers to it
            label = label_line.findall(line)
            if len(label) > 0 and label[0] not in gotos:
                continue
            kept_lines.append(line + '\n')
        clean_code = ''.join(kept_lines)

        # Return the generated global and local code strings
        return (generated_header, clean_code, self._dispatcher.used_targets,
                self._dispatcher.used_environments)