Example #1
File: cpp.py Project: gibchikafa/dace
def presynchronize_streams(sdfg, dfg, state_id, node, callsite_stream):
    state_dfg = sdfg.nodes()[state_id]
    if hasattr(node, "_cuda_stream") or is_devicelevel_gpu(
            sdfg, state_dfg, node):
        return
    backend = Config.get('compiler', 'cuda', 'backend')
    for e in state_dfg.in_edges(node):
        if hasattr(e.src, "_cuda_stream"):
            cudastream = "__state->gpu_context->streams[%d]" % e.src._cuda_stream
            callsite_stream.write(
                "%sStreamSynchronize(%s);" % (backend, cudastream),
                sdfg,
                state_id,
                [e.src, e.dst],
            )
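A note on the emitted call: the 'backend' entry under compiler.cuda selects the GPU runtime prefix, so the same template yields cudaStreamSynchronize or hipStreamSynchronize. A minimal sketch of the formatting (the stream expression is illustrative):

from dace.config import Config

backend = Config.get('compiler', 'cuda', 'backend')  # e.g. 'cuda' or 'hip'
stream_expr = '__state->gpu_context->streams[0]'
# With backend == 'cuda', this prints:
#   cudaStreamSynchronize(__state->gpu_context->streams[0]);
print('%sStreamSynchronize(%s);' % (backend, stream_expr))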
Example #2
def _run_liveoutput(command, **kwargs):
    process = subprocess.Popen(
        command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, **kwargs)
    output = six.StringIO()
    # Stream output line by line; readline() returns b'' at EOF, and a blank
    # line also ends the loop (the remainder is collected by communicate())
    while True:
        line = process.stdout.readline().rstrip()
        if not line:
            break
        output.write(line.decode('utf-8') + '\n')
        if Config.get_bool('debugprint'):
            print(line.decode('utf-8'), flush=True)
    stdout, stderr = process.communicate()
    if Config.get_bool('debugprint'):
        print(stdout.decode('utf-8'), flush=True)
        if stderr is not None:
            print(stderr.decode('utf-8'), flush=True)
    output.write(stdout.decode('utf-8'))
    if stderr is not None:
        output.write(stderr.decode('utf-8'))

    # If the process exited with an error, raise an exception
    if process.returncode != 0:
        raise subprocess.CalledProcessError(process.returncode, command,
                                            output.getvalue())
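A hedged usage sketch: running a command through _run_liveoutput, which echoes output live when the 'debugprint' setting is enabled and raises on a nonzero exit code (the command is illustrative):

import subprocess

try:
    _run_liveoutput(['cmake', '--version'])
except subprocess.CalledProcessError as err:
    # err.output holds everything the process printed
    print('Command failed with output:\n%s' % err.output)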
Example #3
def set_settings(settings_array, client_id):
    from dace.config import Config

    if not os.path.isdir("./client_configs"):
        os.mkdir("./client_configs/")
    clientpath = "./client_configs/" + client_id + ".conf"

    if os.path.isfile(clientpath):
        Config.load(clientpath)
    else:
        Config.load()

    for path, val in settings_array.items():
        path = path.split("/")
        Config.set(*path, value=val)

    Config.save(clientpath)
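The settings dictionary maps '/'-separated configuration paths to values; each key is split and forwarded to Config.set, and the result is saved to a per-client file. A minimal usage sketch (the keys are taken from other examples on this page; the client ID is illustrative):

set_settings(
    {
        'compiler/use_cache': True,
        'optimizer/automatic_strict_transformations': False,
    },
    client_id='client42',
)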
Example #4
def test_nccl_reduce():
    ng = Config.get('compiler', 'cuda', 'max_number_gpus')
    n = 15
    sdfg: dace.SDFG = nccl_reduce.to_sdfg(strict=True)
    gpu_map = find_map_by_param(sdfg, 'gpu')
    gpu_map.schedule = dtypes.ScheduleType.GPU_Multidevice
    infer_types.set_default_schedule_storage_types_and_location(sdfg, None)
    sdfg.specialize(dict(root_device=0, num_gpus=ng))

    out = np.ndarray(shape=n, dtype=np_dtype)
    out.fill(0)

    sdfg(out=out, N=n)

    assert np.unique(out)[0] == sum(range(ng))
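The find_map_by_param helper used by the NCCL tests is not shown here; a plausible sketch, assuming it returns the first map entry whose parameters include the given name:

import dace

def find_map_by_param(sdfg: dace.SDFG, pname: str) -> dace.nodes.MapEntry:
    """Return the first map entry that iterates over the parameter `pname`."""
    return next(n for n, _ in sdfg.all_nodes_recursive()
                if isinstance(n, dace.nodes.MapEntry) and pname in n.params)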
Example #5
    def render_config_dialog(self):
        # Load metadata for configuration
        Config.load_schema()

        self.window = Gtk.Window()
        notebook = Gtk.Notebook()
        notebook.set_scrollable(True)
        self.window.add(notebook)

        # General (top-level) settings
        gtklabel = Gtk.Label()
        gtklabel.set_label('General')
        general_grid = Gtk.Grid()
        general_grid.set_hexpand(True)
        notebook.append_page(general_grid, gtklabel)
        columized = False

        for i, (cname, cval) in enumerate(sorted(Config.get().items())):
            meta = Config.get_metadata(cname)
            if meta['type'] == 'dict':
                gtklabel = Gtk.Label()
                gtklabel.set_label(meta['title'])
                grid = Gtk.Grid()
                grid.set_hexpand(True)
                notebook.append_page(grid, gtklabel)
                self.render_config_subtree(cval, (cname, ), grid)
                continue

            if columized == False:
                general_grid.insert_column(0)
                general_grid.insert_column(1)
                columized = True
            self.render_config_element(cval, (cname, ), general_grid, i, meta)

        self.window.show_all()
        self.window.connect("delete-event", self.win_close_callback, None)
Example #6
    def on_sdfg_begin(self, sdfg, local_stream, global_stream, codegen):
        if sdfg.parent is None and PAPIUtils.is_papi_used(sdfg):
            # Configure CMake project and counters
            self.configure_papi()

            if not self._papi_used:
                return

            # Add instrumentation includes and initialize PAPI
            global_stream.write('#include <dace/perf/papi.h>', sdfg)
            local_stream.write(
                '''dace::perf::PAPI::init();
dace::perf::PAPIValueStore<%s> __perf_store (__state->report);''' % (', '.join(self._counters)), sdfg)
            # Get the measured overhead and take the minimum to compensate
            if Config.get_bool('instrumentation', 'papi', 'overhead_compensation'):
                local_stream.write("__perf_store.getMeasuredOverhead();", sdfg)
Example #7
def test_nccl_send_recv():
    ng = Config.get('compiler', 'cuda', 'max_number_gpus')
    if ng < 2:
        raise ValueError('This test needs to run with at least 2 GPUs.')
    else:
        ng = 2
    sdfg: dace.SDFG = nccl_send_recv.to_sdfg(strict=True)
    gpu_map = find_map_by_param(sdfg, 'gpu_id')
    gpu_map.schedule = dtypes.ScheduleType.GPU_Multidevice
    infer_types.set_default_schedule_storage_types_and_location(sdfg, None)
    sdfg.specialize(dict(num_gpus=ng))

    out = sdfg()
    res = np.array([0, 1])

    assert np.allclose(np.unique(out), res), f'\nout: {out}\nres: {res}\n'
Example #8
    def testDefaultDataTypes(self):
        # check that the configuration about default data types is enforced
        config_data_types = Config.get('compiler', 'default_data_types')

        code_str = """value1 = 10
value2=3.14
value3=5000000000"""
        inf_symbols = type_inference.infer_types(code_str)
        if config_data_types.lower() == "python":
            self.assertEqual(inf_symbols["value1"], dtypes.typeclass(np.int64))
            self.assertEqual(inf_symbols["value2"], dtypes.typeclass(np.float64))
        elif config_data_types.lower() == "c":
            self.assertEqual(inf_symbols["value1"], dtypes.typeclass(np.int32))
            self.assertEqual(inf_symbols["value2"], dtypes.typeclass(np.float32))

        # in any case, value3 needs uint64
        self.assertEqual(inf_symbols["value3"], dtypes.typeclass(np.uint64))
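Which branch runs depends on the compiler.default_data_types entry; a one-line sketch to pin the behavior under test, assuming the test is allowed to mutate the global configuration:

from dace.config import Config

# Force C-style defaults: int literals infer as int32, float literals as float32
Config.set('compiler', 'default_data_types', value='c')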
Example #9
def test_nccl_allreduce():
    ng = Config.get('compiler', 'cuda', 'max_number_gpus')
    n = 15
    sdfg: dace.SDFG = nccl_allreduce.to_sdfg(strict=True)
    state = sdfg.start_state
    gpu_map = state.nodes()[0]
    gpu_map.schedule = dtypes.ScheduleType.GPU_Multidevice
    infer_types.set_default_schedule_storage_types_and_location(sdfg, None)
    sdfg.specialize(dict(num_gpus=ng))

    out = np.ndarray(shape=n, dtype=np_dtype)
    out.fill(0)

    sdfg(out=out, N=n)

    res = sum(range(ng))
    assert np.unique(out)[0] == res
Example #10
def bounding_box_union(subset_a: Subset, subset_b: Subset) -> Range:
    """ Perform union by creating a bounding-box of two subsets. """
    if subset_a.dims() != subset_b.dims():
        raise ValueError('Dimension mismatch between %s and %s' %
                         (str(subset_a), str(subset_b)))

    # Check whether all expressions containing symbolic values are assumed
    # to be positive. If so, the union is computed differently.
    symbolic_positive = Config.get('optimizer', 'symbolic_positive')

    if not symbolic_positive:
        result = [(min(arb,
                       brb), max(are, bre), 1) for arb, brb, are, bre in zip(
                           subset_a.min_element(), subset_b.min_element(),
                           subset_a.max_element(), subset_b.max_element())]

    else:
        result = []
        for arb, brb, are, bre in zip(subset_a.min_element(),
                                      subset_b.min_element(),
                                      subset_a.max_element(),
                                      subset_b.max_element()):
            try:
                minrb = min(arb, brb)
            except TypeError:
                # min() cannot order symbolic expressions. Since symbols are
                # assumed positive, the symbol-free bound is the minimum.
                if len(arb.free_symbols) == 0:
                    minrb = arb
                elif len(brb.free_symbols) == 0:
                    minrb = brb
                else:
                    raise

            try:
                maxre = max(are, bre)
            except TypeError:
                # Conversely, the symbolic bound is taken as the maximum.
                if len(are.free_symbols) == 0:
                    maxre = bre
                elif len(bre.free_symbols) == 0:
                    maxre = are
                else:
                    raise
            result.append((minrb, maxre, 1))

    return Range(result)
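A worked example under the default (non-symbolic) path, assuming purely numeric bounds: the bounding box of the ranges 0:5 and 2:8 spans 0:8.

from dace.subsets import Range

# Range tuples are (begin, end, step) with inclusive ends
a = Range([(0, 4, 1)])   # the range 0:5
b = Range([(2, 7, 1)])   # the range 2:8
print(bounding_box_union(a, b))  # expected: the range 0:8, i.e. (0, 7, 1)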
Example #11
def timethis(program, title, flop_count, f, *args, **kwargs):
    """ Runs a function multiple (`DACE_treps`) times, logs the running times 
        to a file, and prints the median time (with FLOPs if given).
        @param program: The title of the measurement.
        @param title: A sub-title of the measurement.
        @param flop_count: Number of floating point operations in `program`.
                           If greater than zero, produces a median FLOPS 
                           report.
        @param f: The function to measure.
        @param args: Arguments to invoke the function with.
        @param kwargs: Keyword arguments to invoke the function with.
        @return: Latest return value of the function.
    """

    start = timer()
    REPS = int(Config.get('treps'))
    times = [start] * (REPS + 1)
    ret = None
    for i in range(REPS):
        # Call function
        ret = f(*args, **kwargs)
        times[i + 1] = timer()

    diffs = np.array([(times[i] - times[i - 1]) for i in range(1, REPS + 1)])

    problem_size = sys.argv[1] if len(sys.argv) >= 2 else 0

    if not os.path.isfile('results.log'):
        with open('results.log', 'w') as f:
            f.write('Program\tOptimization\tProblem_Size\tRuntime_sec\n')

    # Append measurements; opening with 'w' here would truncate the header
    with open('results.log', 'a') as f:
        for d in diffs:
            f.write('%s\t%s\t%s\t%.8f\n' % (program, title, problem_size, d))

    if flop_count > 0:
        gflops_arr = (flop_count / diffs) * 1e-9
        time_secs = np.median(diffs)
        GFLOPs = (flop_count / time_secs) * 1e-9
        print(title, GFLOPs, 'GFLOP/s       (', time_secs * 1000, 'ms)')
    else:
        time_secs = np.median(diffs)
        print(title, time_secs * 1000, 'ms')

    return ret
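A hedged usage sketch: timing a NumPy reduction with no FLOP report (the program and title strings are arbitrary labels in the log):

import numpy as np

arr = np.random.rand(1 << 20)
# Runs np.sum DACE_treps times, appends timings to results.log, and prints
# the median runtime; returns the last np.sum result.
total = timethis('vector_sum', 'numpy', 0, np.sum, arr)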
Example #12
    def __call__(self, **kwargs):
        try:
            argtuple = self._construct_args(**kwargs)

            # Call initializer function if necessary, then SDFG
            if self._initialized is False:
                self.initialize(*argtuple)

            # PROFILING (return the result in both branches)
            if Config.get_bool('profiling'):
                return operations.timethis(self._sdfg.name, 'DaCe', 0,
                                           self._cfunc, *argtuple)
            else:
                return self._cfunc(*argtuple)
        except (RuntimeError, TypeError, UnboundLocalError, KeyError,
                DuplicateDLLError, ReferenceError):
            self._lib.unload()
            raise
Example #13
def _try_to_match_transformation(graph: Union[SDFG, SDFGState], collapsed_graph: nx.DiGraph, subgraph: Dict[int, int],
                                 sdfg: SDFG, xform: Union[xf.PatternTransformation, Type[xf.PatternTransformation]],
                                 expr_idx: int, nxpattern: nx.DiGraph, state_id: int, permissive: bool,
                                 options: Dict[str, Any]) -> Optional[xf.PatternTransformation]:
    """ 
    Helper function that tries to instantiate a pattern match into a 
    transformation object. 
    """
    subgraph = {
        nxpattern.nodes[j]['node']: graph.node_id(collapsed_graph.nodes[i]['node'])
        for i, j in subgraph.items()
    }

    try:
        if isinstance(xform, xf.PatternTransformation):
            match = xform
        else:  # Construct directly from type with options
            opts = options or {}
            try:
                match = xform(**opts)
            except TypeError:
                # Backwards compatibility, transformation does not support ctor arguments
                match = xform()
                # Set manually
                for oname, oval in opts.items():
                    setattr(match, oname, oval)

        match.setup_match(sdfg, sdfg.sdfg_id, state_id, subgraph, expr_idx, options=options)
        match_found = match.can_be_applied(graph, expr_idx, sdfg, permissive=permissive)
    except Exception as e:
        if Config.get_bool('optimizer', 'match_exception'):
            raise
        if not isinstance(xform, type):
            xft = type(xform)
        else:
            xft = xform
        print('WARNING: {p}::can_be_applied triggered a {c} exception:'
              ' {e}'.format(p=xft.__name__, c=e.__class__.__name__, e=e))
        return None

    if match_found:
        return match

    return None
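Exceptions raised inside can_be_applied are swallowed and reported as warnings unless the optimizer.match_exception setting is enabled; a one-line sketch for debugging a failing transformation match:

from dace.config import Config

# Re-raise exceptions from can_be_applied instead of printing a warning
Config.set('optimizer', 'match_exception', value=True)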
Example #14
    def apply(self, sdfg):
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        graph = sdfg.nodes()[self.state_id]
        in_array = gnode(RedundantSecondArray._in_array)
        out_array = gnode(RedundantSecondArray._out_array)

        # We assume the following pattern: A -- e1 --> B -- e2 --> others

        # 1. Get edge e1 and extract subsets for arrays A and B
        e1 = graph.edges_between(in_array, out_array)[0]
        a_subset, b1_subset = _validate_subsets(e1, sdfg.arrays)
        # 2. Iterate over the e2 edges and traverse the memlet tree
        for e2 in graph.out_edges(out_array):
            path = graph.memlet_tree(e2)
            for e3 in path:
                # 2-a. Extract subsets for array B and others
                b3_subset, other_subset = _validate_subsets(
                    e3, sdfg.arrays, src_name=out_array.data)
                # 2-b. Modify memlet to match array A. Example:
                # A -- (0, a:b)/(c:c+b) --> B -- (c+d)/None --> others
                # A -- (0, a+d)/None --> others
                e3.data.data = in_array.data
                # (c+d) - (c:c+b) = (d)
                b3_subset.offset(b1_subset, negative=True)
                # (0, a:b)(d) = (0, a+d) (or offset for indices)
                if isinstance(a_subset, subsets.Indices):
                    tmp = copy.deepcopy(a_subset)
                    tmp.offset(b3_subset, negative=False)
                    e3.data.subset = tmp
                else:
                    e3.data.subset = a_subset.compose(b3_subset)
                e3.data.other_subset = other_subset
            # 2-c. Remove edge and add new one
            graph.remove_edge(e2)
            graph.add_edge(in_array, e2.src_conn, e2.dst, e2.dst_conn, e2.data)

        # Finally, remove out_array node
        graph.remove_node(out_array)
        # TODO: Should the array be removed from the SDFG?
        # del sdfg.arrays[out_array]
        if Config.get_bool("debugprint"):
            RedundantSecondArray._arrays_removed += 1
Example #15
File: xilinx.py Project: targetsm/dace
def cmake_options():
    host_flags = Config.get("compiler", "xilinx", "host_flags")
    synthesis_flags = Config.get("compiler", "xilinx", "synthesis_flags")
    build_flags = Config.get("compiler", "xilinx", "build_flags")
    mode = Config.get("compiler", "xilinx", "mode")
    target_platform = Config.get("compiler", "xilinx", "platform")
    enable_debugging = ("ON" if Config.get_bool(
        "compiler", "xilinx", "enable_debugging") else "OFF")
    autobuild = ("ON" if Config.get_bool("compiler", "autobuild_bitstreams")
                 else "OFF")
    options = [
        "-DDACE_XILINX_HOST_FLAGS=\"{}\"".format(host_flags),
        "-DDACE_XILINX_SYNTHESIS_FLAGS=\"{}\"".format(synthesis_flags),
        "-DDACE_XILINX_BUILD_FLAGS=\"{}\"".format(build_flags),
        "-DDACE_XILINX_MODE={}".format(mode),
        "-DDACE_XILINX_TARGET_PLATFORM=\"{}\"".format(target_platform),
        "-DDACE_XILINX_ENABLE_DEBUGGING={}".format(enable_debugging),
        "-DDACE_FPGA_AUTOBUILD_BITSTREAM={}".format(autobuild)
    ]
    # Override Vitis/SDx/SDAccel installation directory
    if Config.get("compiler", "xilinx", "path"):
        options.append("-DVITIS_ROOT_DIR=\"{}\"".format(
            Config.get("compiler", "xilinx", "path").replace("\\", "/")))
    return options
Example #16
def test_nccl_reduce_symbolic():
    ng = Config.get('compiler', 'cuda', 'max_number_gpus')
    n = 2
    sdfg: dace.SDFG = nccl_reduce_symbolic.to_sdfg(strict=True)
    outer_map = find_map_by_param(sdfg, 'root_gpu')
    if outer_map:
        outer_map.schedule = dtypes.ScheduleType.Sequential
    gpu_map = find_map_by_param(sdfg, 'gpu')
    gpu_map.schedule = dtypes.ScheduleType.GPU_Multidevice
    infer_types.set_default_schedule_storage_types_and_location(sdfg, None)
    sdfg.specialize(dict(num_gpus=ng))

    out = np.ndarray(shape=[ng, n], dtype=np_dtype)
    out.fill(0)

    sdfg(out=out, N=n)

    res = np.array([ng * i for i in range(ng)])
    assert (np.unique(out) == res).all()
Example #17
File: dtypes.py Project: fthaler/dace
    def __init__(self, wrapped_type):
        # Convert python basic types
        if isinstance(wrapped_type, str):
            try:
                wrapped_type = getattr(numpy, wrapped_type)
            except AttributeError:
                raise ValueError("Unknown type: {}".format(wrapped_type))

        config_data_types = Config.get('compiler', 'default_data_types')
        if wrapped_type is int:
            if config_data_types.lower() == 'python':
                wrapped_type = numpy.int64
            elif config_data_types.lower() == 'c':
                wrapped_type = numpy.int32
            else:
                raise NameError(
                    "Unknown configuration for default_data_types: {}".format(
                        config_data_types))
        elif wrapped_type is float:
            if config_data_types.lower() == 'python':
                wrapped_type = numpy.float64
            elif config_data_types.lower() == 'c':
                wrapped_type = numpy.float32
            else:
                raise NameError(
                    "Unknown configuration for default_data_types: {}".format(
                        config_data_types))
        elif wrapped_type is complex:
            if config_data_types.lower() == 'python':
                wrapped_type = numpy.complex128
            elif config_data_types.lower() == 'c':
                wrapped_type = numpy.complex64
            else:
                raise NameError(
                    "Unknown configuration for default_data_types: {}".format(
                        config_data_types))

        self.type = wrapped_type  # Type in Python
        self.ctype = _CTYPES[wrapped_type]  # Type in C
        self.ctype_unaligned = self.ctype  # Type in C (without alignment)
        self.dtype = self  # For compatibility support with numpy
        self.bytes = _BYTES[wrapped_type]  # Number of bytes for this type
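A usage sketch, assuming the 'python' value for compiler.default_data_types: the built-in int resolves to a 64-bit integer type, as Example #8 also asserts.

import numpy
from dace import dtypes

t = dtypes.typeclass(int)
# With python-style defaults, t.type is numpy.int64 and t.bytes is 8
print(t.type, t.ctype, t.bytes)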
Example #18
    def apply(self, sdfg):
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        graph = sdfg.nodes()[self.state_id]
        map_entry = gnode(RedundantArrayCopying3._map_entry)
        out_array = gnode(RedundantArrayCopying3._out_array)

        for e1 in graph.out_edges(map_entry):
            dst = e1.dst
            if (isinstance(dst, nodes.AccessNode) and dst != out_array
                    and dst.data == out_array.data):
                for e2 in graph.out_edges(dst):
                    graph.add_edge(out_array, None, e2.dst, e2.dst_conn,
                                   e2.data)
                    graph.remove_edge(e2)
                graph.remove_edge(e1)
                graph.remove_node(dst)
                if Config.get_bool("debugprint"):
                    RedundantArrayCopying3._arrays_removed += 1
Example #19
    def __call__(self, *args, **kwargs):
        """ Convenience function that parses, compiles, and runs a DaCe 
            program. """
        # Parse SDFG
        sdfg = parse_from_function(self, *args)

        # Add named arguments to the call
        kwargs.update({aname: arg for aname, arg in zip(self.argnames, args)})

        # Update arguments with symbols in data shapes
        kwargs.update(infer_symbols_from_shapes(sdfg, kwargs))

        # Allow CLI to prompt for optimizations
        if Config.get_bool('optimizer', 'transform_on_call'):
            sdfg = sdfg.optimize()

        # Compile SDFG (note: this is done after symbol inference due to shape
        # altering transformations such as Vectorization)
        binaryobj = sdfg.compile()

        return binaryobj(**kwargs)
Example #20
    def apply(self, sdfg):
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        graph = sdfg.nodes()[self.state_id]
        in_array = gnode(RedundantSecondArray._in_array)
        out_array = gnode(RedundantSecondArray._out_array)
        memlet = graph.edges_between(in_array, out_array)[0].data
        if memlet.data == in_array.data:
            subset = memlet.subset
        else:
            subset = memlet.other_subset

        for e in graph.out_edges(out_array):
            # Modify all outgoing edges to point to in_array
            path = graph.memlet_tree(e)
            for pe in path:
                if pe.data.data == out_array.data:
                    pe.data.data = in_array.data
                    if isinstance(subset, subsets.Indices):
                        pe.data.subset.offset(subset, False)
                    else:
                        pe.data.subset = subset.compose(pe.data.subset)
                elif pe.data.other_subset:
                    if isinstance(subset, subsets.Indices):
                        pe.data.other_subset.offset(subset, False)
                    else:
                        pe.data.other_subset = subset.compose(
                            pe.data.other_subset)

            # Redirect edge to out_array
            graph.remove_edge(e)
            graph.add_edge(in_array, e.src_conn, e.dst, e.dst_conn, e.data)

        # Finally, remove out_array node
        graph.remove_node(out_array)
        # TODO: Should the array be removed from the SDFG?
        # del sdfg.arrays[out_array]
        if Config.get_bool("debugprint"):
            RedundantSecondArray._arrays_removed += 1
Example #21
    def __call__(self, *args, **kwargs):
        # Update arguments from ordered list
        if len(args) > 0 and self.argnames is not None:
            kwargs.update({aname: arg for aname, arg in zip(self.argnames, args)})

        try:
            argtuple, initargtuple = self._construct_args(kwargs)

            # Call initializer function if necessary, then SDFG
            if self._initialized is False:
                self._lib.load()
                self.initialize(*initargtuple)
            # PROFILING
            if Config.get_bool('profiling'):
                operations.timethis(self._sdfg, 'DaCe', 0, self._cfunc, self._libhandle, *argtuple)
            else:
                self._cfunc(self._libhandle, *argtuple)

            return self._return_arrays
        except (RuntimeError, TypeError, UnboundLocalError, KeyError, cgx.DuplicateDLLError, ReferenceError):
            self._lib.unload()
            raise
Example #22
def parse_from_function(function, *compilation_args, strict=None):
    """ Try to parse a DaceProgram object and return the `dace.SDFG` object
        that corresponds to it.
        @param function: DaceProgram object (obtained from the `@dace.program`
                         decorator).
        @param compilation_args: Various compilation arguments e.g. dtypes.
        @param strict: Whether to apply strict transformations or not (None
                       uses configuration-defined value). 
        @return: The generated SDFG object.
    """
    if not isinstance(function, DaceProgram):
        raise TypeError(
            'Function must be of type dace.frontend.python.DaceProgram')

    # Obtain DaCe program as SDFG
    sdfg = function.generate_pdp(*compilation_args)

    # No need at this point
    # Fill in scope entry/exit connectors
    #sdfg.fill_scope_connectors()
    # Memlet propagation
    #if sdfg.propagate:
    #    labeling.propagate_labels_sdfg(sdfg)
    ########################

    # Apply strict transformations automatically
    if (strict == True or (strict is None and Config.get_bool(
            'optimizer', 'automatic_strict_transformations'))):
        sdfg.apply_strict_transformations()

    # Drawing the SDFG (again) to a .dot file
    sdfg.draw_to_file(recursive=True)
    sdfg.save(os.path.join('_dotgraphs', 'program.sdfg'))

    # Validate SDFG
    sdfg.validate()

    return sdfg
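A minimal end-to-end sketch, assuming a trivial @dace.program (the program body is illustrative):

import dace

@dace.program
def double(A: dace.float64[10]):
    A *= 2

# Obtain the SDFG without compiling or running it; with strict=None, the
# configured automatic_strict_transformations value decides
sdfg = parse_from_function(double)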
Example #23
def _try_to_match_transformation(
        graph: Union[SDFG, SDFGState], collapsed_graph: nx.DiGraph,
        subgraph: Dict[int,
                       int], sdfg: SDFG, xform: Type[xf.PatternTransformation],
        expr_idx: int, nxpattern: nx.DiGraph, state_id: int, permissive: bool,
        options: Dict[str, Any]) -> Optional[xf.PatternTransformation]:
    """ 
    Helper function that tries to instantiate a pattern match into a 
    transformation object. 
    """
    subgraph = {
        nxpattern.nodes[j]['node']:
        graph.node_id(collapsed_graph.nodes[i]['node'])
        for i, j in subgraph.items()
    }

    try:
        match = xform(sdfg,
                      sdfg.sdfg_id,
                      state_id,
                      subgraph,
                      expr_idx,
                      options=options)
        match_found = match.can_be_applied(graph,
                                           expr_idx,
                                           sdfg,
                                           permissive=permissive)
    except Exception as e:
        if Config.get_bool('optimizer', 'match_exception'):
            raise
        print('WARNING: {p}::can_be_applied triggered a {c} exception:'
              ' {e}'.format(p=xform.__name__, c=e.__class__.__name__, e=e))
        return None

    if match_found:
        return match

    return None
Example #24
def cmake_options():
    compiler = make_absolute(Config.get("compiler", "xilinx",
                                        "executable"))
    host_flags = Config.get("compiler", "xilinx", "host_flags")
    synthesis_flags = Config.get("compiler", "xilinx", "synthesis_flags")
    build_flags = Config.get("compiler", "xilinx", "build_flags")
    mode = Config.get("compiler", "xilinx", "mode")
    target_platform = Config.get("compiler", "xilinx", "platform")
    enable_debugging = ("ON" if Config.get_bool(
        "compiler", "xilinx", "enable_debugging") else "OFF")
    options = [
        "-DSDACCEL_ROOT_DIR={}".format(
            os.path.dirname(os.path.dirname(compiler))),
        "-DDACE_XILINX_HOST_FLAGS=\"{}\"".format(host_flags),
        "-DDACE_XILINX_SYNTHESIS_FLAGS=\"{}\"".format(synthesis_flags),
        "-DDACE_XILINX_BUILD_FLAGS=\"{}\"".format(build_flags),
        "-DDACE_XILINX_MODE={}".format(mode),
        "-DDACE_XILINX_TARGET_PLATFORM=\"{}\"".format(target_platform),
        "-DDACE_XILINX_ENABLE_DEBUGGING={}".format(enable_debugging),
    ]
    return options
Example #25
    def run_local(self, sdfg: SDFG, driver_file: str):
        workdir = sdfg.build_folder
        if Config.get_bool('diode', 'general', 'library_autoexpand'):
            sdfg.expand_library_nodes()
        code_objects = sdfg.generate_code()
        use_mpi = Executor._use_mpi(code_objects)
        # TODO: Implement (instead of pyrun, use mpirun/mpiexec)
        if use_mpi:
            raise NotImplementedError('Running MPI locally unimplemented')

        # Pipe stdout/stderr back to client output
        stdout = sys.stdout
        stderr = sys.stderr
        sys.stdout = FunctionStreamWrapper(self.show_output, stdout.write)
        sys.stderr = FunctionStreamWrapper(self.show_output, stderr.write)

        # Compile SDFG
        generate_program_folder(sdfg, code_objects, workdir, self._config)
        configure_and_compile(workdir, sdfg.name)

        self.show_output("Running script\n")

        # Run driver script with the compiled SDFG(s) as the default
        old_usecache = Config.get_bool('compiler', 'use_cache')
        Config.set('compiler', 'use_cache', value=True)
        try:
            runpy.run_path(driver_file, run_name='__main__')
        # Catching all exceptions, including SystemExit
        except (Exception, SystemExit) as ex:
            # Corner case: If exited with error code 0, it is a success
            if isinstance(ex, SystemExit):
                # If the exit code is nonzero, "raise" will not trigger a
                # printout on the server
                if ex.code != 0:
                    traceback.print_exc()
                    raise
            else:
                raise

        self.show_output("Execution Terminated\n")

        # Revert configuration and output redirection
        Config.set('compiler', 'use_cache', value=old_usecache)
        sys.stdout = stdout
        sys.stderr = stderr
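The save/set/restore sequence around compiler.use_cache is a general pattern for temporarily overriding a configuration entry; a sketch using try/finally so the old value is restored even if the guarded code raises:

from dace.config import Config

old_value = Config.get_bool('compiler', 'use_cache')
Config.set('compiler', 'use_cache', value=True)
try:
    pass  # run code that should reuse previously compiled SDFGs
finally:
    Config.set('compiler', 'use_cache', value=old_value)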
Example #26
    def apply(self, sdfg):
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        graph = sdfg.nodes()[self.state_id]
        in_array = gnode(RedundantArray._in_array)
        out_array = gnode(RedundantArray._out_array)

        for e in graph.in_edges(in_array):
            # Modify all incoming edges to point to out_array
            path = graph.memlet_path(e)
            for pe in path:
                if pe.data.data == in_array.data:
                    pe.data.data = out_array.data

            # Redirect edge to out_array
            graph.remove_edge(e)
            graph.add_edge(e.src, e.src_conn, out_array, e.dst_conn, e.data)

        # Finally, remove in_array node
        graph.remove_node(in_array)
        if Config.get_bool("debugprint"):
            RedundantArray._arrays_removed += 1
Example #27
    def apply(self, sdfg):
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        graph = sdfg.nodes()[self.state_id]
        in_array = gnode(RedundantArrayCopying._in_array)
        med_array = gnode(RedundantArrayCopying._med_array)
        out_array = gnode(RedundantArrayCopying._out_array)

        med_edges = len(graph.out_edges(med_array))
        med_out_edges = 0
        for med_e in graph.out_edges(med_array):
            if (isinstance(med_e.dst, nodes.AccessNode)
                    and med_e.dst.data == out_array.data):
                # Modify all outgoing edges to point to in_array
                for out_e in graph.out_edges(med_e.dst):
                    path = graph.memlet_path(out_e)
                    for pe in path:
                        if pe.data.data == out_array.data:
                            pe.data.data = in_array.data
                    # Redirect edge to in_array
                    graph.remove_edge(out_e)
                    graph.add_edge(in_array, out_e.src_conn, out_e.dst,
                                   out_e.dst_conn, out_e.data)
                # Remove out_array
                for e in graph.edges_between(med_e, med_e.dst):
                    graph.remove_edge(e)
                graph.remove_node(med_e.dst)
                med_out_edges += 1

        # Finally, remove the med_array node
        if med_array.desc(sdfg).transient and med_edges == med_out_edges:
            for e in graph.edges_between(in_array, med_array):
                graph.remove_edge(e)
            graph.remove_node(med_array)
            if Config.get_bool("debugprint"):
                RedundantArrayCopying._arrays_removed += 1
Example #28
    def render_config_subtree(self, cv, config_path, grid):
        # Add notebook to grid and render each child within

        columized = False
        notebook = Gtk.Notebook()
        grid.add(notebook)
        grid.set_hexpand(True)
        for i, (cname, cval) in enumerate(sorted(cv.items())):
            # Create current config "path"
            cpath = tuple(list(config_path) + [cname])
            meta = Config.get_metadata(*cpath)
            if meta['type'] == 'dict':
                gtklabel = Gtk.Label()
                gtklabel.set_label(meta['title'])
                ngrid = Gtk.Grid()
                notebook.append_page(ngrid, gtklabel)
                self.render_config_subtree(cval, cpath, ngrid)
                continue

            if columized == False:
                grid.insert_column(0)
                grid.insert_column(1)
                columized = True
            self.render_config_element(cval, cpath, grid, i, meta)
Example #29
def setup_env():
    num_concurrent_streams = Config.get("compiler", "cuda",
                                        "max_concurrent_streams")
    if 'ORT_USE_STREAMS' in os.environ:
        ONNXRuntimeCUDA.use_streams = _env2bool(os.environ["ORT_USE_STREAMS"])
        if ONNXRuntimeCUDA.use_streams:
            log.info("Using streams with ORT (experimental)")
            if num_concurrent_streams == 0:
                log.info("Setting compiler.cuda.max_concurrent_streams to 8")
                Config.set("compiler",
                           "cuda",
                           "max_concurrent_streams",
                           value=8)
            elif num_concurrent_streams == -1:
                ONNXRuntimeCUDA.use_streams = False
    else:
        if num_concurrent_streams != -1:
            log.info("Setting compiler.cuda.max_concurrent_streams to -1")
            Config.set("compiler", "cuda", "max_concurrent_streams", value=-1)
        ONNXRuntimeCUDA.use_streams = False
    ONNXRuntimeCUDA.max_concurrent_streams = Config.get(
        "compiler", "cuda", "max_concurrent_streams")
Example #30
# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
import dace
from simple_systolic_array import P, make_sdfg
from dace.config import Config

KERNEL_NAME = ("_this_is_a_very_long_kernel_name_that_does_not_fit_"
               "in_the_61_character_limit")

if __name__ == "__main__":

    Config.set("compiler", "fpga_vendor", value="intel_fpga")

    sdfg = make_sdfg("name_too_long")
    for node, _ in sdfg.all_nodes_recursive():
        if isinstance(node, dace.sdfg.nodes.CodeNode):
            node.label += KERNEL_NAME
    sdfg.specialize({"P": 4})
    try:
        code = sdfg.generate_code()
    except dace.codegen.targets.intel_fpga.NameTooLongError:
        pass
    else:
        raise RuntimeError("No exception thrown.")