Exemplo n.º 1
0
    def _arg_check(self, args, size, interval):
        """
        Validate the runtime min/max values supplied for this Dimension.

        Raises
        ------
        InvalidArgument
            If any of the ``self``-related runtime arguments in ``args``
            will cause an out-of-bounds access.
        """
        # Lower bound: must be supplied and must not underflow the data region
        if self.min_name not in args:
            raise InvalidArgument("No runtime value for %s" % self.min_name)
        lo = args[self.min_name]
        if interval.is_Defined and lo + interval.lower < 0:
            raise InvalidArgument("OOB detected due to %s=%d" % (self.min_name,
                                                                 lo))

        # Upper bound: must be supplied and must not overflow the data region
        if self.max_name not in args:
            raise InvalidArgument("No runtime value for %s" % self.max_name)
        hi = args[self.max_name]
        if interval.is_Defined and hi + interval.upper >= size:
            raise InvalidArgument("OOB detected due to %s=%d" % (self.max_name,
                                                                 hi))

        # max == min - 1 is legal: it simply produces a zero-trip loop
        if hi < lo - 1:
            raise InvalidArgument("Illegal max=%s < min=%s"
                                  % (hi, lo))
        elif hi == lo - 1:
            debug("%s=%d and %s=%d might cause no iterations along Dimension %s",
                  self.min_name, lo,
                  self.max_name, hi, self.name)
Exemplo n.º 2
0
def make(loc, args):
    """
    Invoke the ``make`` command from within ``loc`` with arguments ``args``.

    stdout and stderr of the build are captured in per-invocation log files
    under the JIT directory, keyed by a hash of the invocation so repeated
    builds of the same target reuse the same file names.

    Raises
    ------
    CompilationError
        If ``make`` exits with a non-zero status.
    """
    hash_key = sha1((loc + str(args)).encode()).hexdigest()
    logfile = path.join(get_jit_dir(), "%s.log" % hash_key)
    errfile = path.join(get_jit_dir(), "%s.err" % hash_key)

    tic = time()
    with change_directory(loc):
        with open(logfile, "w") as lf, open(errfile, "w") as ef:
            command = ['make'] + args
            lf.write("Compilation command:\n")
            lf.write(" ".join(command))
            lf.write("\n\n")
            try:
                check_call(command, stderr=ef, stdout=lf)
            except CalledProcessError as e:
                # Chain the original exception so the make exit status and
                # command remain visible in the traceback
                raise CompilationError(
                    'Command "%s" return error status %d. '
                    'Unable to compile code.\n'
                    'Compile log in %s\n'
                    'Compile errors in %s\n' %
                    (e.cmd, e.returncode, logfile, errfile)) from e
    toc = time()
    debug("Make <%s>: run in [%.2f s]" % (" ".join(args), toc - tic))
Exemplo n.º 3
0
    def _arg_check(self, args, size, interval):
        """
        Validate the runtime min/max values supplied for this Dimension.

        :param args: Mapper from argument names to runtime values.
        :param size: The extent of the data region along this Dimension.
        :param interval: Carries the lower/upper offsets (when ``is_Defined``)
                         used for the out-of-bounds checks.

        :raises InvalidArgument: If any of the ``self``-related runtime arguments
                                 in ``args`` will cause an out-of-bounds access.
        """
        # Lower bound: must be supplied and must not underflow the data region
        if self.min_name not in args:
            raise InvalidArgument("No runtime value for %s" % self.min_name)
        if interval.is_Defined and args[self.min_name] + interval.lower < 0:
            raise InvalidArgument("OOB detected due to %s=%d" %
                                  (self.min_name, args[self.min_name]))

        # Upper bound: must be supplied and must not overflow the data region
        if self.max_name not in args:
            raise InvalidArgument("No runtime value for %s" % self.max_name)
        if interval.is_Defined and args[self.max_name] + interval.upper >= size:
            raise InvalidArgument("OOB detected due to %s=%d" %
                                  (self.max_name, args[self.max_name]))

        # Allow the specific case of max=min-1, which disables the loop
        if args[self.max_name] < args[self.min_name] - 1:
            raise InvalidArgument("Illegal max=%s < min=%s" %
                                  (args[self.max_name], args[self.min_name]))
        elif args[self.max_name] == args[self.min_name] - 1:
            debug(
                "%s=%d and %s=%d might cause no iterations along Dimension %s",
                self.min_name, args[self.min_name], self.max_name,
                args[self.max_name], self.name)
Exemplo n.º 4
0
def make(loc, args):
    """
    Invoke the ``make`` command from within ``loc`` with arguments ``args``.

    stdout and stderr of the build are captured in per-invocation log files
    under the JIT directory, keyed by a hash of the invocation so repeated
    builds of the same target reuse the same file names.

    Raises
    ------
    CompilationError
        If ``make`` exits with a non-zero status.
    """
    hash_key = sha1((loc + str(args)).encode()).hexdigest()
    logfile = path.join(get_jit_dir(), "%s.log" % hash_key)
    errfile = path.join(get_jit_dir(), "%s.err" % hash_key)

    tic = time()
    with change_directory(loc):
        with open(logfile, "w") as lf, open(errfile, "w") as ef:
            command = ['make'] + args
            lf.write("Compilation command:\n")
            lf.write(" ".join(command))
            lf.write("\n\n")
            try:
                check_call(command, stderr=ef, stdout=lf)
            except CalledProcessError as e:
                # Chain the original exception so the make exit status and
                # command remain visible in the traceback
                raise CompilationError('Command "%s" return error status %d. '
                                       'Unable to compile code.\n'
                                       'Compile log in %s\n'
                                       'Compile errors in %s\n' %
                                       (e.cmd, e.returncode, logfile,
                                        errfile)) from e
    toc = time()
    debug("Make <%s>: run in [%.2f s]" % (" ".join(args), toc-tic))
Exemplo n.º 5
0
 def _allocate_memory(self):
     """Back this function with freshly allocated numpy-compatible storage."""
     debug("Allocating memory for %s (%s)" % (self.name, str(self.shape)))
     self._data_object = CMemory(self.shape, dtype=self.dtype)
     # Either first-touch the pages (NUMA-friendly) or zero-fill the data
     if not self._first_touch:
         self.data.fill(0)
     else:
         first_touch(self)
Exemplo n.º 6
0
def log_args(arguments):
    """Emit a single debug line summarising the runtime ``arguments`` mapper."""
    def _describe(name, value):
        # Array-like values are summarised by shape and L2 norm; everything
        # else is printed verbatim
        if hasattr(value, 'shape'):
            return ('(%s, shape=%s, L2 Norm=%d)' %
                    (name, str(value.shape), np.linalg.norm(value.view())))
        return '(%s, value=%s)' % (name, str(value))

    arg_str = [_describe(k, v) for k, v in arguments.items()]
    debug("Passing Arguments: " + ", ".join(arg_str))
Exemplo n.º 7
0
 def wrapper(self):
     # Lazily allocate the backing Data array the first time it is needed
     if self._data is None:
         debug("Allocating memory for %s (%s)" %
               (self.name, self.shape))
         self._data = Data(self.shape, self.indices, self.dtype)
         # Either first-touch the pages or zero-fill the freshly
         # allocated data
         if self._first_touch:
             first_touch(self)
         else:
             self.data.fill(0)
     # `func` is the decorated callable captured from the enclosing scope
     return func(self)
Exemplo n.º 8
0
    def _allocate_memory(self):
        """Function to allocate memory in terms of numpy ndarrays.

        Note: memmap is a subclass of ndarray.
        """
        if self.memmap:
            # Disk-backed storage: map the file `self.f` into memory
            self._data = np.memmap(filename=self.f, dtype=self.dtype, mode='w+',
                                   shape=self.shape, order='C')
        else:
            # In-memory storage: aligned allocation followed by a first-touch
            debug("Allocating memory for %s (%s)" % (self.name, str(self.shape)))
            self._data, self.internal_pointer = malloc_aligned(
                self.shape, dtype=self.dtype)
            first_touch(self)
Exemplo n.º 9
0
    def pre_apply(self, toshare):
        """
        Set up the YaskKernel before it's called from within an Operator.

        :param toshare: Mapper from functions to :class:`Data`s for sharing
                        grid storage.
        """
        # Sanity check: all shared functions must live on one and the same Grid
        grids = {i.grid for i in toshare if i.is_TensorFunction and i.grid is not None}
        assert len(grids) == 1
        grid = grids.pop()

        # Set the domain size, apply grid sharing, more sanity checks
        for k, v in zip(self.space_dimensions, grid.shape):
            self.soln.set_rank_domain_size(k, int(v))
        for k, v in toshare.items():
            target = self.grids.get(k.name)
            if target is not None:
                v._give_storage(target)
        # Local grids get storage later, in prepare_solution(); all other
        # grids must have received storage through the sharing above
        assert all(not i.is_storage_allocated() for i in self.local_grids.values())
        assert all(v.is_storage_allocated() for k, v in self.grids.items()
                   if k not in self.local_grids)

        # Debug info
        debug("%s<%s,%s>" % (self.name, self.step_dimension, self.space_dimensions))
        for i in list(self.grids.values()) + list(self.local_grids.values()):
            if i.get_num_dims() == 0:
                debug("    Scalar: %s", i.get_name())
            elif not i.is_storage_allocated():
                size = [i.get_rank_domain_size(j) for j in self.space_dimensions]
                debug("    LocalGrid: %s%s, size=%s" %
                      (i.get_name(), str(i.get_dim_names()), size))
            else:
                size = []
                lpad, rpad = [], []
                for j in i.get_dim_names():
                    if j in self.space_dimensions:
                        size.append(i.get_rank_domain_size(j))
                        lpad.append(i.get_left_pad_size(j))
                        rpad.append(i.get_right_pad_size(j))
                    else:
                        # Non-space dimensions carry no padding
                        size.append(i.get_alloc_size(j))
                        lpad.append(0)
                        rpad.append(0)
                debug("    Grid: %s%s, size=%s, left_pad=%s, right_pad=%s" %
                      (i.get_name(), str(i.get_dim_names()), size, lpad, rpad))

        # Set up the block shape for loop blocking
        for i, j in zip(self.space_dimensions, configuration.yask['blockshape']):
            self.soln.set_block_size(i, j)

        # This, amongst other things, allocates storage for the temporary grids
        self.soln.prepare_solution()

        # Set up auto-tuning
        if configuration['autotuning'] is False:
            self.soln.reset_auto_tuner(False)
        elif configuration['autotuning'] == 'preemptive':
            self.soln.run_auto_tuner_now()
Exemplo n.º 10
0
def save(soname, binary, compiler):
    """
    Store a binary into a file within a temporary directory.

    :param soname: Name of the .so file (w/o the suffix).
    :param binary: The binary data.
    :param compiler: The toolchain used for compilation.
    """
    sofile = get_jit_dir().joinpath(soname).with_suffix(compiler.so_ext)
    if not sofile.is_file():
        # First time we see this shared object -- write it out
        with open(str(sofile), 'wb') as f:
            f.write(binary)
        debug("%s: `%s` successfully saved in `%s`" %
              (compiler, sofile.name, get_jit_dir()))
    else:
        # Already on disk -- nothing to do
        debug("%s: `%s` was not saved in `%s` as it already exists" %
              (compiler, sofile.name, get_jit_dir()))
Exemplo n.º 11
0
def jit_compile(soname, code, compiler):
    """
    JIT compile the given C/C++ ``code``.

    This function relies upon codepy's ``compile_from_string``, which performs
    caching of compilation units and avoids potential race conditions due to
    multiple processing trying to compile the same object.

    :param soname: A unique name for the jit-compiled shared object.
    :param code: String of C source code.
    :param compiler: The toolchain used for compilation.
    """
    target = str(get_jit_dir().joinpath(soname))
    src_file = "%s.%s" % (target, compiler.src_ext)

    # This makes a suite of cache directories based on the soname
    # (only a short prefix is used to key the directory)
    cache_dir = get_codepy_dir().joinpath(soname[:7])
    cache_dir.mkdir(parents=True, exist_ok=True)

    # `catch_warnings` suppresses codepy complaining that it's taking
    # too long to acquire the cache lock. This warning can only appear
    # in a multiprocess session, typically (but not necessarily) when
    # many processes are frequently attempting jit-compilation (e.g.,
    # when running the test suite in parallel)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        tic = time()
        # Spinlock in case of MPI
        sleep_delay = 0 if configuration['mpi'] else 1
        _, _, _, recompiled = compile_from_string(
            compiler,
            target,
            code,
            src_file,
            cache_dir=cache_dir,
            debug=configuration['debug-compiler'],
            sleep_delay=sleep_delay)
        toc = time()

    # `recompiled` tells whether codepy actually invoked the compiler
    # or served the shared object from its cache
    if recompiled:
        debug("%s: compiled `%s` [%.2f s]" % (compiler, src_file, toc - tic))
    else:
        debug("%s: cache hit `%s` [%.2f s]" % (compiler, src_file, toc - tic))
Exemplo n.º 12
0
 def wrapper(self):
     # Lazily allocate the backing Data array the first time it is needed
     if self._data is None:
         debug("Allocating memory for %s%s" % (self.name, self.shape_allocated))
         self._data = Data(self.shape_allocated, self.dtype,
                           modulo=self._mask_modulo, allocator=self._allocator)
         if self._first_touch:
             assign(self, 0)
         if callable(self._initializer):
             if self._first_touch:
                 warning("`first touch` together with `initializer` causing "
                         "redundant data initialization")
             try:
                 self._initializer(self.data_with_halo)
             except ValueError:
                 # Perhaps user only wants to initialise the physical domain
                 self._initializer(self.data)
         else:
             # No user-provided initializer: zero-fill including the halo
             self.data_with_halo.fill(0)
     # `func` is the decorated callable captured from the enclosing scope
     return func(self)
Exemplo n.º 13
0
    def save(self, soname, binary):
        """
        Store a binary into a file within a temporary directory.

        Parameters
        ----------
        soname : str
            Name of the .so file (w/o the suffix).
        binary : obj
            The binary data.
        """
        sofile = self.get_jit_dir().joinpath(soname).with_suffix(self.so_ext)
        if not sofile.is_file():
            # First time we see this shared object -- write it out
            with open(str(sofile), 'wb') as f:
                f.write(binary)
            debug("%s: `%s` successfully saved in `%s`" %
                  (self, sofile.name, self.get_jit_dir()))
        else:
            # Already on disk -- nothing to do
            debug("%s: `%s` was not saved in `%s` as it already exists" %
                  (self, sofile.name, self.get_jit_dir()))
Exemplo n.º 14
0
def save(soname, binary, compiler):
    """
    Store a binary into a file within a temporary directory.

    Parameters
    ----------
    soname : str
        Name of the .so file (w/o the suffix).
    binary : obj
        The binary data.
    compiler : Compiler
        The toolchain used for JIT compilation.
    """
    sofile = get_jit_dir().joinpath(soname).with_suffix(compiler.so_ext)
    if not sofile.is_file():
        # First time we see this shared object -- write it out
        with open(str(sofile), 'wb') as f:
            f.write(binary)
        debug("%s: `%s` successfully saved in `%s`"
              % (compiler, sofile.name, get_jit_dir()))
        return
    # Already on disk -- nothing to do
    debug("%s: `%s` was not saved in `%s` as it already exists"
          % (compiler, sofile.name, get_jit_dir()))
Exemplo n.º 15
0
    def _arg_check(self, args, size, interval):
        """
        Raises
        ------
        InvalidArgument
            If any of the ``self``-related runtime arguments in ``args``
            will cause an out-of-bounds access.
        """
        # Lower bound: must be supplied and must not underflow the data region
        if self.min_name not in args:
            raise InvalidArgument("No runtime value for %s" % self.min_name)
        if interval.is_Defined and args[self.min_name] + interval.lower < 0:
            raise InvalidArgument("OOB detected due to %s=%d" %
                                  (self.min_name, args[self.min_name]))

        # Upper bound: must be supplied and must not overflow the data region
        if self.max_name not in args:
            raise InvalidArgument("No runtime value for %s" % self.max_name)
        if interval.is_Defined:
            if is_integer(interval.upper):
                upper = interval.upper
            else:
                # Autopadding causes non-integer upper limit
                upper = interval.upper.subs(args)
            if args[self.max_name] + upper >= size:
                raise InvalidArgument("OOB detected due to %s=%d" %
                                      (self.max_name, args[self.max_name]))

        # Allow the specific case of max=min-1, which disables the loop
        if args[self.max_name] < args[self.min_name] - 1:
            raise InvalidArgument("Illegal %s=%d < %s=%d" %
                                  (self.max_name, args[self.max_name],
                                   self.min_name, args[self.min_name]))
        elif args[self.max_name] == args[self.min_name] - 1:
            debug(
                "%s=%d and %s=%d might cause no iterations along Dimension %s",
                self.min_name, args[self.min_name], self.max_name,
                args[self.max_name], self.name)
Exemplo n.º 16
0
def print_defaults():
    """Print the environment variables accepted by Devito, their default value,
    as well as all of the accepted values."""
    for env_var, key in env_vars_mapper.items():
        debug('%s: %s. Default: %s' % (env_var, accepted[key], defaults[key]))
Exemplo n.º 17
0
    def __init__(self, name, yc_soln, local_grids=None):
        """
        Write out a YASK kernel, compile it using the YASK's Makefiles,
        import the corresponding SWIG-generated Python module, and finally
        create a YASK kernel solution object.

        Parameters
        ----------
        name : str
            Unique name of this YaskKernel.
        yc_soln
            The YaskCompiler solution.
        local_grids : list of Array, optional
            A local grid is necessary to run the YaskKernel, but it can be
            deallocated upon returning to Python-land.  For example, local
            grids could be used to implement the temporary arrays introduced by
            the DSE.  This parameter tells which of the ``yc_soln``'s grids are
            local.
        """
        self.name = name

        # Shared object name
        self.soname = "%s.devito.%s" % (name, configuration['platform'])

        if os.path.exists(os.path.join(namespace['yask-pylib'], '%s.py' % name)):
            # Nothing to do -- the YASK solution was compiled in a previous session
            yk = import_module(name)
            debug("cache hit, `%s` imported w/o jitting" % name)
        else:
            # We create and JIT compile a fresh YASK solution

            # The lock manager prevents race conditions
            # `lock_m` is used only to keep the lock manager alive
            with warnings.catch_warnings():
                cleanup_m = CleanupManager()
                lock_m = CacheLockManager(cleanup_m, namespace['yask-output-dir'])  # noqa

            # The directory in which the YASK-generated code (.hpp) will be placed
            yk_codegen = namespace['yask-codegen'](name, 'devito',
                                                   configuration['platform'])
            if not os.path.exists(yk_codegen):
                os.makedirs(yk_codegen)

            # Write out the stencil file
            yk_codegen_file = os.path.join(yk_codegen, namespace['yask-codegen-file'])
            yc_soln.format(configuration['platform'].isa,
                           ofac.new_file_output(yk_codegen_file))

            # JIT-compile it
            compiler = configuration.yask['compiler']
            if configuration['develop-mode']:
                if yc_soln.get_num_equations() == 0:
                    # YASK will compile more quickly, and no price has to be paid
                    # in terms of performance, as this is a void kernel
                    opt_level = 0
                else:
                    opt_level = 1
            else:
                # Production runs get the maximum optimization level
                opt_level = 3
            args = [
                '-j', 'YK_CXX=%s' % compiler.cc, 'YK_CXXOPT=-O%d' % opt_level,
                # No MPI support at the moment
                'mpi=0',
                # To locate the YASK compiler
                'YC_EXEC=%s' % os.path.join(namespace['path'], 'bin'),
                # Error out if a grid not explicitly defined in the compiler is created
                'allow_new_grid_types=0',
                # To give a unique name to the generated Python modules, rather
                # than creating `yask_kernel.py`
                'YK_BASE=%s' % name,
                # `stencil` and `arch` should always be provided
                'stencil=%s' % 'devito', 'arch=%s' % configuration['platform'],
                # The root directory of generated code files, shared libs, Python modules
                'YASK_OUTPUT_DIR=%s' % namespace['yask-output-dir'],
                # Pick the YASK kernel Makefile, i.e. the one under `yask/src/kernel`
                '-C', namespace['kernel-path'],
                # Make target
                'api'
            ]
            if configuration['develop-mode']:
                args.append('check=1')   # Activate internal YASK asserts
                args.append('trace=1')   # Print out verbose progress msgs w/-trace knob
                args.append('trace_mem=0')   # Print out verbose mem-access msgs
            make(namespace['path'], args)

            # Now we must be able to import the SWIG-generated Python module
            invalidate_caches()
            yk = import_module(name)

            # Release the lock manager
            cleanup_m.clean_up()

        # Create the YASK solution object via the kernel factory
        kfac = yk.yk_factory()
        self.env = kfac.new_env()
        self.soln = kfac.new_solution(self.env)

        # Allow step indices to wrap-around
        self.soln.set_step_wrap(True)

        # Apply any user-provided options, if any.
        # These are applied here instead of just before prepare_solution()
        # so that applicable options will apply to all API calls
        self.soln.apply_command_line_options(configuration.yask['options'] or '')

        # MPI setup: simple rank configuration in 1st dim only.
        # TODO: in production runs, the ranks would be distributed along all
        # domain dimensions
        self.soln.set_num_ranks(self.space_dimensions[0], self.env.get_num_ranks())

        # Redirect stdout to a string or file
        if configuration.yask['dump']:
            filename = 'yk_dump.%s.%s.%s.txt' % (name, configuration['platform'],
                                                 configuration['platform'].isa)
            filename = os.path.join(configuration.yask['dump'], filename)
            self.output = yk.yask_output_factory().new_file_output(filename)
        else:
            self.output = yk.yask_output_factory().new_string_output()
        self.soln.set_debug_output(self.output)

        # Users may want to run the same Operator (same domain etc.) with
        # different grids
        self.grids = {i.get_name(): i for i in self.soln.get_grids()}
        self.local_grids = {i.name: self.grids[i.name] for i in (local_grids or [])}
Exemplo n.º 18
0
    def pre_apply(self, toshare):
        """
        Set up the YaskKernel before it's called from within an Operator.

        Parameters
        ----------
        toshare : dict
            Mapper ``Function -> Data`` for grid-storage sharing.
        """
        # Sanity check: all shared functions must live on one and the same Grid
        grids = {i.grid for i in toshare if i.is_DiscreteFunction and i.grid is not None}
        assert len(grids) == 1
        grid = grids.pop()

        # Set the domain size, apply grid sharing, more sanity checks
        for k, v in toshare.items():
            target = self.grids.get(k.name)
            if target is not None:
                v._give_storage(target)
        for k, v in zip(self.space_dimensions, grid.shape):
            self.soln.set_rank_domain_size(k, int(v))
        # Local grids get storage later, in prepare_solution(); all other
        # grids must have received storage through the sharing above
        assert all(not i.is_storage_allocated() for i in self.local_grids.values())
        assert all(v.is_storage_allocated() for k, v in self.grids.items()
                   if k not in self.local_grids)

        # Debug info
        debug("%s<%s,%s>" % (self.name, self.step_dimension, self.space_dimensions))
        for i in list(self.grids.values()) + list(self.local_grids.values()):
            if i.get_num_dims() == 0:
                debug("    Scalar: %s", i.get_name())
            elif not i.is_storage_allocated():
                size = [i.get_rank_domain_size(j) for j in self.space_dimensions]
                debug("    LocalGrid: %s%s, size=%s" %
                      (i.get_name(), str(i.get_dim_names()), size))
            else:
                size = []
                lpad, rpad = [], []
                for j in i.get_dim_names():
                    if j in self.space_dimensions:
                        size.append(i.get_rank_domain_size(j))
                        lpad.append(i.get_left_pad_size(j))
                        rpad.append(i.get_right_pad_size(j))
                    else:
                        # Non-space dimensions carry no padding
                        size.append(i.get_alloc_size(j))
                        lpad.append(0)
                        rpad.append(0)
                debug("    Grid: %s%s, size=%s, left_pad=%s, right_pad=%s" %
                      (i.get_name(), str(i.get_dim_names()), size, lpad, rpad))

        # Set up the block shape for loop blocking
        for i, j in zip(self.space_dimensions, configuration.yask['blockshape']):
            self.soln.set_block_size(i, j)

        # This, amongst other things, allocates storage for the temporary grids
        self.soln.prepare_solution()

        # Set up auto-tuning
        if configuration['autotuning'].level is False:
            self.soln.reset_auto_tuner(False)
        elif configuration['autotuning'].mode == 'preemptive':
            self.soln.run_auto_tuner_now()
Exemplo n.º 19
0
def jit_compile(soname, code, compiler):
    """
    JIT compile some source code given as a string.

    This function relies upon codepy's ``compile_from_string``, which performs
    caching of compilation units and avoids potential race conditions due to
    multiple processing trying to compile the same object.

    Parameters
    ----------
    soname : str
        Name of the .so file (w/o the suffix).
    code : str
        The source code to be JIT compiled.
    compiler : Compiler
        The toolchain used for JIT compilation.
    """
    target = str(get_jit_dir().joinpath(soname))
    src_file = "%s.%s" % (target, compiler.src_ext)

    # Cache directories are keyed by a short prefix of the soname
    cache_dir = get_codepy_dir().joinpath(soname[:7])
    if configuration['jit-backdoor'] is False:
        # Typically we end up here
        # Make a suite of cache directories based on the soname
        cache_dir.mkdir(parents=True, exist_ok=True)
    else:
        # Warning: dropping `code` on the floor in favor to whatever is written
        # within `src_file`
        try:
            with open(src_file, 'r') as f:
                code = f.read()
            # Bypass the devito JIT cache
            # Note: can't simply use Python's `mkdtemp()` as, with MPI, different
            # ranks would end up creating different cache dirs
            cache_dir = cache_dir.joinpath('jit-backdoor')
            cache_dir.mkdir(parents=True, exist_ok=True)
        except FileNotFoundError:
            raise ValueError("Trying to use the JIT backdoor for `%s`, but "
                             "the file isn't present" % src_file)

    # `catch_warnings` suppresses codepy complaining that it's taking
    # too long to acquire the cache lock. This warning can only appear
    # in a multiprocess session, typically (but not necessarily) when
    # many processes are frequently attempting jit-compilation (e.g.,
    # when running the test suite in parallel)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        tic = time()
        # Spinlock in case of MPI
        sleep_delay = 0 if configuration['mpi'] else 1
        _, _, _, recompiled = compile_from_string(compiler, target, code, src_file,
                                                  cache_dir=cache_dir,
                                                  debug=configuration['debug-compiler'],
                                                  sleep_delay=sleep_delay)
        toc = time()

    # `recompiled` tells whether codepy actually invoked the compiler
    # or served the shared object from its cache
    if recompiled:
        debug("%s: compiled `%s` [%.2f s]" % (compiler, src_file, toc-tic))
    else:
        debug("%s: cache hit `%s` [%.2f s]" % (compiler, src_file, toc-tic))
Exemplo n.º 20
0
def print_state():
    """Emit, via the debug logger, one line per configuration entry."""
    for key, value in configuration.items():
        debug('{}: {}'.format(key, value))
Exemplo n.º 21
0
    def run(self, cfunction, arguments, toshare):
        """
        Run the YaskKernel through a JIT-compiled function.

        Parameters
        ----------
        cfunction
            The JIT-compiled function, of type ``ctypes.FuncPtr``, driving
            the YASK solution.
        arguments : dict
            Mapper from function/dimension/... names to run-time values
            to be passed to ``cfunction``.
        toshare : dict
            Mapper from functions to :class:`Data` objects for sharing
            grid storage between Devito and YASK.
        """
        # Sanity check: all shared TensorFunctions must live on the same Grid,
        # since a single domain size is set on the solution below
        grids = {i.grid for i in toshare if i.is_TensorFunction}
        assert len(grids) == 1
        grid = grids.pop()

        # Set the domain size, apply grid sharing, more sanity checks
        for k, v in zip(self.space_dimensions, grid.shape):
            self.soln.set_rank_domain_size(k, int(v))
        # Hand Devito-allocated storage over to the matching YASK grids;
        # grids with no Devito counterpart are skipped
        for k, v in toshare.items():
            target = self.grids.get(k.name)
            if target is not None:
                v._give_storage(target)
        # Local (temporary) grids must still be unallocated at this point --
        # their storage is created by `prepare_solution` below -- while every
        # other grid must already have storage attached
        assert all(not i.is_storage_allocated()
                   for i in self.local_grids.values())
        assert all(v.is_storage_allocated() for k, v in self.grids.items()
                   if k not in self.local_grids)

        # Debug info: dump name/size/padding of every grid known to the solution
        debug("%s<%s,%s>" %
              (self.name, self.time_dimension, self.space_dimensions))
        for i in list(self.grids.values()) + list(self.local_grids.values()):
            if i.get_num_dims() == 0:
                debug("    Scalar: %s", i.get_name())
            elif not i.is_storage_allocated():
                size = [
                    i.get_rank_domain_size(j) for j in self.space_dimensions
                ]
                debug("    LocalGrid: %s%s, size=%s" %
                      (i.get_name(), str(i.get_dim_names()), size))
            else:
                size = [
                    i.get_rank_domain_size(j) for j in self.space_dimensions
                ]
                pad = [i.get_pad_size(j) for j in self.space_dimensions]
                debug("    Grid: %s%s, size=%s, pad=%s" %
                      (i.get_name(), str(i.get_dim_names()), size, pad))

        # Apply any user-provided option, if any
        self.soln.apply_command_line_options(configuration.yask['options']
                                             or '')
        # Set up the block shape for loop blocking
        for i, j in zip(self.space_dimensions,
                        configuration.yask['blockshape']):
            self.soln.set_block_size(i, j)

        # This, amongst other things, allocates storage for the temporary grids
        self.soln.prepare_solution()

        # Set up auto-tuning; 'off' disables it entirely, 'preemptive' runs the
        # tuner once, up front, instead of tuning during the actual run
        if configuration.yask['autotuning'] == 'off':
            self.soln.reset_auto_tuner(False)
        elif configuration.yask['autotuning'] == 'preemptive':
            self.soln.run_auto_tuner_now()

        # Run the kernel
        cfunction(*list(arguments.values()))

        # Release grid storage. Note: this *will not* cause deallocation, as these
        # grids are actually shared with the hook solution
        for i in self.grids.values():
            i.release_storage()
        # Release local grid storage. This *will* cause deallocation
        for i in self.local_grids.values():
            i.release_storage()
        # Dump performance data
        self.soln.get_stats()
Exemplo n.º 22
0
    def __init__(self, name, yc_soln, local_vars=None):
        """
        Write out a YASK kernel, compile it using the YASK's Makefiles,
        import the corresponding SWIG-generated Python module, and finally
        create a YASK kernel solution object.

        Parameters
        ----------
        name : str
            Unique name of this YaskKernel.
        yc_soln
            The YaskCompiler solution.
        local_vars : list of Array, optional
            A local var is necessary to run the YaskKernel, but it can be
            deallocated upon returning to Python-land.  For example, local
            vars could be used to implement the temporary arrays introduced by
            the DSE.  This parameter tells which of the ``yc_soln``'s vars are
            local.
        """
        # `name` doubles as the name of the SWIG-generated Python module
        # imported below, so it must be unique across YaskKernels
        self.name = name

        # Shared object name
        self.soname = "%s.devito.%s" % (name, configuration['platform'])

        if os.path.exists(os.path.join(namespace['yask-pylib'],
                                       '%s.py' % name)):
            # Nothing to do -- the YASK solution was compiled in a previous session
            yk = import_module(name)
            debug("cache hit, `%s` imported w/o jitting" % name)
        else:
            # We create and JIT compile a fresh YASK solution

            # The lock manager prevents race conditions
            # `lock_m` is used only to keep the lock manager alive
            with warnings.catch_warnings():
                cleanup_m = CleanupManager()
                lock_m = CacheLockManager(cleanup_m,
                                          namespace['yask-output-dir'])  # noqa

            # The directory in which the YASK-generated code (.hpp) will be placed
            yk_codegen = namespace['yask-codegen'](name, 'devito',
                                                   configuration['platform'])
            if not os.path.exists(yk_codegen):
                os.makedirs(yk_codegen)

            # Write out the stencil file
            yk_codegen_file = os.path.join(yk_codegen,
                                           namespace['yask-codegen-file'])
            yc_soln.output_solution(ofac.new_file_output(yk_codegen_file))

            # JIT-compile it
            compiler = configuration.yask['compiler']
            if configuration['develop-mode']:
                if yc_soln.get_num_equations() == 0:
                    # YASK will compile more quickly, and no price has to be paid
                    # in terms of performance, as this is a void kernel
                    opt_level = 0
                else:
                    opt_level = 1
            else:
                opt_level = 3
            args = [
                '-j',  # Parallel make
                'YK_CXX=%s' % compiler.cc,
                'YK_CXXOPT=-O%d' % opt_level,
                # No MPI support at the moment
                'mpi=0',
                # To locate the YASK compiler
                'YC_EXEC=%s' % os.path.join(namespace['path'], 'bin'),
                # Error out if a var not explicitly defined in the compiler is created
                'allow_new_var_types=0',
                # To give a unique name to the generated Python modules, rather
                # than creating `yask_kernel.py`
                'YK_BASE=%s' % name,
                # `stencil` and `arch` should always be provided
                'stencil=%s' % 'devito',
                'arch=%s' % configuration['platform'],
                # The root directory of generated code files, shared libs, Python modules
                'YASK_OUTPUT_DIR=%s' % namespace['yask-output-dir'],
                # Pick the YASK kernel Makefile, i.e. the one under `yask/src/kernel`
                '-C',
                namespace['kernel-path'],
                # Make target
                'api'
            ]
            if configuration['develop-mode']:
                # Activate internal YASK asserts
                args.append('check=1')
                # Enable verbose progress msgs w/-trace knob
                args.append('trace=1')
                # Enable verbose mem-access msgs w/-trace knob
                args.append('trace_mem=0')
            compiler.make(namespace['path'], args)

            # Import the SWIG-generated Python module; `invalidate_caches` is
            # required so the just-built module is visible to the import system
            invalidate_caches()
            yk = import_module(name)

            # Release the lock manager
            cleanup_m.clean_up()

        # Create the YASK solution object
        kfac = yk.yk_factory()
        self.env = kfac.new_env()
        self.soln = kfac.new_solution(self.env)

        # Allow step indices to wrap-around
        self.soln.set_step_wrap(True)

        # Apply any user-provided options, if any.
        # These are applied here instead of just before prepare_solution()
        # so that applicable options will apply to all API calls
        self.soln.apply_command_line_options(configuration.yask['options']
                                             or '')

        # MPI setup: simple rank configuration in 1st dim only.
        # TODO: in production runs, the ranks would be distributed along all
        # domain dimensions
        self.soln.set_num_ranks(self.space_dimensions[0],
                                self.env.get_num_ranks())

        # Redirect stdout to a string or file
        if configuration.yask['dump']:
            filename = 'yk_dump.%s.%s.%s.txt' % (
                name, configuration['platform'], configuration['platform'].isa)
            filename = os.path.join(configuration.yask['dump'], filename)
            self.output = yk.yask_output_factory().new_file_output(filename)
        else:
            self.output = yk.yask_output_factory().new_string_output()
        self.soln.set_debug_output(self.output)

        # Users may want to run the same Operator (same domain etc.) with
        # different vars
        # `vars`: every var known to the YASK solution, keyed by name
        self.vars = {i.get_name(): i for i in self.soln.get_vars()}
        # `local_vars`: the subset of `vars` deallocatable upon return to Python
        self.local_vars = {
            i.name: self.vars[i.name]
            for i in (local_vars or [])
        }
Exemplo n.º 23
0
def jit_compile(soname, code, compiler):
    """
    JIT compile some source code given as a string.

    This function relies upon codepy's ``compile_from_string``, which performs
    caching of compilation units and avoids potential race conditions due to
    multiple processes trying to compile the same object.

    Parameters
    ----------
    soname : str
        Name of the .so file (w/o the suffix).
    code : str
        The source code to be JIT compiled.
    compiler : Compiler
        The toolchain used for JIT compilation.

    Raises
    ------
    ValueError
        If the JIT backdoor is enabled but the expected source file is missing.
    """
    target = str(get_jit_dir().joinpath(soname))
    src_file = "%s.%s" % (target, compiler.src_ext)

    cache_dir = get_codepy_dir().joinpath(soname[:7])
    if configuration['jit-backdoor'] is False:
        # Typically we end up here
        # Make a suite of cache directories based on the soname
        cache_dir.mkdir(parents=True, exist_ok=True)
    else:
        # Warning: dropping `code` on the floor in favor of whatever is written
        # within `src_file`
        try:
            with open(src_file, 'r') as f:
                code = f.read()
            # Bypass the devito JIT cache
            # Note: can't simply use Python's `mkdtemp()` as, with MPI, different
            # ranks would end up creating different cache dirs
            cache_dir = cache_dir.joinpath('jit-backdoor')
            cache_dir.mkdir(parents=True, exist_ok=True)
        except FileNotFoundError as e:
            # Chain the original exception so the traceback shows the root cause
            raise ValueError("Trying to use the JIT backdoor for `%s`, but "
                             "the file isn't present" % src_file) from e

    # `catch_warnings` suppresses codepy complaining that it's taking
    # too long to acquire the cache lock. This warning can only appear
    # in a multiprocess session, typically (but not necessarily) when
    # many processes are frequently attempting jit-compilation (e.g.,
    # when running the test suite in parallel)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        tic = time()
        # Spinlock in case of MPI
        sleep_delay = 0 if configuration['mpi'] else 1
        _, _, _, recompiled = compile_from_string(compiler, target, code, src_file,
                                                  cache_dir=cache_dir,
                                                  debug=configuration['debug-compiler'],
                                                  sleep_delay=sleep_delay)
        toc = time()

    if recompiled:
        debug("%s: compiled `%s` [%.2f s]" % (compiler, src_file, toc-tic))
    else:
        debug("%s: cache hit `%s` [%.2f s]" % (compiler, src_file, toc-tic))