Example #1
    def _store(self, obj, form, bcs, tsfc_parameters):
        key = self._cache_key(form)

        # A form that keeps being invalidated is eventually excluded from
        # the cache altogether; the debug message is emitted only once.
        if self.invalid_count[key] > parameters["assembly_cache"]["max_misses"]:
            if self.invalid_count[key] == \
               parameters["assembly_cache"]["max_misses"] + 1:
                debug("form %s missed too many times, excluding from cache." % form)

        else:
            cache_entry = _CacheEntry(obj, form, bcs)
            self.cache[key] = str(tsfc_parameters), cache_entry
            self.evict()
Example #2
    def _store(self, obj, form, bcs, ffc_parameters):
        form_sig = form.signature()

        if self.invalid_count[form_sig] > parameters["assembly_cache"]["max_misses"]:
            if self.invalid_count[form_sig] == \
               parameters["assembly_cache"]["max_misses"] + 1:
                debug("form %s missed too many times, excluding from cache." % form)

        else:
            cache_entry = _CacheEntry(obj, form, bcs)
            self.cache[form_sig] = str(ffc_parameters), cache_entry
            self.evict()
Example #3
    def _store(self, obj, form, bcs, ffc_parameters):
        key = self._cache_key(form)

        if self.invalid_count[key] > parameters["assembly_cache"]["max_misses"]:
            if self.invalid_count[key] == \
               parameters["assembly_cache"]["max_misses"] + 1:
                debug("form %s missed too many times, excluding from cache." % form)

        else:
            cache_entry = _CacheEntry(obj, form, bcs)
            self.cache[key] = str(ffc_parameters), cache_entry
            self.evict()
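
All three variants above share one gating pattern: a per-key miss counter decides whether a form is still worth caching at all. A minimal standalone sketch of that pattern, assuming a defaultdict miss counter and hypothetical stand-ins (max_misses, store) for the real parameters and _CacheEntry machinery:

from collections import defaultdict

max_misses = 3
invalid_count = defaultdict(int)
cache = {}

def store(key, entry):
    # A key that has missed too often is excluded from the cache. In the
    # real class the counter is incremented elsewhere on each invalidation,
    # so the message fires once, when the threshold is first crossed.
    if invalid_count[key] > max_misses:
        if invalid_count[key] == max_misses + 1:
            print("key %s missed too many times, excluding from cache" % key)
    else:
        cache[key] = entry

invalid_count["hot"] = 4   # pretend this form kept being invalidated
store("hot", object())     # warns once and stays out of the cache
store("cold", object())    # cached normally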
Example #4
def compilation_comm(comm):
    """Get a communicator for compilation.

    :arg comm: The input communicator.
    :returns: A communicator used for compilation (may be smaller)
    """
    # Should we try and do node-local compilation?
    if not configuration["node_local_compilation"]:
        return comm
    retcomm = get_compilation_comm(comm)
    if retcomm is not None:
        debug("Found existing compilation communicator")
        return retcomm
    if MPI.VERSION >= 3:
        debug("Creating compilation communicator using MPI_Split_type")
        retcomm = comm.Split_type(MPI.COMM_TYPE_SHARED)
        set_compilation_comm(comm, retcomm)
        return retcomm
    debug("Creating compilation communicator using MPI_Split + filesystem")
    import tempfile
    if comm.rank == 0:
        os.makedirs(configuration["cache_dir"], exist_ok=True)
        tmpname = tempfile.mkdtemp(prefix="rank-determination-",
                                   dir=configuration["cache_dir"])
    else:
        tmpname = None
    tmpname = comm.bcast(tmpname, root=0)
    if tmpname is None:
        raise CompilationError("Cannot determine sharedness of filesystem")
    # Touch file
    with open(os.path.join(tmpname, str(comm.rank)), "wb"):
        pass
    comm.barrier()
    import glob
    ranks = sorted(
        int(os.path.basename(name))
        for name in glob.glob("%s/[0-9]*" % tmpname))
    retcomm = comm.Split(color=min(ranks), key=comm.rank)
    set_compilation_comm(comm, retcomm)
    return retcomm
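
When MPI_Split_type is unavailable, the fallback above groups ranks by the lowest rank that can see the shared temporary directory. A minimal sketch of that final Split call (assumes mpi4py; the ranks list stands in for the globbed rank files):

from mpi4py import MPI

comm = MPI.COMM_WORLD
ranks = [0, 1]  # stand-in for the rank files found in the shared directory
# Ranks that observed the same minimum share a color, hence a communicator.
retcomm = comm.Split(color=min(ranks), key=comm.rank)
print("world rank %d -> group rank %d" % (comm.rank, retcomm.rank))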
Example #5
def compilation_comm(comm):
    """Get a communicator for compilation.

    :arg comm: The input communicator.
    :returns: A communicator used for compilation (may be smaller)
    """
    # Should we try and do node-local compilation?
    if not configuration["node_local_compilation"]:
        return comm
    retcomm = get_compilation_comm(comm)
    if retcomm is not None:
        debug("Found existing compilation communicator")
        return retcomm
    if MPI.VERSION >= 3:
        debug("Creating compilation communicator using MPI_Split_type")
        retcomm = comm.Split_type(MPI.COMM_TYPE_SHARED)
        set_compilation_comm(comm, retcomm)
        return retcomm
    debug("Creating compilation communicator using MPI_Split + filesystem")
    import tempfile
    if comm.rank == 0:
        os.makedirs(configuration["cache_dir"], exist_ok=True)
        tmpname = tempfile.mkdtemp(prefix="rank-determination-",
                                   dir=configuration["cache_dir"])
    else:
        tmpname = None
    tmpname = comm.bcast(tmpname, root=0)
    if tmpname is None:
        raise CompilationError("Cannot determine sharedness of filesystem")
    # Touch file
    with open(os.path.join(tmpname, str(comm.rank)), "wb"):
        pass
    comm.barrier()
    import glob
    ranks = sorted(int(os.path.basename(name))
                   for name in glob.glob("%s/[0-9]*" % tmpname))
    retcomm = comm.Split(color=min(ranks), key=comm.rank)
    set_compilation_comm(comm, retcomm)
    return retcomm
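
On MPI 3 and later the same grouping is a one-liner: Split_type with COMM_TYPE_SHARED puts each shared-memory node in its own communicator. A runnable sketch (assumes mpi4py; launch with mpiexec):

from mpi4py import MPI

comm = MPI.COMM_WORLD
# One sub-communicator per shared-memory node, so compilation can happen
# once per node rather than once per rank.
nodecomm = comm.Split_type(MPI.COMM_TYPE_SHARED)
print("world rank %d is node rank %d of %d"
      % (comm.rank, nodecomm.rank, nodecomm.size))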
Example #6
    def get_so(self, src, extension):
        """Build a shared library and load it

        :arg src: The source string to compile.
        :arg extension: extension of the source file (c, cpp).

        Returns a :class:`ctypes.CDLL` object of the resulting shared
        library."""

        # Determine cache key
        hsh = md5(src.encode())
        hsh.update(self._cc.encode())
        if self._ld:
            hsh.update(self._ld.encode())
        hsh.update("".join(self._cppargs).encode())
        hsh.update("".join(self._ldargs).encode())

        basename = hsh.hexdigest()

        cachedir = configuration['cache_dir']
        pid = os.getpid()
        cname = os.path.join(cachedir, "%s_p%d.%s" % (basename, pid, extension))
        oname = os.path.join(cachedir, "%s_p%d.o" % (basename, pid))
        soname = os.path.join(cachedir, "%s.so" % basename)
        # Link into temporary file, then rename to shared library
        # atomically (avoiding races).
        tmpname = os.path.join(cachedir, "%s_p%d.so.tmp" % (basename, pid))

        if configuration['check_src_hashes'] or configuration['debug']:
            matching = self.comm.allreduce(basename, op=_check_op)
            if matching != basename:
                # Dump all src code to disk for debugging
                output = os.path.join(cachedir, "mismatching-kernels")
                srcfile = os.path.join(output, "src-rank%d.c" % self.comm.rank)
                if self.comm.rank == 0:
                    os.makedirs(output, exist_ok=True)
                self.comm.barrier()
                with open(srcfile, "w") as f:
                    f.write(src)
                self.comm.barrier()
                raise CompilationError("Generated code differs across ranks (see output in %s)" % output)
        try:
            # Are we in the cache?
            return ctypes.CDLL(soname)
        except OSError:
            # No, let's go ahead and build
            if self.comm.rank == 0:
                # No need to do this on all ranks
                os.makedirs(cachedir, exist_ok=True)
                logfile = os.path.join(cachedir, "%s_p%d.log" % (basename, pid))
                errfile = os.path.join(cachedir, "%s_p%d.err" % (basename, pid))
                with progress(INFO, 'Compiling wrapper'):
                    with open(cname, "w") as f:
                        f.write(src)
                    # Compiler also links
                    if self._ld is None:
                        cc = [self._cc] + self._cppargs + \
                             ['-o', tmpname, cname] + self._ldargs
                        debug('Compilation command: %s', ' '.join(cc))
                        with open(logfile, "w") as log:
                            with open(errfile, "w") as err:
                                log.write("Compilation command:\n")
                                log.write(" ".join(cc))
                                log.write("\n\n")
                                try:
                                    if configuration['no_fork_available']:
                                        cc += ["2>", errfile, ">", logfile]
                                        cmd = " ".join(cc)
                                        status = os.system(cmd)
                                        if status != 0:
                                            raise subprocess.CalledProcessError(status, cmd)
                                    else:
                                        subprocess.check_call(cc, stderr=err,
                                                              stdout=log)
                                except subprocess.CalledProcessError as e:
                                    raise CompilationError(
                                        """Command "%s" return error status %d.
Unable to compile code
Compile log in %s
Compile errors in %s""" % (e.cmd, e.returncode, logfile, errfile))
                    else:
                        cc = [self._cc] + self._cppargs + \
                             ['-c', '-o', oname, cname]
                        ld = self._ld.split() + ['-o', tmpname, oname] + self._ldargs
                        debug('Compilation command: %s', ' '.join(cc))
                        debug('Link command: %s', ' '.join(ld))
                        with open(logfile, "w") as log:
                            with open(errfile, "w") as err:
                                log.write("Compilation command:\n")
                                log.write(" ".join(cc))
                                log.write("\n\n")
                                log.write("Link command:\n")
                                log.write(" ".join(ld))
                                log.write("\n\n")
                                try:
                                    if configuration['no_fork_available']:
                                        cc += ["2>", errfile, ">", logfile]
                                        ld += ["2>", errfile, ">", logfile]
                                        cccmd = " ".join(cc)
                                        ldcmd = " ".join(ld)
                                        status = os.system(cccmd)
                                        if status != 0:
                                            raise subprocess.CalledProcessError(status, cccmd)
                                        status = os.system(ldcmd)
                                        if status != 0:
                                            raise subprocess.CalledProcessError(status, ldcmd)
                                    else:
                                        subprocess.check_call(cc, stderr=err,
                                                              stdout=log)
                                        subprocess.check_call(ld, stderr=err,
                                                              stdout=log)
                                except subprocess.CalledProcessError as e:
                                    raise CompilationError(
                                        """Command "%s" return error status %d.
Unable to compile code
Compile log in %s
Compile errors in %s""" % (e.cmd, e.returncode, logfile, errfile))
                    # Atomically ensure soname exists
                    os.rename(tmpname, soname)
            # Wait for compilation to complete
            self.comm.barrier()
            # Load resulting library
            return ctypes.CDLL(soname)
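
The cache key above folds the source and the whole toolchain into one digest, so changing a compiler flag invalidates the cached library. A standalone sketch of the scheme with hypothetical inputs:

from hashlib import md5

src = "int add(int a, int b) { return a + b; }"
cc, cppargs, ldargs = "gcc", ["-O3", "-fPIC"], ["-shared"]

hsh = md5(src.encode())
hsh.update(cc.encode())
hsh.update("".join(cppargs).encode())
hsh.update("".join(ldargs).encode())
# The same source and flags always map to the same .so name on disk.
print(hsh.hexdigest())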
Example #7
    def evict(self):
        """Run the cache eviction algorithm. This works out the permitted
cache size and deletes objects until it is achieved. Cache values are
assumed to have a :attr:`value` attribute and eviction occurs in
increasing :attr:`value` order. Currently :attr:`value` is an index of
the assembly operation, so older operations are evicted first.

The cache will be evicted down to 90% of permitted size.

The permitted size is either the explicit
:data:`parameters["assembly_cache"]["max_bytes"]` or it is the amount of
memory per core scaled by :data:`parameters["assembly_cache"]["max_factor"]`
(by default the scale factor is 0.6).

In MPI parallel, the nbytes of each cache entry is set to the maximum
over all processes, while the available memory is set to the
minimum. This produces a conservative caching policy which is
guaranteed to result in the same evictions on each processor.

        """

        if not parameters["assembly_cache"]["eviction"]:
            return

        max_cache_size = min(parameters["assembly_cache"]["max_bytes"] or float("inf"),
                             (memory or float("inf"))
                             * parameters["assembly_cache"]["max_factor"]
                             )

        if max_cache_size == float("inf"):
            if not self.evictwarned:
                warning("No maximum assembly cache size. Install psutil >= 2.0.0 or risk leaking memory!")
                self.evictwarned = True
            return

        cache_size = self.nbytes
        if cache_size < max_cache_size:
            return

        debug("Cache eviction triggered. %s bytes in cache, %s bytes allowed" %
              (cache_size, max_cache_size))

        # Evict down to 90% full.
        bytes_to_evict = cache_size - 0.9 * max_cache_size

        sorted_cache = sorted(self.cache.items(), key=lambda x: x[1][1].value)

        def nbytes(x):
            return x[1][1].nbytes

        candidates = []
        while bytes_to_evict > 0:
            victim = sorted_cache.pop(0)  # oldest entry first
            candidates.append(victim)
            bytes_to_evict -= nbytes(victim)

        for c in reversed(candidates):
            if bytes_to_evict + nbytes(c) < 0:
                # We may have been overzealous.
                bytes_to_evict += nbytes(c)
            else:
                del self.cache[c[0]]
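
The eviction order is easy to check in isolation: entries sort on their value attribute, so the oldest assembly operations become candidates first. A toy illustration with hypothetical entries:

from types import SimpleNamespace

cache = {
    "a": ("params", SimpleNamespace(value=3, nbytes=100)),
    "b": ("params", SimpleNamespace(value=1, nbytes=400)),
    "c": ("params", SimpleNamespace(value=2, nbytes=200)),
}
by_age = sorted(cache.items(), key=lambda x: x[1][1].value)
print([k for k, _ in by_age])  # ['b', 'c', 'a']: oldest evicted first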
Example #8
import os

from pyop2.mpi import MPI
from pyop2.logger import warning, debug
from pyop2.utils import flatten

try:
    from pyslope import slope
    backend = os.environ.get('SLOPE_BACKEND')
    if backend not in ['SEQUENTIAL', 'OMP']:
        backend = 'SEQUENTIAL'
    if MPI.COMM_WORLD.size > 1:
        if backend == 'SEQUENTIAL':
            backend = 'ONLY_MPI'
        if backend == 'OMP':
            backend = 'OMP_MPI'
    slope.set_exec_mode(backend)
    debug("SLOPE backend set to %s" % backend)
except ImportError:
    slope = None

lazy_trace_name = 'lazy_trace'
"""The default name for sequences of lazily evaluated :class:`ParLoop`s."""

from pyop2.fusion.transformer import Inspector
from pyop2.fusion import extended


def fuse(name, loop_chain, **kwargs):
    """Apply fusion (and possibly tiling) to an iterator of :class:`ParLoop`
    objects, which we refer to as ``loop_chain``. Return an iterator of
    :class:`ParLoop` objects, in which some loops may have been fused or tiled.
    If fusion could not be applied, return the unmodified ``loop_chain``.
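
The backend selection at the top of this example is self-contained enough to test without pyslope. A sketch of the same decision table as a pure function (choose_backend is a hypothetical name):

import os

def choose_backend(mpi_size):
    backend = os.environ.get('SLOPE_BACKEND')
    if backend not in ['SEQUENTIAL', 'OMP']:
        backend = 'SEQUENTIAL'
    if mpi_size > 1:
        # MPI runs upgrade to the corresponding distributed mode.
        backend = {'SEQUENTIAL': 'ONLY_MPI', 'OMP': 'OMP_MPI'}[backend]
    return backend

print(choose_backend(1))  # SEQUENTIAL unless SLOPE_BACKEND=OMP is set
print(choose_backend(4))  # ONLY_MPI (or OMP_MPI with SLOPE_BACKEND=OMP)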
Example #9
    def get_so(self, jitmodule, extension):
        """Build a shared library and load it

        :arg jitmodule: The JIT Module which can generate the code to compile.
        :arg extension: extension of the source file (c, cpp).

        Returns a :class:`ctypes.CDLL` object of the resulting shared
        library."""

        # Determine cache key
        hsh = md5(str(jitmodule.cache_key).encode())
        hsh.update(self._cc.encode())
        if self._ld:
            hsh.update(self._ld.encode())
        hsh.update("".join(self._cppargs).encode())
        hsh.update("".join(self._ldargs).encode())

        basename = hsh.hexdigest()

        cachedir = configuration['cache_dir']

        dirpart, basename = basename[:2], basename[2:]
        cachedir = os.path.join(cachedir, dirpart)
        pid = os.getpid()
        cname = os.path.join(cachedir, "%s_p%d.%s" % (basename, pid, extension))
        oname = os.path.join(cachedir, "%s_p%d.o" % (basename, pid))
        soname = os.path.join(cachedir, "%s.so" % basename)
        # Link into temporary file, then rename to shared library
        # atomically (avoiding races).
        tmpname = os.path.join(cachedir, "%s_p%d.so.tmp" % (basename, pid))

        if configuration['check_src_hashes'] or configuration['debug']:
            matching = self.comm.allreduce(basename, op=_check_op)
            if matching != basename:
                # Dump all src code to disk for debugging
                output = os.path.join(cachedir, "mismatching-kernels")
                srcfile = os.path.join(output, "src-rank%d.c" % self.comm.rank)
                if self.comm.rank == 0:
                    os.makedirs(output, exist_ok=True)
                self.comm.barrier()
                with open(srcfile, "w") as f:
                    f.write(jitmodule.code_to_compile)
                self.comm.barrier()
                raise CompilationError("Generated code differs across ranks (see output in %s)" % output)
        try:
            # Are we in the cache?
            return ctypes.CDLL(soname)
        except OSError:
            # No, let's go ahead and build
            if self.comm.rank == 0:
                # No need to do this on all ranks
                os.makedirs(cachedir, exist_ok=True)
                logfile = os.path.join(cachedir, "%s_p%d.log" % (basename, pid))
                errfile = os.path.join(cachedir, "%s_p%d.err" % (basename, pid))
                with progress(INFO, 'Compiling wrapper'):
                    with open(cname, "w") as f:
                        f.write(jitmodule.code_to_compile)
                    # Compiler also links
                    if self._ld is None:
                        cc = [self._cc] + self._cppargs + \
                             ['-o', tmpname, cname] + self._ldargs
                        debug('Compilation command: %s', ' '.join(cc))
                        with open(logfile, "w") as log:
                            with open(errfile, "w") as err:
                                log.write("Compilation command:\n")
                                log.write(" ".join(cc))
                                log.write("\n\n")
                                try:
                                    if configuration['no_fork_available']:
                                        cc += ["2>", errfile, ">", logfile]
                                        cmd = " ".join(cc)
                                        status = os.system(cmd)
                                        if status != 0:
                                            raise subprocess.CalledProcessError(status, cmd)
                                    else:
                                        subprocess.check_call(cc, stderr=err,
                                                              stdout=log)
                                except subprocess.CalledProcessError as e:
                                    raise CompilationError(
                                        """Command "%s" return error status %d.
Unable to compile code
Compile log in %s
Compile errors in %s""" % (e.cmd, e.returncode, logfile, errfile))
                    else:
                        cc = [self._cc] + self._cppargs + \
                             ['-c', '-o', oname, cname]
                        ld = self._ld.split() + ['-o', tmpname, oname] + self._ldargs
                        debug('Compilation command: %s', ' '.join(cc))
                        debug('Link command: %s', ' '.join(ld))
                        with open(logfile, "w") as log:
                            with open(errfile, "w") as err:
                                log.write("Compilation command:\n")
                                log.write(" ".join(cc))
                                log.write("\n\n")
                                log.write("Link command:\n")
                                log.write(" ".join(ld))
                                log.write("\n\n")
                                try:
                                    if configuration['no_fork_available']:
                                        cc += ["2>", errfile, ">", logfile]
                                        ld += ["2>", errfile, ">", logfile]
                                        cccmd = " ".join(cc)
                                        ldcmd = " ".join(ld)
                                        status = os.system(cccmd)
                                        if status != 0:
                                            raise subprocess.CalledProcessError(status, cccmd)
                                        status = os.system(ldcmd)
                                        if status != 0:
                                            raise subprocess.CalledProcessError(status, ldcmd)
                                    else:
                                        subprocess.check_call(cc, stderr=err,
                                                              stdout=log)
                                        subprocess.check_call(ld, stderr=err,
                                                              stdout=log)
                                except subprocess.CalledProcessError as e:
                                    raise CompilationError(
                                        """Command "%s" return error status %d.
Unable to compile code
Compile log in %s
Compile errors in %s""" % (e.cmd, e.returncode, logfile, errfile))
                    # Atomically ensure soname exists
                    os.rename(tmpname, soname)
            # Wait for compilation to complete
            self.comm.barrier()
            # Load resulting library
            return ctypes.CDLL(soname)
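
Unlike Example #6, this version shards the cache directory on the first two hex digits of the digest, which keeps any single directory from accumulating thousands of entries. A sketch of the resulting layout (the cache root is a hypothetical path):

import os
from hashlib import md5

basename = md5(b"some cache key").hexdigest()
dirpart, basename = basename[:2], basename[2:]
# e.g. <root>/ab/cdef....so rather than <root>/abcdef....so
print(os.path.join("/tmp/pyop2-cache", dirpart, basename + ".so"))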
Example #10
    def evict(self):
        """Run the cache eviction algorithm. This works out the permitted
cache size and deletes objects until it is achieved. Cache values are
assumed to have a :attr:`value` attribute and eviction occurs in
increasing :attr:`value` order. Currently :attr:`value` is an index of
the assembly operation, so older operations are evicted first.

The cache will be evicted down to 90% of permitted size.

The permitted size is either the explicit
:data:`parameters["assembly_cache"]["max_bytes"]` or it is the amount of
memory per core scaled by :data:`parameters["assembly_cache"]["max_factor"]`
(by default the scale factor is 0.6).

In MPI parallel, the nbytes of each cache entry is set to the maximum
over all processes, while the available memory is set to the
minimum. This produces a conservative caching policy which is
guaranteed to result in the same evictions on each processor.

        """

        if not parameters["assembly_cache"]["eviction"]:
            return

        max_cache_size = min(
            parameters["assembly_cache"]["max_bytes"] or float("inf"),
            (memory or float("inf")) *
            parameters["assembly_cache"]["max_factor"])

        if max_cache_size == float("inf"):
            if not self.evictwarned:
                warning(
                    "No maximum assembly cache size. Install psutil >= 2.0.0 or risk leaking memory!"
                )
                self.evictwarned = True
            return

        cache_size = self.nbytes
        if cache_size < max_cache_size:
            return

        debug("Cache eviction triggered. %s bytes in cache, %s bytes allowed" %
              (cache_size, max_cache_size))

        # Evict down to 90% full.
        bytes_to_evict = cache_size - 0.9 * max_cache_size

        sorted_cache = sorted(self.cache.items(), key=lambda x: x[1][1].value)

        def nbytes(x):
            return x[1][1].nbytes

        candidates = []
        while bytes_to_evict > 0:
            victim = sorted_cache.pop(0)  # oldest entry first
            candidates.append(victim)
            bytes_to_evict -= nbytes(victim)

        for c in reversed(candidates):
            if bytes_to_evict + nbytes(c) < 0:
                # We may have been overzealous.
                bytes_to_evict += nbytes(c)
            else:
                del self.cache[c[0]]
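
The permitted size is worth seeing with concrete numbers. A sketch of the computation above, with hypothetical values standing in for the parameters dictionary and the psutil-derived per-core memory:

max_bytes = None        # parameters["assembly_cache"]["max_bytes"] unset
memory = 8 * 1024**3    # per-core memory in bytes; None if psutil missing
max_factor = 0.6        # parameters["assembly_cache"]["max_factor"]

max_cache_size = min(max_bytes or float("inf"),
                     (memory or float("inf")) * max_factor)
print(max_cache_size)   # 5153960755.2, i.e. 60% of 8 GiB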