def __init__(self, cachedir, mmap_mode=None, compress=False, verbose=1): """ Parameters ---------- cachedir: string or None The path of the base directory to use as a data store or None. If None is given, no caching is done and the Memory object is completely transparent. mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional The memmapping mode used when loading from cache numpy arrays. See numpy.load for the meaning of the arguments. compress: boolean Whether to zip the stored data on disk. Note that compressed arrays cannot be read by memmapping. verbose: int, optional Verbosity flag, controls the debug messages that are issued as functions are revaluated. """ # XXX: Bad explaination of the None value of cachedir Logger.__init__(self) self._verbose = verbose self.mmap_mode = mmap_mode self.timestamp = time.time() self.compress = compress if compress and mmap_mode is not None: warnings.warn('Compressed results cannot be memmapped', stacklevel=2) if cachedir is None: self.cachedir = None else: self.cachedir = os.path.join(cachedir, 'joblib') mkdirp(self.cachedir)
def __init__(self, logfile=None, logdir=None): if logfile is not None and logdir is not None: raise ValueError('Cannot specify both logfile and logdir') # XXX: Need argument docstring self.last_time = time.time() self.start_time = self.last_time if logdir is not None: logfile = os.path.join(logdir, 'joblib.log') self.logfile = logfile if logfile is not None: mkdirp(os.path.dirname(logfile)) if os.path.exists(logfile): # Rotate the logs for i in xrange(1, 9): try: shutil.move(logfile + '.%i' % i, logfile + '.%i' % (i + 1)) except: "No reason failing here" # Use a copy rather than a move, so that a process # monitoring this file does not get lost. try: shutil.copy(logfile, logfile + '.1') except: "No reason failing here" try: logfile = open(logfile, 'w') logfile.write('\nLogging joblib python script\n') logfile.write('\n---%s---\n' % time.ctime(self.last_time)) logfile.close() except: """ Multiprocessing writing to files can create race
def _get_func_dir(self, mkdir=True): """ Get the directory corresponding to the cache for the function. """ module, name = get_func_name(self.func) module.append(name) func_dir = os.path.join(self.cachedir, *module) if mkdir: mkdirp(func_dir) return func_dir
def _persist_output(self, output, dir): """ Persist the given output tuple in the directory. """ try: mkdirp(dir) filename = os.path.join(dir, 'output.pkl') numpy_pickle.dump(output, filename, compress=self.compress) if self._verbose > 10: print 'Persisting in %s' % dir except OSError: " Race condition in the creation of the directory "
def __init__(self, func, cachedir, ignore=None, mmap_mode=None, compress=False, verbose=1, timestamp=None): """ Parameters ---------- func: callable The function to decorate cachedir: string The path of the base directory to use as a data store ignore: list or None List of variable names to ignore. mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional The memmapping mode used when loading from cache numpy arrays. See numpy.load for the meaning of the arguments. verbose: int, optional Verbosity flag, controls the debug messages that are issued as functions are revaluated. The higher, the more verbose timestamp: float, optional The reference time from which times in tracing messages are reported. """ Logger.__init__(self) self._verbose = verbose self.cachedir = cachedir self.func = func self.mmap_mode = mmap_mode self.compress = compress if compress and mmap_mode is not None: warnings.warn('Compressed results cannot be memmapped', stacklevel=2) if timestamp is None: timestamp = time.time() self.timestamp = timestamp if ignore is None: ignore = [] self.ignore = ignore mkdirp(self.cachedir) try: functools.update_wrapper(self, func) except: " Objects like ufunc don't like that " if inspect.isfunction(func): doc = pydoc.TextDoc().document(func).replace('\n', '\n\n', 1) else: # Pydoc does a poor job on other objects doc = func.__doc__ self.__doc__ = 'Memoized version of %s' % doc
def clear(self, warn=True): """ Empty the function's cache. """ func_dir = self._get_func_dir(mkdir=False) if self._verbose and warn: self.warn("Clearing cache %s" % func_dir) if os.path.exists(func_dir): shutil.rmtree(func_dir, ignore_errors=True) mkdirp(func_dir) func_code, _, first_line = get_func_code(self.func) func_code_file = os.path.join(func_dir, 'func_code.py') self._write_func_code(func_code_file, func_code, first_line)
def __init__(self, func, cachedir, ignore=None, mmap_mode=None, compress=False, verbose=1, timestamp=None): """ Parameters ---------- func: callable The function to decorate cachedir: string The path of the base directory to use as a data store ignore: list or None List of variable names to ignore. mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional The memmapping mode used when loading from cache numpy arrays. See numpy.load for the meaning of the arguments. verbose: int, optional Verbosity flag, controls the debug messages that are issued as functions are revaluated. The higher, the more verbose timestamp: float, optional The reference time from which times in tracing messages are reported. """ Logger.__init__(self) self._verbose = verbose self.cachedir = cachedir self.func = func self.mmap_mode = mmap_mode self.compress = compress if compress and mmap_mode is not None: warnings.warn('Compressed results cannot be memmapped', stacklevel=2) if timestamp is None: timestamp = time.time() self.timestamp = timestamp if ignore is None: ignore = [] self.ignore = ignore mkdirp(self.cachedir) try: functools.update_wrapper(self, func) except: " Objects like ufunc don't like that " if inspect.isfunction(func): doc = pydoc.TextDoc().document(func ).replace('\n', '\n\n', 1) else: # Pydoc does a poor job on other objects doc = func.__doc__ self.__doc__ = 'Memoized version of %s' % doc
def _persist_input(self, output_dir, *args, **kwargs): """ Save a small summary of the call using json format in the output directory. """ argument_dict = filter_args(self.func, self.ignore, args, kwargs) input_repr = dict((k, repr(v)) for k, v in argument_dict.iteritems()) if json is not None: # This can fail do to race-conditions with multiple # concurrent joblibs removing the file or the directory try: mkdirp(output_dir) json.dump(input_repr, file(os.path.join(output_dir, "input_args.json"), "w")) except: pass return input_repr
def _persist_input(self, output_dir, *args, **kwargs): """ Save a small summary of the call using json format in the output directory. """ argument_dict = filter_args(self.func, self.ignore, args, kwargs) input_repr = dict((k, repr(v)) for k, v in argument_dict.iteritems()) if json is not None: # This can fail do to race-conditions with multiple # concurrent joblibs removing the file or the directory try: mkdirp(output_dir) json.dump( input_repr, file(os.path.join(output_dir, 'input_args.json'), 'w'), ) except: pass return input_repr