Example #1
0
def save(filepath, obj, on_overwrite='ignore'):
    """
    Serialize `obj` to a file denoted by `filepath`.

    Parameters
    ----------
    filepath : str
        A filename. If the suffix is `.joblib` and joblib can be
        imported, `joblib.dump` is used in place of the regular
        pickling mechanisms; this results in much faster saves by
        saving arrays as separate .npy files on disk. If the file
        suffix is `.npy` then `numpy.save` is attempted on `obj`.
        Otherwise, (c)pickle is used.

    obj : object
        A Python object to be serialized.

    on_overwrite : str, optional
        A string specifying what to do if the file already exists.
        Possible values include:

        - "ignore" : Just overwrite the existing file.
        - "backup" : Make a backup copy of the file (<filepath>.bak).
          Save the new copy. Then delete the backup copy. This allows
          recovery of the old version of the file if saving the new one
          fails.
    """
    filepath = preprocess(filepath)

    if os.path.exists(filepath):
        if on_overwrite == 'backup':
            backup = filepath + '.bak'
            shutil.move(filepath, backup)
            save(filepath, obj)
            try:
                os.remove(backup)
            except Exception as e:
                warnings.warn("Got an error while trying to remove " + backup +
                              ":" + str(e))
            return
        else:
            assert on_overwrite == 'ignore'

    try:
        _save(filepath, obj)
    except RuntimeError as e:
        """ Sometimes for large theano graphs, pickle/cPickle exceed the
            maximum recursion depth. This seems to me like a fundamental
            design flaw in pickle/cPickle. The workaround I employ here
            is the one recommended to someone who had a similar problem
            on stackexchange:

            http://stackoverflow.com/questions/2134706/hitting-maximum-recursion-depth-using-pythons-pickle-cpickle

            Obviously this does not scale and could cause a crash
            but I don't see another solution short of writing our
            own implementation of pickle.
        """
        if str(e).find('recursion') != -1:
            logger.warning('pylearn2.utils.save encountered the following '
                           'error: ' + str(e) +
                           '\nAttempting to resolve this error by calling ' +
                           'sys.setrecursionlimit and retrying')
            old_limit = sys.getrecursionlimit()
            try:
                sys.setrecursionlimit(50000)
                _save(filepath, obj)
            finally:
                sys.setrecursionlimit(old_limit)
        else:
            # Not a recursion-depth problem; re-raise instead of silently
            # swallowing the error.
            raise
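
A brief, hypothetical usage sketch of the function above; it assumes `save` is importable as `pylearn2.utils.serial.save` (as the pylearn2 log message suggests), and the `model` object and file names are placeholders.

# --- Usage sketch (illustrative, not part of the module) ---
from pylearn2.utils import serial

model = {'weights': [0.1, 0.2, 0.3]}  # any picklable object

# Plain pickle save; an existing 'model.pkl' is silently overwritten.
serial.save('model.pkl', model)

# Keep 'model.pkl.bak' around while the new copy is written, so the old
# version can be recovered if the new save fails partway through.
serial.save('model.pkl', model, on_overwrite='backup')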
Example #2
0
def save(filepath, obj, on_overwrite='ignore'):
    """
    Serialize `obj` to a file denoted by `filepath`.

    Parameters
    ----------
    filepath : str
        A filename. If the suffix is `.joblib` and joblib can be
        imported, `joblib.dump` is used in place of the regular
        pickling mechanisms; this results in much faster saves by
        saving arrays as separate .npy files on disk. If the file
        suffix is `.npy` then `numpy.save` is attempted on `obj`.
        Otherwise, (c)pickle is used.

    obj : object
        A Python object to be serialized.

    on_overwrite : str, optional
        A string specifying what to do if the file already exists.
        Possible values include:

        - "ignore" : Just overwrite the existing file.
        - "backup" : Make a backup copy of the file (<filepath>.bak).
          Save the new copy. Then delete the backup copy. This allows
          recovery of the old version of the file if saving the new one
          fails.
    """
    filepath = preprocess(filepath)

    if os.path.exists(filepath):
        if on_overwrite == 'backup':
            backup = filepath + '.bak'
            shutil.move(filepath, backup)
            save(filepath, obj)
            try:
                os.remove(backup)
            except Exception as e:
                warnings.warn("Got an error while trying to remove " + backup
                              + ":" + str(e))
            return
        else:
            assert on_overwrite == 'ignore'

    try:
        _save(filepath, obj)
    except RuntimeError as e:
        """ Sometimes for large theano graphs, pickle/cPickle exceed the
            maximum recursion depth. This seems to me like a fundamental
            design flaw in pickle/cPickle. The workaround I employ here
            is the one recommended to someone who had a similar problem
            on stackexchange:

            http://stackoverflow.com/questions/2134706/hitting-maximum-recursion-depth-using-pythons-pickle-cpickle

            Obviously this does not scale and could cause a crash
            but I don't see another solution short of writing our
            own implementation of pickle.
        """
        if str(e).find('recursion') != -1:
            logger.warning('pylearn2.utils.save encountered the following '
                           'error: ' + str(e) +
                           '\nAttempting to resolve this error by calling ' +
                           'sys.setrecursionlimit and retrying')
            old_limit = sys.getrecursionlimit()
            try:
                sys.setrecursionlimit(50000)
                _save(filepath, obj)
            finally:
                sys.setrecursionlimit(old_limit)
        else:
            # Not a recursion-depth problem; re-raise instead of silently
            # swallowing the error.
            raise
Example #3
0
def _load(filepath, recurse_depth=0, retry=True):
    """
    Recursively tries to load a file until success or the maximum number of
    attempts is reached.

    Parameters
    ----------
    filepath : str
        A path to a file to load. Should be a pickle, Matlab, or NumPy
        file; or a .txt or .amat file that numpy.loadtxt can load.
    recurse_depth : int, optional
        End users should not use this argument. It is used by the function
        itself to implement the `retry` option recursively.
    retry : bool, optional
        If True, will make a handful of attempts to load the file before
        giving up. This can be useful if you are for example calling
        show_weights.py on a file that is actively being written to by a
        training script--sometimes the load attempt might fail if the
        training script writes at the same time show_weights tries to
        read, but if you try again after a few seconds you should be able
        to open the file.

    Returns
    -------
    loaded_object : object
        The object that was stored in the file.
    """
    try:
        import joblib
        joblib_available = True
    except ImportError:
        joblib_available = False
    if recurse_depth == 0:
        filepath = preprocess(filepath)

    if filepath.endswith('.npy') or filepath.endswith('.npz'):
        return np.load(filepath)

    if filepath.endswith('.amat') or filepath.endswith('txt'):
        try:
            return np.loadtxt(filepath)
        except Exception:
            raise Exception("{0} cannot be loaded by serial.load (trying "
                            "to use np.loadtxt)".format(filepath))

    if filepath.endswith('.mat'):
        global io
        if io is None:
            import scipy.io
            io = scipy.io
        try:
            return io.loadmat(filepath)
        except NotImplementedError as nei:
            if str(nei).find('HDF reader') != -1:
                global hdf_reader
                if hdf_reader is None:
                    import h5py
                    hdf_reader = h5py
                return hdf_reader.File(filepath, 'r')
            else:
                raise
        # this code should never be reached
        assert False

    # for loading PY2 pickle in PY3
    encoding = {'encoding': 'latin-1'} if six.PY3 else {}

    def exponential_backoff():
        if recurse_depth > 9:
            logger.info('Max number of tries exceeded while trying to open '
                        '{0}'.format(filepath))
            logger.info('attempting to open via reading string')
            with open(filepath, 'rb') as f:
                content = f.read()
            return cPickle.loads(content, **encoding)
        else:
            nsec = 0.5 * (2.0**float(recurse_depth))
            logger.info("Waiting {0} seconds and trying again".format(nsec))
            time.sleep(nsec)
            return _load(filepath, recurse_depth + 1, retry)

    try:
        if not joblib_available:
            with open(filepath, 'rb') as f:
                obj = cPickle.load(f, **encoding)
        else:
            try:
                obj = joblib.load(filepath)
            except Exception as e:
                if os.path.exists(filepath) and not os.path.isdir(filepath):
                    raise
                raise_cannot_open(filepath)
    except MemoryError as e:
        # We want to explicitly catch this exception because for MemoryError
        # __str__ returns the empty string, so some of our default printouts
        # below don't make a lot of sense.
        # Also, a lot of users assume any exception is a bug in the library,
        # so we can cut down on mail to pylearn-users by adding a message
        # that makes it clear this exception is caused by their machine not
        # meeting requirements.
        if os.path.splitext(filepath)[1] == ".pkl":
            improve_memory_error_message(
                e, ("You do not have enough memory to "
                    "open %s \n"
                    " + Try using numpy.{save,load} "
                    "(file with extension '.npy') "
                    "to save your file. It uses less "
                    "memory when reading and "
                    "writing files than pickled files.") % filepath)
        else:
            improve_memory_error_message(
                e, "You do not have enough memory to "
                "open %s" % filepath)

    except (BadPickleGet, EOFError, KeyError) as e:
        if not retry:
            raise Exception(e.__class__('Failed to open {0}'.format(filepath)))
        obj = exponential_backoff()
    except ValueError:
        logger.exception('Failed to open {0}'.format(filepath))

        if not retry:
            raise Exception(ValueError('Failed to open {0}'.format(filepath)))
        obj = exponential_backoff()
    except Exception:
        # assert False
        raise Exception("Couldn't open {0}".format(filepath))

    # if the object has no yaml_src, we give it one that just says it
    # came from this file. could cause trouble if you save obj again
    # to a different location
    if not hasattr(obj, 'yaml_src'):
        try:
            obj.yaml_src = '!pkl: "' + os.path.abspath(filepath) + '"'
        except Exception:
            pass

    return obj
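
A brief, hypothetical usage sketch for the loader above; the file path is a placeholder, and the calls assume the module-level names used by `_load` (`preprocess`, `logger`, `cPickle`, and so on) are already set up as in pylearn2.utils.serial.

# --- Usage sketch (illustrative, not part of the module) ---
# Default: if the pickle looks truncated (e.g. another process is still
# writing it), wait 0.5 s, 1 s, 2 s, ... between retries; after about ten
# attempts, fall back to reading the whole file and unpickling the string.
model = _load('model.pkl')

# Fail immediately instead of backing off and retrying.
model = _load('model.pkl', retry=False)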
Example #4
0
def _load(filepath, recurse_depth=0, retry=True):
    """
    Recursively tries to load a file until success or the maximum number of
    attempts is reached.

    Parameters
    ----------
    filepath : str
        A path to a file to load. Should be a pickle, Matlab, or NumPy
        file; or a .txt or .amat file that numpy.loadtxt can load.
    recurse_depth : int, optional
        End users should not use this argument. It is used by the function
        itself to implement the `retry` option recursively.
    retry : bool, optional
        If True, will make a handful of attempts to load the file before
        giving up. This can be useful if you are for example calling
        show_weights.py on a file that is actively being written to by a
        training script--sometimes the load attempt might fail if the
        training script writes at the same time show_weights tries to
        read, but if you try again after a few seconds you should be able
        to open the file.

    Returns
    -------
    loaded_object : object
        The object that was stored in the file.
    """
    try:
        import joblib
        joblib_available = True
    except ImportError:
        joblib_available = False
    if recurse_depth == 0:
        filepath = preprocess(filepath)

    if filepath.endswith('.npy') or filepath.endswith('.npz'):
        return np.load(filepath)

    if filepath.endswith('.amat') or filepath.endswith('txt'):
        try:
            return np.loadtxt(filepath)
        except Exception:
            raise Exception("{0} cannot be loaded by serial.load (trying "
                       "to use np.loadtxt)".format(filepath))

    if filepath.endswith('.mat'):
        global io
        if io is None:
            import scipy.io
            io = scipy.io
        try:
            return io.loadmat(filepath)
        except NotImplementedError as nei:
            if str(nei).find('HDF reader') != -1:
                global hdf_reader
                if hdf_reader is None:
                    import h5py
                    hdf_reader = h5py
                return hdf_reader.File(filepath, 'r')
            else:
                raise
        # this code should never be reached
        assert False

    # for loading PY2 pickle in PY3
    encoding = {'encoding': 'latin-1'} if six.PY3 else {}

    def exponential_backoff():
        if recurse_depth > 9:
            logger.info('Max number of tries exceeded while trying to open '
                        '{0}'.format(filepath))
            logger.info('attempting to open via reading string')
            with open(filepath, 'rb') as f:
                content = f.read()
            return cPickle.loads(content, **encoding)
        else:
            nsec = 0.5 * (2.0 ** float(recurse_depth))
            logger.info("Waiting {0} seconds and trying again".format(nsec))
            time.sleep(nsec)
            return _load(filepath, recurse_depth + 1, retry)

    try:
        if not joblib_available:
            with open(filepath, 'rb') as f:
                obj = cPickle.load(f, **encoding)
        else:
            try:
                obj = joblib.load(filepath)
            except Exception as e:
                if os.path.exists(filepath) and not os.path.isdir(filepath):
                    raise
                raise_cannot_open(filepath)
    except MemoryError as e:
        # We want to explicitly catch this exception because for MemoryError
        # __str__ returns the empty string, so some of our default printouts
        # below don't make a lot of sense.
        # Also, a lot of users assume any exception is a bug in the library,
        # so we can cut down on mail to pylearn-users by adding a message
        # that makes it clear this exception is caused by their machine not
        # meeting requirements.
        if os.path.splitext(filepath)[1] == ".pkl":
            improve_memory_error_message(e,
                                         ("You do not have enough memory to "
                                          "open %s \n"
                                          " + Try using numpy.{save,load} "
                                          "(file with extension '.npy') "
                                          "to save your file. It uses less "
                                          "memory when reading and "
                                          "writing files than pickled files.")
                                         % filepath)
        else:
            improve_memory_error_message(e,
                                         "You do not have enough memory to "
                                         "open %s" % filepath)

    except (BadPickleGet, EOFError, KeyError) as e:
        if not retry:
            raise Exception(e.__class__('Failed to open {0}'.format(filepath)))
        obj = exponential_backoff()
    except ValueError:
        logger.exception('Failed to open {0}'.format(filepath))

        if not retry:
            raise Exception(ValueError('Failed to open {0}'.format(filepath)))
        obj = exponential_backoff()
    except Exception:
        # assert False
        raise Exception("Couldn't open {0}".format(filepath))

    # if the object has no yaml_src, we give it one that just says it
    # came from this file. could cause trouble if you save obj again
    # to a different location
    if not hasattr(obj, 'yaml_src'):
        try:
            obj.yaml_src = '!pkl: "' + os.path.abspath(filepath) + '"'
        except Exception:
            pass

    return obj