Exemplo n.º 1
0
    def put_file_from_fobj(self, src, desc=""):
        """Store a file in the storage.

        If it's already (for some reason...) in the cache send that
        copy to the backend. Otherwise store it in the file-system
        cache first.

        The file is obtained from a file-object. Other interfaces are
        available as `put_file_content', `put_file_from_path'.

        src (fileobj): a readable binary file-like object from which
            to read the contents of the file.
        desc (unicode): the (optional) description to associate to the
            file.

        return (unicode): the digest of the stored file.

        """
        logger.debug("Reading input file to store on the database.")

        # Unfortunately, we have to read the whole file-obj to compute
        # the digest but we take that chance to save it to a temporary
        # path so that we then just need to move it. Hoping that both
        # locations will be on the same filesystem, that should be way
        # faster than reading the whole file-obj again (as it could be
        # compressed or require network communication).
        # XXX We're *almost* reimplementing copyfileobj.
        with tempfile.NamedTemporaryFile('wb', delete=False,
                                         dir=config.temp_dir) as dst:
            hasher = hashlib.sha1()
            buf = src.read(self.CHUNK_SIZE)
            while len(buf) > 0:
                hasher.update(buf)
                while len(buf) > 0:
                    written = dst.write(buf)
                    # Cooperative yield.
                    gevent.sleep(0)
                    if written is None:
                        break
                    buf = buf[written:]
                buf = src.read(self.CHUNK_SIZE)
            digest = hasher.hexdigest().decode("ascii")
            dst.flush()

            logger.debug("File has digest %s." % digest)

            cache_file_path = os.path.join(self.file_dir, digest)

            if not os.path.exists(cache_file_path):
                move(dst.name, cache_file_path)
            else:
                os.unlink(dst.name)

        # Store the file in the backend. We do that even if the file
        # was already in the cache (that is, we ignore the check above)
        # because there's a (small) chance that the file got removed
        # from the backend but somehow remained in the cache.
        self.save(digest, desc)

        return digest
Exemplo n.º 2
0
    def put_file_from_fobj(self, src, desc=""):
        """Store a file in the storage.

        If it's already (for some reason...) in the cache send that
        copy to the backend. Otherwise store it in the file-system
        cache first.

        The file is obtained from a file-object. Other interfaces are
        available as `put_file_content', `put_file_from_path'.

        src (fileobj): a readable binary file-like object from which
            to read the contents of the file.
        desc (unicode): the (optional) description to associate to the
            file.

        return (unicode): the digest of the stored file.

        """
        logger.debug("Reading input file to store on the database.")

        # Unfortunately, we have to read the whole file-obj to compute
        # the digest but we take that chance to save it to a temporary
        # path so that we then just need to move it. Hoping that both
        # locations will be on the same filesystem, that should be way
        # faster than reading the whole file-obj again (as it could be
        # compressed or require network communication).
        # XXX We're *almost* reimplementing copyfileobj.
        with tempfile.NamedTemporaryFile('wb', delete=False,
                                         dir=self.temp_dir) as dst:
            hasher = hashlib.sha1()
            buf = src.read(self.CHUNK_SIZE)
            while len(buf) > 0:
                hasher.update(buf)
                while len(buf) > 0:
                    written = dst.write(buf)
                    # Cooperative yield.
                    gevent.sleep(0)
                    if written is None:
                        break
                    buf = buf[written:]
                buf = src.read(self.CHUNK_SIZE)
            digest = bin_to_hex(hasher.digest())
            dst.flush()

            logger.debug("File has digest %s.", digest)

            cache_file_path = os.path.join(self.file_dir, digest)

            if not os.path.exists(cache_file_path):
                move(dst.name, cache_file_path)
            else:
                os.unlink(dst.name)

        # Store the file in the backend. We do that even if the file
        # was already in the cache (that is, we ignore the check above)
        # because there's a (small) chance that the file got removed
        # from the backend but somehow remained in the cache.
        self.save(digest, desc)

        return digest
Exemplo n.º 3
0
    def put_file(self, description="", binary_data=None,
                 file_obj=None, path=None):
        """Put a file in the storage, and keep a copy locally. The
        caller has to provide exactly one among binary_data, file_obj
        and path.

        description (string): a human-readable description of the
                              content.
        binary_data (string): the content of the file to send.
        file_obj (file): the file-like object to send.
        path (string): the file to send.

        """
        temp_fd, temp_path = tempfile.mkstemp(dir=self.tmp_dir)
        os.close(temp_fd)

        # Input checking
        if [binary_data, file_obj, path].count(None) != 2:
            error_string = "No content (or too many) specified in put_file."
            logger.error(error_string)
            raise ValueError(error_string)

        logger.debug("Reading input file to store on the database.")

        # Copy the file content, whatever forms it arrives, into the
        # temporary file
        if path is not None:
            copy(path, temp_path)
        elif binary_data is not None:
            with open(temp_path, 'wb') as temp_file:
                temp_file.write(binary_data)
        else:  # file_obj is not None.
            with open(temp_path, 'wb') as temp_file:
                copyfileobj(file_obj, temp_file)

        hasher = hashlib.sha1()

        # Calculate the file SHA1 digest
        with open(temp_path, 'rb') as temp_file:
            buf = temp_file.read(self.CHUNK_SIZE)
            while buf != '':
                hasher.update(buf)
                buf = temp_file.read(self.CHUNK_SIZE)
        digest = hasher.hexdigest()

        logger.debug("File has digest %s." % digest)

        self.backend.put_file(digest, temp_path, description=description)

        # Move the temporary file in the cache
        move(temp_path,
             os.path.join(self.obj_dir, digest))

        return digest
Exemplo n.º 4
0
    def get_file(self, digest, path=None, file_obj=None,
                 string=False, temp_path=False, temp_file_obj=False):
        """Get a file from the storage, possibly using the cache if
        the file is available there.

        digest (string): the sha1 sum of the file.
        path (string): a path where to save the file.
        file_obj (file): a handler where to save the file (that is not
                         closed at return).
        string (bool): True to return content as a string.
        temp_path (bool): True to return path of a temporary file with
                          that content. The file is reserved to the
                          caller, who has the duty to unlink it.
        temp_file-obj (bool): True to return a file object opened to a
                              temporary file with that content. The
                              file is reserved to the caller. Use this
                              method only for debugging purpose, as it
                              leave a file lying in the temporary
                              directory of FileCacher.

        """
        if [string, temp_path, temp_file_obj].count(True) > 1:
            raise ValueError("Ask for at most one amongst content, "
                             "temp path and temp file obj.")

        cache_path = os.path.join(self.obj_dir, digest)
        cache_exists = os.path.exists(cache_path)

        logger.debug("Getting file %s." % (digest))

        if not cache_exists:
            logger.debug("File %s not in cache, downloading "
                         "from database." % digest)

            # Receives the file from the database
            temp_file, temp_filename = tempfile.mkstemp(dir=self.tmp_dir)
            temp_file = os.fdopen(temp_file, "wb")
            self.backend.get_file(digest, temp_filename)

            # And move it in the cache. Warning: this is not atomic if
            # the temp and the cache dir are on different filesystems.
            move(temp_filename, cache_path)

            logger.debug("File %s downloaded." % digest)

        # Saving to path
        if path is not None:
            copy(cache_path, path)

        # Saving to file object
        if file_obj is not None:
            with open(cache_path, "rb") as file_:
                copyfileobj(file_, file_obj)

        # Returning string?
        if string:
            with open(cache_path, "rb") as cache_file:
                return cache_file.read()

        # Returning temporary file?
        elif temp_path:
            temp_file, temp_filename = tempfile.mkstemp(dir=self.tmp_dir)
            os.close(temp_file)
            copy(cache_path, temp_filename)
            return temp_filename

        # Returning temporary file object?
        elif temp_file_obj:
            temp_file, temp_filename = tempfile.mkstemp(dir=self.tmp_dir)
            os.close(temp_file)
            copy(cache_path, temp_filename)
            temp_file = open(temp_filename, "rb")
            return temp_file