Example #1
    def add_file(self, name, filename, compress_hint=True):
        url, version = self._parse_name(name)

        if self._has_capability(SERVER_ACCEPTS_SHA256_DIGEST):
            sha = file_digest(filename)
        else:
            sha = ''

        headers = {'SHA256-Checksum': sha}

        # Important detail: this upload is streaming.
        # http://docs.python-requests.org/en/latest/user/advanced/#streaming-uploads

        with open(filename, 'rb') as f:
            if (compress_hint and self._has_capability(SERVER_ACCEPTS_GZIP)):
                # Unfortunately a temporary file seems to be required here.
                # Our server requires Content-Length to be present, because
                # some WSGI implementations (among others the one used in
                # our tests) are not required to support EOF: the reader
                # must stop at Content-Length, which is impossible if the
                # length is unknown. Since we have to send Content-Length,
                # we need to compute it, and that means buffering the
                # compressed data before sending. It could be buffered in
                # memory or in a temporary file, and a temporary file is
                # the more suitable choice.
                with tempfile.TemporaryFile() as tmp:
                    with gzip.GzipFile(fileobj=tmp, mode='wb') as gz:
                        shutil.copyfileobj(f, gz)
                    tmp.seek(0)
                    headers['Content-Encoding'] = 'gzip'
                    headers['Logical-Size'] = str(os.stat(filename).st_size)
                    response = self._put_file(url, version, tmp, headers)
            else:
                response = self._put_file(url, version, f, headers)

        name, version = split_name(name)
        return versioned_name(name, self._parse_last_modified(response))
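The comment above explains why the compressed body is buffered: the server needs Content-Length up front, which a gzip stream cannot provide until it is fully written. A standalone sketch of the same gzip-to-tempfile pattern using the `requests` library (the function name `upload_gzipped` and the bare URL argument are illustrative, not part of the original client):

    import gzip
    import os
    import shutil
    import tempfile

    import requests

    def upload_gzipped(url, filename):
        # Compress into a seekable temporary file first, so the final
        # size is known and requests can send a Content-Length header.
        with open(filename, 'rb') as f, tempfile.TemporaryFile() as tmp:
            with gzip.GzipFile(fileobj=tmp, mode='wb') as gz:
                shutil.copyfileobj(f, gz)
            tmp.seek(0)
            headers = {
                'Content-Encoding': 'gzip',
                # Uncompressed size, mirroring the Logical-Size header above.
                'Logical-Size': str(os.stat(filename).st_size),
            }
            # requests derives Content-Length from the seekable file object.
            return requests.put(url, data=tmp, headers=headers)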
Example #2
    def add_file(self, name, filename, compress_hint=True):
        url, version = self._parse_name(name)

        headers = {}

        if (compress_hint
                and self._has_capability(SERVER_ACCEPTS_SHA256_DIGEST)):
            headers['SHA256-Checksum'] = file_digest(filename)

        # Important detail: this upload is streaming.
        # http://docs.python-requests.org/en/latest/user/advanced/#streaming-uploads

        with open(filename, 'rb') as f:
            if (compress_hint
                    and self._has_capability(SERVER_ACCEPTS_GZIP)):
                # Unfortunately a temporary file seems to be required here.
                # Our server requires Content-Length to be present, because
                # some WSGI implementations (among others the one used in
                # our tests) are not required to support EOF: the reader
                # must stop at Content-Length, which is impossible if the
                # length is unknown. Since we have to send Content-Length,
                # we need to compute it, and that means buffering the
                # compressed data before sending. It could be buffered in
                # memory or in a temporary file, and a temporary file is
                # the more suitable choice.
                with tempfile.TemporaryFile() as tmp:
                    with gzip.GzipFile(fileobj=tmp, mode='wb') as gz:
                        shutil.copyfileobj(f, gz)
                    tmp.seek(0)
                    headers['Content-Encoding'] = 'gzip'
                    headers['Logical-Size'] = str(os.stat(filename).st_size)
                    response = self._put_file(url, version, tmp, headers)
            else:
                response = self._put_file(url, version, f, headers)

        name, version = split_name(name)
        return versioned_name(name, self._parse_last_modified(response))
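Both examples call a `file_digest` helper that is not shown here. A plausible hashlib-based sketch; since Example #3 below passes it either a path or an open stream, this version accepts both (an assumption about the real helper's interface):

    import hashlib

    def file_digest(source, chunk_size=64 * 1024):
        # The call sites pass either a filename or an open binary
        # stream, so both are accepted here (an assumption).
        sha = hashlib.sha256()
        stream = open(source, 'rb') if isinstance(source, str) else source
        try:
            for chunk in iter(lambda: stream.read(chunk_size), b''):
                sha.update(chunk)
        finally:
            if stream is not source:
                stream.close()
        return sha.hexdigest()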
Example #3
    def store(self, name, data, version, size=0,
              compressed=False, digest=None, logical_size=None):
        """Adds a new file to the storage.
        
        If the file with the same name existed before, it's not
        guaranteed that the link for the old version will exist until
        the operation completes, but it's guaranteed that the link
        will never point to an invalid blob.

        Args:
            name: name of the file being stored.
                May contain slashes that are treated as path separators.
            data: binary file-like object with file contents.
                Streams of unknown length are supported for compatibility
                with the WSGI interface: pass the ``size`` parameter in
                these cases.
            version: new file "version"
                Link modification time will be set to this timestamp. If
                the link exists, and its modification time is higher, the
                file is not overwritten.
            size: length of ``data`` in bytes
                If nonzero, this takes priority over the internal size
                of ``data``.
            compressed: whether ``data`` is gzip-compressed
                If True, compression is skipped and the file is written
                as-is. Note that the current server implementation sends
                the 'Content-Encoding' header anyway, requiring the
                client to decompress the file.
            digest: SHA256 digest of the file before compression
                If specified, the digest will not be computed again, saving
                resources.
            logical_size: if ``data`` is gzip-compressed, this parameter
                must be set to the decompressed file size.
        """
        with _exclusive_lock(self._lock_path('links', name)):
            logger.debug('Acquired lock to link for %s.', name)
            link_path = self._link_path(name)
            if _path_exists(link_path) and _file_version(link_path) > version:
                logger.info(
                    'Tried to store older version of %s (%d < %d), ignoring.',
                    name, version, _file_version(link_path))
                return _file_version(link_path)

            # data is managed by contents now, and shouldn't be used directly
            with _InputStreamWrapper(data, size) as contents:
                if digest is None or logical_size is None:
                    contents.save()
                    if compressed:
                        # This shouldn't occur if the request came from a proper
                        # filetracker client, so we don't care if it's slow.
                        logger.warning(
                            'Storing compressed stream without hints.')
                        with gzip.open(
                                contents.current_path, 'rb') as decompressed:
                            digest = file_digest(decompressed)
                        with gzip.open(
                                contents.current_path, 'rb') as decompressed:
                            logical_size = _read_stream_for_size(decompressed)
                    else:
                        digest = file_digest(contents.current_path)
                        logical_size = os.stat(contents.current_path).st_size

                blob_path = self._blob_path(digest)
                
                with _exclusive_lock(self._lock_path('blobs', digest)):
                    logger.debug('Acquired lock for blob %s.', digest)
                    digest_bytes = digest.encode()

                    with self._db_transaction() as txn:
                        logger.debug('Started DB transaction (adding link).')
                        link_count = int(self.db.get(digest_bytes, 0, txn=txn))
                        new_count = str(link_count + 1).encode()
                        self.db.put(digest_bytes, new_count, txn=txn)

                        if link_count == 0:
                            self.db.put(
                                    '{}:logical_size'.format(digest).encode(),
                                    str(logical_size).encode(),
                                    txn=txn)
                        logger.debug('Committing DB transaction (adding link).')

                    logger.debug('Committed DB transaction (adding link).')

                    # Create a new blob if this isn't a duplicate.
                    if link_count == 0:
                        logger.debug('Creating new blob.')
                        _create_file_dirs(blob_path)

                        if compressed:
                            contents.save(blob_path)
                        else:
                            contents.save()
                            with open(contents.current_path, 'rb') as raw,\
                                    gzip.open(blob_path, 'wb') as blob:
                                shutil.copyfileobj(raw, blob)

                logger.debug('Released lock for blob %s.', digest)

            if _path_exists(link_path):
                # Lend the link lock to delete().
                # Note that DB lock has to be released in advance, otherwise
                # deadlock is possible in concurrent scenarios.
                logger.info('Overwriting existing link %s.', name)
                self.delete(name, version, _lock=False)

            _create_file_dirs(link_path)
            rel_blob_path = os.path.relpath(blob_path,
                                            os.path.dirname(link_path))
            os.symlink(rel_blob_path, link_path)

            logger.debug('Created link %s.', name)

            lutime(link_path, version)
            return version

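Example #3 serializes writers with an `_exclusive_lock` helper that is also not shown. On POSIX systems such a helper is commonly built on advisory `fcntl.flock` locks; the sketch below assumes that, and the real implementation may differ:

    import contextlib
    import fcntl
    import os

    @contextlib.contextmanager
    def _exclusive_lock(path):
        # Ensure the lock file's directory exists ('.' if path is bare).
        os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
        fd = os.open(path, os.O_WRONLY | os.O_CREAT, 0o600)
        try:
            fcntl.flock(fd, fcntl.LOCK_EX)  # blocks until the lock is free
            yield
        finally:
            fcntl.flock(fd, fcntl.LOCK_UN)
            os.close(fd)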