Пример #1
0
    def _requests_to_bytes_iter(self):
        """
        Make the coalesced segment requests and stream back their bytes.

        Takes the items out of self._coalesce_requests(), actually makes
        the requests, and generates the bytes from the responses. An item
        whose first element is already ``bytes`` is yielded as-is without
        making any request.

        Yields 2-tuples (segment-name, byte-chunk). The segment name is
        used for logging.

        :raises SegmentError: if a segment response is not successful, if
                              its etag (or, for non-range requests, its
                              content-length) differs from the expected
                              value when one was supplied, or if the MD5
                              of the streamed body does not match the
                              response's etag
        """
        for data_or_req, seg_etag, seg_size in self._coalesce_requests():
            if isinstance(data_or_req, bytes):  # ugly, awful overloading
                yield ('data segment', data_or_req)
                continue
            seg_req = data_or_req
            seg_resp = seg_req.get_response(self.app)
            if not is_success(seg_resp.status_int):
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'While processing manifest %s, '
                    'got %d while retrieving %s' %
                    (self.name, seg_resp.status_int, seg_req.path))

            elif (
                (seg_etag and (seg_resp.etag != seg_etag)) or
                (seg_size and
                 (seg_resp.content_length != seg_size) and not seg_req.range)):
                # The content-length check is for security reasons. Seems
                # possible that an attacker could upload a >1mb object and
                # then replace it with a much smaller object with same
                # etag. Then create a big nested SLO that calls that
                # object many times which would hammer our obj servers. If
                # this is a range request, don't check content-length
                # because it won't match.
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'Object segment no longer valid: '
                    '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                    '%(r_size)s != %(s_size)s.' % {
                        'path': seg_req.path,
                        'r_etag': seg_resp.etag,
                        'r_size': seg_resp.content_length,
                        's_etag': seg_etag,
                        's_size': seg_size
                    })
            else:
                self.current_resp = seg_resp

            seg_hash = None
            if seg_resp.etag and not seg_req.headers.get('Range'):
                # Only calculate the MD5 if we can use it to validate
                seg_hash = hashlib.md5()

            document_iters = maybe_multipart_byteranges_to_document_iters(
                seg_resp.app_iter, seg_resp.headers['Content-Type'])

            try:
                for chunk in itertools.chain.from_iterable(document_iters):
                    if seg_hash:
                        seg_hash.update(chunk)
                    yield (seg_req.path, chunk)
            finally:
                # Runs on normal completion, when reading the segment
                # raises, and when the consumer closes this generator
                # part-way through, so the segment response is released
                # on every exit path rather than only the successful one.
                close_if_possible(seg_resp.app_iter)

            if seg_hash and seg_hash.hexdigest() != seg_resp.etag:
                raise SegmentError(
                    "Bad MD5 checksum in %(name)s for %(seg)s: headers had"
                    " %(etag)s, but object MD5 was actually %(actual)s" % {
                        'seg': seg_req.path,
                        'etag': seg_resp.etag,
                        'name': self.name,
                        'actual': seg_hash.hexdigest()
                    })
Пример #2
0
    def _internal_iter(self):
        """
        Fetch each coalesced segment and yield its body chunk by chunk.

        Requests come from self._coalesce_requests() and are sent through
        self.app. If self.response_body_length is not None it caps the
        total number of bytes yielded; producing more or fewer bytes than
        that is reported as a SegmentError.

        ListingIterError and SegmentError are logged with
        self.logger.error and only re-raised while
        self.validated_first_segment is false. On exit the app_iter of
        self.current_resp, if set, is closed.
        """
        remaining = self.response_body_length

        try:
            for req, want_etag, want_size in self._coalesce_requests():
                resp = req.get_response(self.app)

                if not is_success(resp.status_int):
                    close_if_possible(resp.app_iter)
                    raise SegmentError(
                        'While processing manifest %s, '
                        'got %d while retrieving %s' %
                        (self.name, resp.status_int, req.path))

                # The size comparison is a security measure: someone could
                # upload a >1mb object, swap in a much smaller object with
                # the same etag, and then build a big nested SLO that
                # references it many times to hammer the object servers.
                # Range requests are exempt because their content-length
                # won't match the full segment size.
                if want_etag and resp.etag != want_etag:
                    stale = True
                else:
                    stale = (want_size and
                             resp.content_length != want_size and
                             not req.range)
                if stale:
                    close_if_possible(resp.app_iter)
                    details = {
                        'path': req.path,
                        'r_etag': resp.etag,
                        'r_size': resp.content_length,
                        's_etag': want_etag,
                        's_size': want_size,
                    }
                    raise SegmentError(
                        'Object segment no longer valid: '
                        '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                        '%(r_size)s != %(s_size)s.' % details)

                self.current_resp = resp

                # Only calculate the MD5 if we can use it to validate
                if resp.etag and not req.headers.get('Range'):
                    hasher = hashlib.md5()
                else:
                    hasher = None

                docs = maybe_multipart_byteranges_to_document_iters(
                    resp.app_iter, resp.headers['Content-Type'])

                for piece in itertools.chain.from_iterable(docs):
                    if hasher:
                        hasher.update(piece)

                    if remaining is None:
                        # No cap on the response body; pass it through.
                        yield piece
                        continue

                    size = len(piece)
                    if remaining >= size:
                        yield piece
                        remaining -= size
                        continue

                    # This chunk overshoots the cap: emit what still fits,
                    # then give up on the segment.
                    yield piece[:remaining]
                    remaining -= size
                    close_if_possible(resp.app_iter)
                    overflow = {
                        'name': self.name,
                        'seg': req.path,
                        'left': remaining,
                    }
                    raise SegmentError(
                        'Too many bytes for %(name)s; truncating in '
                        '%(seg)s with %(left)d bytes left' % overflow)
                close_if_possible(resp.app_iter)

                if hasher:
                    digest = hasher.hexdigest()
                    if digest != resp.etag:
                        mismatch = {
                            'seg': req.path,
                            'etag': resp.etag,
                            'name': self.name,
                            'actual': digest,
                        }
                        raise SegmentError(
                            "Bad MD5 checksum in %(name)s for %(seg)s: "
                            "headers had %(etag)s, but object MD5 was "
                            "actually %(actual)s" % mismatch)

            if remaining:
                raise SegmentError(
                    'Not enough bytes for %s; closing connection' % self.name)
        except (ListingIterError, SegmentError) as err:
            self.logger.error(err)
            if not self.validated_first_segment:
                raise
        finally:
            if self.current_resp:
                close_if_possible(self.current_resp.app_iter)
Пример #3
0
    def _requests_to_bytes_iter(self):
        """
        Make the coalesced segment requests and stream back their bytes.

        Takes the items out of self._coalesce_requests(), actually makes
        the requests, and generates the bytes from the responses. An item
        whose first element is already ``bytes`` is yielded as-is without
        making any request.

        Yields 2-tuples (segment-name, byte-chunk). The segment name is
        used for logging.

        :raises HTTPServiceUnavailable: if a segment request fails with a
                                        status for which is_server_error()
                                        is true; the details are logged
                                        first
        :raises SegmentError: if a segment request fails with any other
                              non-success status, if its etag (or, for
                              non-range requests, its content-length)
                              differs from the expected value when one was
                              supplied, or if the streamed body's length
                              or MD5 disagrees with the response headers
        """
        for data_or_req, seg_etag, seg_size in self._coalesce_requests():
            if isinstance(data_or_req, bytes):  # ugly, awful overloading
                yield ('data segment', data_or_req)
                continue
            seg_req = data_or_req
            seg_resp = seg_req.get_response(self.app)
            if not is_success(seg_resp.status_int):
                # Error body should be short
                body = seg_resp.body
                if not six.PY2:
                    # The body is only used for a diagnostic message, so a
                    # non-UTF-8 error body must not raise
                    # UnicodeDecodeError and hide the real failure.
                    body = body.decode('utf8', 'replace')
                msg = 'While processing manifest %s, got %d (%s) ' \
                    'while retrieving %s' % (
                        self.name, seg_resp.status_int,
                        body if len(body) <= 60 else body[:57] + '...',
                        seg_req.path)
                if is_server_error(seg_resp.status_int):
                    self.logger.error(msg)
                    raise HTTPServiceUnavailable(request=seg_req,
                                                 content_type='text/plain')
                raise SegmentError(msg)
            elif (
                (seg_etag and (seg_resp.etag != seg_etag)) or
                (seg_size and
                 (seg_resp.content_length != seg_size) and not seg_req.range)):
                # The content-length check is for security reasons. Seems
                # possible that an attacker could upload a >1mb object and
                # then replace it with a much smaller object with same
                # etag. Then create a big nested SLO that calls that
                # object many times which would hammer our obj servers. If
                # this is a range request, don't check content-length
                # because it won't match.
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'Object segment no longer valid: '
                    '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                    '%(r_size)s != %(s_size)s.' % {
                        'path': seg_req.path,
                        'r_etag': seg_resp.etag,
                        'r_size': seg_resp.content_length,
                        's_etag': seg_etag,
                        's_size': seg_size
                    })
            else:
                self.current_resp = seg_resp

            resp_len = 0
            seg_hash = None
            if seg_resp.etag and not seg_req.headers.get('Range'):
                # Only calculate the MD5 if we can use it to validate
                seg_hash = md5(usedforsecurity=False)

            document_iters = maybe_multipart_byteranges_to_document_iters(
                seg_resp.app_iter, seg_resp.headers['Content-Type'])

            try:
                for chunk in itertools.chain.from_iterable(document_iters):
                    if seg_hash:
                        seg_hash.update(chunk)
                        # The length is only needed for the validation
                        # below, which is skipped when there is no hash.
                        resp_len += len(chunk)
                    yield (seg_req.path, chunk)
            finally:
                # Runs on normal completion, when reading the segment
                # raises, and when the consumer closes this generator
                # part-way through, so the segment response is released
                # on every exit path rather than only the successful one.
                close_if_possible(seg_resp.app_iter)

            if seg_hash:
                if resp_len != seg_resp.content_length:
                    raise SegmentError(
                        "Bad response length for %(seg)s as part of %(name)s: "
                        "headers had %(from_headers)s, but response length "
                        "was actually %(actual)s" % {
                            'seg': seg_req.path,
                            'from_headers': seg_resp.content_length,
                            'name': self.name,
                            'actual': resp_len
                        })
                if seg_hash.hexdigest() != seg_resp.etag:
                    raise SegmentError(
                        "Bad MD5 checksum for %(seg)s as part of %(name)s: "
                        "headers had %(etag)s, but object MD5 was actually "
                        "%(actual)s" % {
                            'seg': seg_req.path,
                            'etag': seg_resp.etag,
                            'name': self.name,
                            'actual': seg_hash.hexdigest()
                        })
Пример #4
0
    def _internal_iter(self):
        """
        Stream the bodies of all coalesced segment requests, in order.

        Each request produced by self._coalesce_requests() is sent through
        self.app and validated before its chunks are yielded. When
        self.response_body_length is not None, the number of bytes yielded
        is capped at that value, and a surplus or shortfall raises
        SegmentError.

        ListingIterError and SegmentError are always logged through
        self.logger.error; they propagate only if
        self.validated_first_segment is false. The app_iter of
        self.current_resp, if set, is closed when the generator finishes.
        """
        budget = self.response_body_length

        try:
            for segment_req, etag_wanted, size_wanted in (
                    self._coalesce_requests()):
                segment_resp = segment_req.get_response(self.app)

                if not is_success(segment_resp.status_int):
                    close_if_possible(segment_resp.app_iter)
                    raise SegmentError(
                        'While processing manifest %s, '
                        'got %d while retrieving %s' %
                        (self.name, segment_resp.status_int,
                         segment_req.path))

                # Comparing sizes guards against abuse: an attacker might
                # upload a >1mb object, replace it with a far smaller one
                # carrying the same etag, and then reference it many times
                # from a big nested SLO to hammer the object servers. The
                # size is not compared for range requests, whose
                # content-length won't match the whole segment.
                invalid = etag_wanted and segment_resp.etag != etag_wanted
                if not invalid:
                    invalid = (
                        size_wanted and
                        segment_resp.content_length != size_wanted and
                        not segment_req.range)
                if invalid:
                    close_if_possible(segment_resp.app_iter)
                    raise SegmentError(
                        'Object segment no longer valid: '
                        '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                        '%(r_size)s != %(s_size)s.' % {
                            'path': segment_req.path,
                            'r_etag': segment_resp.etag,
                            'r_size': segment_resp.content_length,
                            's_etag': etag_wanted,
                            's_size': size_wanted,
                        })

                self.current_resp = segment_resp

                # Only calculate the MD5 if we can use it to validate
                can_validate = (segment_resp.etag and
                                not segment_req.headers.get('Range'))
                md5_sum = hashlib.md5() if can_validate else None

                body_parts = maybe_multipart_byteranges_to_document_iters(
                    segment_resp.app_iter,
                    segment_resp.headers['Content-Type'])

                for data in itertools.chain.from_iterable(body_parts):
                    if md5_sum:
                        md5_sum.update(data)

                    if budget is None:
                        # Unlimited response body: nothing to account for.
                        yield data
                    else:
                        data_len = len(data)
                        if budget >= data_len:
                            yield data
                            budget -= data_len
                        else:
                            # Send only what still fits, then abort.
                            yield data[:budget]
                            budget -= data_len
                            close_if_possible(segment_resp.app_iter)
                            raise SegmentError(
                                'Too many bytes for %(name)s; truncating in '
                                '%(seg)s with %(left)d bytes left' % {
                                    'name': self.name,
                                    'seg': segment_req.path,
                                    'left': budget,
                                })
                close_if_possible(segment_resp.app_iter)

                if md5_sum:
                    computed = md5_sum.hexdigest()
                    if computed != segment_resp.etag:
                        raise SegmentError(
                            "Bad MD5 checksum in %(name)s for %(seg)s: "
                            "headers had %(etag)s, but object MD5 was "
                            "actually %(actual)s" % {
                                'seg': segment_req.path,
                                'etag': segment_resp.etag,
                                'name': self.name,
                                'actual': computed,
                            })

            if budget:
                raise SegmentError(
                    'Not enough bytes for %s; closing connection' % self.name)
        except (ListingIterError, SegmentError) as err:
            self.logger.error(err)
            if not self.validated_first_segment:
                raise
        finally:
            if self.current_resp:
                close_if_possible(self.current_resp.app_iter)
Пример #5
0
    def _requests_to_bytes_iter(self):
        """
        Make the coalesced segment requests and stream back their bytes.

        Takes the items out of self._coalesce_requests(), actually makes
        the requests, and generates the bytes from the responses. An item
        whose first element is already ``bytes`` is yielded as-is without
        making any request.

        Yields 2-tuples (segment-name, byte-chunk). The segment name is
        used for logging.

        :raises SegmentError: if a segment response is not successful, if
                              its etag (or, for non-range requests, its
                              content-length) differs from the expected
                              value when one was supplied, or if the MD5
                              of the streamed body does not match the
                              response's etag
        """
        for data_or_req, seg_etag, seg_size in self._coalesce_requests():
            if isinstance(data_or_req, bytes):  # ugly, awful overloading
                yield ('data segment', data_or_req)
                continue
            seg_req = data_or_req
            seg_resp = seg_req.get_response(self.app)
            if not is_success(seg_resp.status_int):
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'While processing manifest %s, '
                    'got %d while retrieving %s' %
                    (self.name, seg_resp.status_int, seg_req.path))

            elif ((seg_etag and (seg_resp.etag != seg_etag)) or
                    (seg_size and (seg_resp.content_length != seg_size) and
                     not seg_req.range)):
                # The content-length check is for security reasons. Seems
                # possible that an attacker could upload a >1mb object and
                # then replace it with a much smaller object with same
                # etag. Then create a big nested SLO that calls that
                # object many times which would hammer our obj servers. If
                # this is a range request, don't check content-length
                # because it won't match.
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'Object segment no longer valid: '
                    '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                    '%(r_size)s != %(s_size)s.' %
                    {'path': seg_req.path, 'r_etag': seg_resp.etag,
                     'r_size': seg_resp.content_length,
                     's_etag': seg_etag,
                     's_size': seg_size})
            else:
                self.current_resp = seg_resp

            seg_hash = None
            if seg_resp.etag and not seg_req.headers.get('Range'):
                # Only calculate the MD5 if we can use it to validate
                seg_hash = hashlib.md5()

            document_iters = maybe_multipart_byteranges_to_document_iters(
                seg_resp.app_iter,
                seg_resp.headers['Content-Type'])

            try:
                for chunk in itertools.chain.from_iterable(document_iters):
                    if seg_hash:
                        seg_hash.update(chunk)
                    yield (seg_req.path, chunk)
            finally:
                # Runs on normal completion, when reading the segment
                # raises, and when the consumer closes this generator
                # part-way through, so the segment response is released
                # on every exit path rather than only the successful one.
                close_if_possible(seg_resp.app_iter)

            if seg_hash and seg_hash.hexdigest() != seg_resp.etag:
                raise SegmentError(
                    "Bad MD5 checksum in %(name)s for %(seg)s: headers had"
                    " %(etag)s, but object MD5 was actually %(actual)s" %
                    {'seg': seg_req.path, 'etag': seg_resp.etag,
                     'name': self.name, 'actual': seg_hash.hexdigest()})