Exemplo n.º 1
0
    def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
        """
        Fetch the submanifest, parse it, and return it.
        Raise exception on failures.
        """
        sub_req = make_subrequest(
            req.environ,
            path='/'.join(['', version, acc, con, obj]),
            method='GET',
            headers={'x-auth-token': req.headers.get('x-auth-token')},
            agent='%(orig)s SLO MultipartGET',
            swift_source='SLO')
        sub_resp = sub_req.get_response(self.slo.app)

        if not is_success(sub_resp.status_int):
            close_if_possible(sub_resp.app_iter)
            raise ListingIterError(
                'ERROR: while fetching %s, GET of submanifest %s '
                'failed with status %d' %
                (req.path, sub_req.path, sub_resp.status_int))

        try:
            with closing_if_possible(sub_resp.app_iter):
                return json.loads(''.join(sub_resp.app_iter))
        except ValueError as err:
            raise ListingIterError(
                'ERROR: while fetching %s, JSON-decoding of submanifest %s '
                'failed with %s' % (req.path, sub_req.path, err))
Exemplo n.º 2
0
    def _listing_pages_iter(self, account_name, lcontainer, lprefix, env):
        marker = ''
        while True:
            lreq = make_pre_authed_request(env,
                                           method='GET',
                                           swift_source='VW',
                                           path='/v1/%s/%s' %
                                           (account_name, lcontainer))
            lreq.environ['QUERY_STRING'] = \
                'format=json&prefix=%s&reverse=on&marker=%s' % (
                    quote(lprefix), quote(marker))
            lresp = lreq.get_response(self.app)
            if not is_success(lresp.status_int):
                if lresp.status_int == HTTP_NOT_FOUND:
                    raise ListingIterNotFound()
                elif is_client_error(lresp.status_int):
                    raise HTTPPreconditionFailed()
                else:
                    raise ListingIterError()

            if not lresp.body:
                break

            sublisting = json.loads(lresp.body)
            if not sublisting:
                break
            marker = sublisting[-1]['name'].encode('utf-8')
            yield sublisting
Exemplo n.º 3
0
 def _listing_pages_iter(self, lcontainer, lprefix, env):
     lpartition = self.app.container_ring.get_part(
         self.account_name, lcontainer)
     marker = ''
     while True:
         lreq = Request.blank('i will be overridden by env', environ=env)
         # Don't quote PATH_INFO, by WSGI spec
         lreq.environ['PATH_INFO'] = \
             '/v1/%s/%s' % (self.account_name, lcontainer)
         lreq.environ['REQUEST_METHOD'] = 'GET'
         lreq.environ['QUERY_STRING'] = \
             'format=json&prefix=%s&marker=%s' % (quote(lprefix),
                                                  quote(marker))
         lresp = self.GETorHEAD_base(
             lreq, _('Container'), self.app.container_ring, lpartition,
             lreq.swift_entity_path)
         if 'swift.authorize' in env:
             lreq.acl = lresp.headers.get('x-container-read')
             aresp = env['swift.authorize'](lreq)
             if aresp:
                 raise ListingIterNotAuthorized(aresp)
         if lresp.status_int == HTTP_NOT_FOUND:
             raise ListingIterNotFound()
         elif not is_success(lresp.status_int):
             raise ListingIterError()
         if not lresp.body:
             break
         sublisting = json.loads(lresp.body)
         if not sublisting:
             break
         marker = sublisting[-1]['name'].encode('utf-8')
         yield sublisting
Exemplo n.º 4
0
    def _segment_listing_iterator(self, req, version, account, container,
                                  prefix, segments, first_byte=None,
                                  last_byte=None):
        # It's sort of hokey that this thing takes in the first page of
        # segments as an argument, but we need to compute the etag and content
        # length from the first page, and it's better to have a hokey
        # interface than to make redundant requests.
        if first_byte is None:
            first_byte = 0
        if last_byte is None:
            last_byte = float("inf")

        marker = ''
        while True:
            for segment in segments:
                seg_length = int(segment['bytes'])

                if first_byte >= seg_length:
                    # don't need any bytes from this segment
                    first_byte = max(first_byte - seg_length, -1)
                    last_byte = max(last_byte - seg_length, -1)
                    continue
                elif last_byte < 0:
                    # no bytes are needed from this or any future segment
                    break

                seg_name = segment['name']
                if isinstance(seg_name, unicode):
                    seg_name = seg_name.encode("utf-8")

                # (obj path, etag, size, first byte, last byte)
                yield ("/" + "/".join((version, account, container,
                                       seg_name)),
                       # We deliberately omit the etag and size here;
                       # SegmentedIterable will check size and etag if
                       # specified, but we don't want it to. DLOs only care
                       # that the objects' names match the specified prefix.
                       None, None,
                       (None if first_byte <= 0 else first_byte),
                       (None if last_byte >= seg_length - 1 else last_byte))

                first_byte = max(first_byte - seg_length, -1)
                last_byte = max(last_byte - seg_length, -1)

            if len(segments) < CONTAINER_LISTING_LIMIT:
                # a short page means that we're done with the listing
                break
            elif last_byte < 0:
                break

            marker = segments[-1]['name']
            error_response, segments = self._get_container_listing(
                req, version, account, container, prefix, marker)
            if error_response:
                # we've already started sending the response body to the
                # client, so all we can do is raise an exception to make the
                # WSGI server close the connection early
                raise ListingIterError(
                    "Got status %d listing container /%s/%s" %
                    (error_response.status_int, account, container))
Exemplo n.º 5
0
    def _listing_pages_iter(self, account_name, lcontainer, lprefix,
                            req, marker='', end_marker='', reverse=True):
        '''Get "pages" worth of objects that start with a prefix.

        The optional keyword arguments ``marker``, ``end_marker``, and
        ``reverse`` are used similar to how they are for containers. We're
        either coming:

           - directly from ``_listing_iter``, in which case none of the
             optional args are specified, or

           - from ``_in_proxy_reverse_listing``, in which case ``reverse``
             is ``False`` and both ``marker`` and ``end_marker`` are specified
             (although they may still be blank).
        '''
        while True:
            lreq = make_pre_authed_request(
                req.environ, method='GET', swift_source='VW',
                path=wsgi_quote('/v1/%s/%s' % (account_name, lcontainer)))
            lreq.environ['QUERY_STRING'] = \
                'prefix=%s&marker=%s' % (wsgi_quote(lprefix),
                                         wsgi_quote(marker))
            if end_marker:
                lreq.environ['QUERY_STRING'] += '&end_marker=%s' % (
                    wsgi_quote(end_marker))
            if reverse:
                lreq.environ['QUERY_STRING'] += '&reverse=on'
            lresp = lreq.get_response(self.app)
            if not is_success(lresp.status_int):
                close_if_possible(lresp.app_iter)
                if lresp.status_int == HTTP_NOT_FOUND:
                    raise ListingIterNotFound()
                elif is_client_error(lresp.status_int):
                    raise HTTPPreconditionFailed(request=req)
                else:
                    raise ListingIterError()

            if not lresp.body:
                break

            sublisting = json.loads(lresp.body)
            if not sublisting:
                break

            # When using the ``reverse`` param, check that the listing is
            # actually reversed
            first_item = bytes_to_wsgi(sublisting[0]['name'].encode('utf-8'))
            last_item = bytes_to_wsgi(sublisting[-1]['name'].encode('utf-8'))
            page_is_after_marker = marker and first_item > marker
            if reverse and (first_item < last_item or page_is_after_marker):
                # Apparently there's at least one pre-2.6.0 container server
                yield self._in_proxy_reverse_listing(
                    account_name, lcontainer, lprefix,
                    req, marker, sublisting)
                return

            marker = last_item
            yield sublisting
Exemplo n.º 6
0
    def _byterange_listing_iterator(self,
                                    req,
                                    version,
                                    account,
                                    segments,
                                    first_byte,
                                    last_byte,
                                    cached_fetch_sub_slo_segments,
                                    recursion_depth=1):
        last_sub_path = None
        for seg_dict in segments:
            seg_length = self._segment_length(seg_dict)
            if first_byte >= seg_length:
                # don't need any bytes from this segment
                first_byte -= seg_length
                last_byte -= seg_length
                continue

            if last_byte < 0:
                # no bytes are needed from this or any future segment
                return

            seg_range = seg_dict.get('range')
            if seg_range is None:
                range_start, range_end = 0, seg_length - 1
            else:
                # We already validated and supplied concrete values
                # for the range on upload
                range_start, range_end = map(int, seg_range.split('-'))

            if config_true_value(seg_dict.get('sub_slo')):
                # do this check here so that we can avoid fetching this last
                # manifest before raising the exception
                if recursion_depth >= self.max_slo_recursion_depth:
                    raise ListingIterError("Max recursion depth exceeded")

                sub_path = get_valid_utf8_str(seg_dict['name'])
                sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                if last_sub_path != sub_path:
                    sub_segments = cached_fetch_sub_slo_segments(
                        req, version, account, sub_cont, sub_obj)
                last_sub_path = sub_path

                # Use the existing machinery to slice into the sub-SLO.
                for sub_seg_dict, sb, eb in self._byterange_listing_iterator(
                        req,
                        version,
                        account,
                        sub_segments,
                        # This adjusts first_byte and last_byte to be
                        # relative to the sub-SLO.
                        range_start + max(0, first_byte),
                        min(range_end, range_start + last_byte),
                        cached_fetch_sub_slo_segments,
                        recursion_depth=recursion_depth + 1):
                    yield sub_seg_dict, sb, eb
            else:
                if isinstance(seg_dict['name'], six.text_type):
                    seg_dict['name'] = seg_dict['name'].encode("utf-8")
                yield (seg_dict, max(0, first_byte) + range_start,
                       min(range_end, range_start + last_byte))

            first_byte -= seg_length
            last_byte -= seg_length
Exemplo n.º 7
0
    def _segment_listing_iterator(self, req, version, account, container,
                                  prefix, segments, first_byte=None,
                                  last_byte=None):
        '''
        :param req: upstream request
        :param version: native
        :param account: native
        :param container: native
        :param prefix: native
        :param segments: array of dicts, with native strings
        :param first_byte: number
        :param last_byte: number
        '''
        # It's sort of hokey that this thing takes in the first page of
        # segments as an argument, but we need to compute the etag and content
        # length from the first page, and it's better to have a hokey
        # interface than to make redundant requests.
        if first_byte is None:
            first_byte = 0
        if last_byte is None:
            last_byte = float("inf")

        while True:
            for segment in segments:
                seg_length = int(segment['bytes'])

                if first_byte >= seg_length:
                    # don't need any bytes from this segment
                    first_byte = max(first_byte - seg_length, -1)
                    last_byte = max(last_byte - seg_length, -1)
                    continue
                elif last_byte < 0:
                    # no bytes are needed from this or any future segment
                    break

                seg_name = segment['name']
                if six.PY2:
                    seg_name = seg_name.encode("utf-8")

                # We deliberately omit the etag and size here;
                # SegmentedIterable will check size and etag if
                # specified, but we don't want it to. DLOs only care
                # that the objects' names match the specified prefix.
                # SegmentedIterable will instead check that the data read
                # from each segment matches the response headers.
                _path = "/".join(["", version, account, container, seg_name])
                _first = None if first_byte <= 0 else first_byte
                _last = None if last_byte >= seg_length - 1 else last_byte
                yield {
                    'path': _path,
                    'first_byte': _first,
                    'last_byte': _last
                }

                first_byte = max(first_byte - seg_length, -1)
                last_byte = max(last_byte - seg_length, -1)

            if len(segments) < constraints.CONTAINER_LISTING_LIMIT:
                # a short page means that we're done with the listing
                break
            elif last_byte < 0:
                break

            marker = segments[-1]['name']
            error_response, segments = self._get_container_listing(
                req, version, account, container, prefix, marker)
            if error_response:
                # we've already started sending the response body to the
                # client, so all we can do is raise an exception to make the
                # WSGI server close the connection early
                close_if_possible(error_response.app_iter)
                raise ListingIterError(
                    "Got status %d listing container /%s/%s" %
                    (error_response.status_int, account, container))
Exemplo n.º 8
0
    def _segment_listing_iterator(self, req, version, account, segments,
                                  recursion_depth=1):
        for seg_dict in segments:
            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(seg_dict,
                                                 logger=self.slo.logger)

        # We handle the range stuff here so that we can be smart about
        # skipping unused submanifests. For example, if our first segment is a
        # submanifest referencing 50 MiB total, but start_byte falls in
        # the 51st MiB, then we can avoid fetching the first submanifest.
        #
        # If we were to make SegmentedIterable handle all the range
        # calculations, we would be unable to make this optimization.
        total_length = sum(self._segment_length(seg) for seg in segments)
        if self.first_byte is None:
            self.first_byte = 0
        if self.last_byte is None:
            self.last_byte = total_length - 1

        last_sub_path = None
        for seg_dict in segments:
            seg_length = self._segment_length(seg_dict)
            if self.first_byte >= seg_length:
                # don't need any bytes from this segment
                self.first_byte -= seg_length
                self.last_byte -= seg_length
                continue

            if self.last_byte < 0:
                # no bytes are needed from this or any future segment
                break

            seg_range = seg_dict.get('range')
            if seg_range is None:
                range_start, range_end = 0, seg_length - 1
            else:
                # We already validated and supplied concrete values
                # for the range on upload
                range_start, range_end = map(int, seg_range.split('-'))

            if config_true_value(seg_dict.get('sub_slo')):
                # do this check here so that we can avoid fetching this last
                # manifest before raising the exception
                if recursion_depth >= self.max_slo_recursion_depth:
                    raise ListingIterError("Max recursion depth exceeded")

                sub_path = get_valid_utf8_str(seg_dict['name'])
                sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                if last_sub_path != sub_path:
                    sub_segments = self._fetch_sub_slo_segments(
                        req, version, account, sub_cont, sub_obj)
                last_sub_path = sub_path

                # Use the existing machinery to slice into the sub-SLO.
                # This requires that we save off our current state, and
                # restore at the other end.
                orig_start, orig_end = self.first_byte, self.last_byte
                self.first_byte = range_start + max(0, self.first_byte)
                self.last_byte = min(range_end, range_start + self.last_byte)

                for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                        req, version, account, sub_segments,
                        recursion_depth=recursion_depth + 1):
                    yield sub_seg_dict, sb, eb

                # Restore the first/last state
                self.first_byte, self.last_byte = orig_start, orig_end
            else:
                if isinstance(seg_dict['name'], six.text_type):
                    seg_dict['name'] = seg_dict['name'].encode("utf-8")
                yield (seg_dict,
                       max(0, self.first_byte) + range_start,
                       min(range_end, range_start + self.last_byte))

            self.first_byte -= seg_length
            self.last_byte -= seg_length
Exemplo n.º 9
0
    def _segment_listing_iterator(self,
                                  req,
                                  version,
                                  account,
                                  segments,
                                  recursion_depth=1):
        for seg_dict in segments:
            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(seg_dict,
                                                 logger=self.slo.logger)

        # We handle the range stuff here so that we can be smart about
        # skipping unused submanifests. For example, if our first segment is a
        # submanifest referencing 50 MiB total, but self.first_byte falls in
        # the 51st MiB, then we can avoid fetching the first submanifest.
        #
        # If we were to make SegmentedIterable handle all the range
        # calculations, we would be unable to make this optimization.
        total_length = sum(int(seg['bytes']) for seg in segments)
        if self.first_byte is None:
            self.first_byte = 0
        if self.last_byte is None:
            self.last_byte = total_length - 1

        for seg_dict in segments:
            seg_length = int(seg_dict['bytes'])

            if self.first_byte >= seg_length:
                # don't need any bytes from this segment
                self.first_byte = max(self.first_byte - seg_length, -1)
                self.last_byte = max(self.last_byte - seg_length, -1)
                continue

            if self.last_byte < 0:
                # no bytes are needed from this or any future segment
                break

            if config_true_value(seg_dict.get('sub_slo')):
                # do this check here so that we can avoid fetching this last
                # manifest before raising the exception
                if recursion_depth >= self.max_slo_recursion_depth:
                    raise ListingIterError("Max recursion depth exceeded")

                sub_path = get_valid_utf8_str(seg_dict['name'])
                sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                sub_segments = self._fetch_sub_slo_segments(
                    req, version, account, sub_cont, sub_obj)
                for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                        req,
                        version,
                        account,
                        sub_segments,
                        recursion_depth=recursion_depth + 1):
                    yield sub_seg_dict, sb, eb
            else:
                if isinstance(seg_dict['name'], unicode):
                    seg_dict['name'] = seg_dict['name'].encode("utf-8")
                seg_length = int(seg_dict['bytes'])
                yield (seg_dict,
                       (None if self.first_byte <= 0 else self.first_byte),
                       (None if self.last_byte >= seg_length - 1 else
                        self.last_byte))
                self.first_byte = max(self.first_byte - seg_length, -1)
                self.last_byte = max(self.last_byte - seg_length, -1)