Exemplo n.º 1
0
    def _BuildBucketFilterStrings(self, wildcard):
        """Splits a wildcard into the pieces used to list and filter a bucket.

        One call handles a single level of wildcard expansion: everything up
        to and including the first path component that contains a wildcard
        char. Whatever follows that component is handed back so the caller
        can feed it into the next expansion round.

        Args:
          wildcard: The wildcard string to match to objects.

        Returns:
          (prefix, delimiter, prefix_wildcard, suffix_wildcard)
          where:
            prefix is the literal text before the first wildcard char (None
              when the wildcard char comes first), to be sent in the bucket
              GET request.
            delimiter is the delimiter to be sent in the bucket GET request:
              '/' normally, None when prefix_wildcard contains '**'.
            prefix_wildcard is the wildcard used to filter bucket GET
              results.
            suffix_wildcard is the remainder to append to filtered results
              for the next wildcard expansion iteration ('' if none).
          Example (assuming WILDCARD_REGEX first matches at the '*' and
          StripOneSlash drops a single trailing '/'): the object name
          wildcard abc/d*e/f*.txt yields prefix=abc/d, delimiter=/,
          prefix_wildcard=abc/d*e and suffix_wildcard=f*.txt.

          A string without wildcard chars is not an error: it is returned
          as both prefix and prefix_wildcard, with delimiter '/' and an
          empty suffix_wildcard.
        """
        first_hit = WILDCARD_REGEX.search(wildcard)
        if first_hit:
            # Literal text ahead of the first wildcard char becomes the
            # request prefix; an empty lead-in is reported as None.
            literal_head = wildcard[:first_hit.start()]
            prefix = literal_head or None
            # Keep only the path component holding the wildcard, together
            # with its closing '/' when there is one.
            component, slash, _ = wildcard[first_hit.start():].partition('/')
            # Dropping the trailing '/' lets gs://bucket/abc* and
            # gs://bucket/abc*/ be filtered with the same expression.
            prefix_wildcard = StripOneSlash(literal_head + component + slash)
            # Whatever follows the next '/' after the match is deferred to
            # the following expansion round ('' if there is no such '/').
            _, _, suffix_wildcard = wildcard[first_hit.end():].partition('/')
            if '**' in prefix_wildcard:
                # Recursive (**) wildcard: send no delimiter and fold the
                # remainder into the filter so nothing is left to expand.
                delimiter = None
                prefix_wildcard += suffix_wildcard
                suffix_wildcard = ''
            else:
                delimiter = '/'
        else:
            # No wildcard chars at all: return a tuple that makes the bucket
            # listing match the input itself. E.g. if the previous round
            # yielded gs://bucket/dir/ with suffix_wildcard abc, this round
            # is called with dir/abc and answers prefix='dir/abc',
            # delimiter='/', prefix_wildcard='dir/abc', suffix_wildcard=''.
            prefix = prefix_wildcard = wildcard
            delimiter = '/'
            suffix_wildcard = ''
        # Tracing aid for following a multi-part wildcard such as
        # gs://bucket/abc/d*e/f*.txt through successive calls.
        if self.debug > 1:
            shown = tuple(
                PrintableStr(item)
                for item in (wildcard, prefix, delimiter, prefix_wildcard,
                             suffix_wildcard))
            sys.stderr.write(
                'DEBUG: wildcard=%s, prefix=%s, delimiter=%s, '
                'prefix_wildcard=%s, suffix_wildcard=%s\n' % shown)
        return (prefix, delimiter, prefix_wildcard, suffix_wildcard)
  def _BuildBucketFilterStrings(self, wildcard):
    """Splits a wildcard into the pieces used to list and filter a bucket.

    One call handles a single level of wildcard expansion: everything up to
    and including the first path component that contains a wildcard char.
    Whatever follows that component is handed back so the caller can feed it
    into the next expansion round.

    Args:
      wildcard: The wildcard string to match to objects.

    Returns:
      (prefix, delimiter, prefix_wildcard, suffix_wildcard)
      where:
        prefix is the literal text before the first wildcard char (None when
          the wildcard char comes first), to be sent in the bucket GET
          request.
        delimiter is the delimiter to be sent in the bucket GET request:
          '/' normally, None when prefix_wildcard contains '**'.
        prefix_wildcard is the wildcard used to filter bucket GET results.
        suffix_wildcard is the remainder to append to filtered results for
          the next wildcard expansion iteration ('' if none).
      Example (assuming WILDCARD_REGEX first matches at the '*' and
      StripOneSlash drops a single trailing '/'): the object name wildcard
      abc/d*e/f*.txt yields prefix=abc/d, delimiter=/,
      prefix_wildcard=abc/d*e and suffix_wildcard=f*.txt.

      A string without wildcard chars is not an error: it is returned as
      both prefix and prefix_wildcard, with delimiter '/' and an empty
      suffix_wildcard.
    """
    first_hit = WILDCARD_REGEX.search(wildcard)
    if first_hit:
      # Literal text ahead of the first wildcard char becomes the request
      # prefix; an empty lead-in is reported as None.
      literal_head = wildcard[:first_hit.start()]
      prefix = literal_head or None
      # Keep only the path component holding the wildcard, together with
      # its closing '/' when there is one.
      component, slash, _ = wildcard[first_hit.start():].partition('/')
      # Dropping the trailing '/' lets gs://bucket/abc* and
      # gs://bucket/abc*/ be filtered with the same expression.
      prefix_wildcard = StripOneSlash(literal_head + component + slash)
      # Whatever follows the next '/' after the match is deferred to the
      # following expansion round ('' if there is no such '/').
      _, _, suffix_wildcard = wildcard[first_hit.end():].partition('/')
      if '**' in prefix_wildcard:
        # Recursive (**) wildcard: send no delimiter and fold the remainder
        # into the filter so nothing is left to expand.
        delimiter = None
        prefix_wildcard += suffix_wildcard
        suffix_wildcard = ''
      else:
        delimiter = '/'
    else:
      # No wildcard chars at all: return a tuple that makes the bucket
      # listing match the input itself. E.g. if the previous round yielded
      # gs://bucket/dir/ with suffix_wildcard abc, this round is called with
      # dir/abc and answers prefix='dir/abc', delimiter='/',
      # prefix_wildcard='dir/abc', suffix_wildcard=''.
      prefix = prefix_wildcard = wildcard
      delimiter = '/'
      suffix_wildcard = ''
    # Tracing aid for following a multi-part wildcard such as
    # gs://bucket/abc/d*e/f*.txt through successive calls.
    if self.debug > 1:
      shown = (wildcard, prefix, delimiter, prefix_wildcard, suffix_wildcard)
      sys.stderr.write(
          'DEBUG: wildcard=%s, prefix=%s, delimiter=%s, '
          'prefix_wildcard=%s, suffix_wildcard=%s\n' % shown)
    return (prefix, delimiter, prefix_wildcard, suffix_wildcard)