示例#1
0
    def CatUrlStrings(self,
                      url_strings,
                      show_header=False,
                      start_byte=0,
                      end_byte=None):
        """Prints each of the url strings to stdout.

    Args:
      url_strings: String iterable.
      show_header: If true, print a header per file.
      start_byte: Starting byte of the file to print, used for constructing
                  range requests.
      end_byte: Ending byte of the file to print; used for constructing range
                requests. If this is negative, the start_byte is ignored and
                and end range is sent over HTTP (such as range: bytes -9)
    Returns:
      0 on success.

    Raises:
      CommandException if no URLs can be found.
    """
        printed_one = False
        # We manipulate the stdout so that all other data other than the Object
        # contents go to stderr.
        cat_outfd = sys.stdout
        sys.stdout = sys.stderr
        try:
            for url_str in url_strings:
                did_some_work = False
                # TODO: Get only the needed fields here.
                for blr in self.command_obj.WildcardIterator(
                        url_str).IterObjects():
                    did_some_work = True
                    if show_header:
                        if printed_one:
                            print
                        print '==> %s <==' % blr
                        printed_one = True
                    cat_object = blr.root_object
                    storage_url = StorageUrlFromString(blr.url_string)
                    if storage_url.IsCloudUrl():
                        self.command_obj.gsutil_api.GetObjectMedia(
                            cat_object.bucket,
                            cat_object.name,
                            cat_outfd,
                            start_byte=start_byte,
                            end_byte=end_byte,
                            object_size=cat_object.size,
                            generation=storage_url.generation,
                            provider=storage_url.scheme)
                    else:
                        cat_outfd.write(
                            open(storage_url.object_name, 'rb').read())
                if not did_some_work:
                    raise CommandException('No URLs matched %s' % url_str)
            sys.stdout = cat_outfd
        finally:
            sys.stdout = cat_outfd

        return 0
示例#2
0
    def __iter__(self):
        """Iterates over all source URLs passed to the iterator.

        For each src url, expands wildcards, object-less bucket names,
        subdir bucket names, and directory names, and generates a flat listing
        of all the matching objects/files.

        A streaming file URL is yielded as-is without any expansion, and is
        only allowed when it is the sole source URL.

        When a URL matches nothing and self.continue_on_error is set, an
        (exception, traceback) tuple is yielded in place of a result instead
        of raising.

        You should instantiate this object using the static factory function
        NameExpansionIterator, because consumers of this iterator need the
        PluralityCheckableIterator wrapper built by that function.

        Yields:
          gslib.name_expansion.NameExpansionResult.

        Raises:
          CommandException: if errors encountered.
        """
        for url_str in self.url_strs:
            storage_url = StorageUrlFromString(url_str)

            # Streams are passed through unexpanded and cannot be combined
            # with other source URLs.
            if storage_url.IsFileUrl() and storage_url.IsStream():
                if self.url_strs.has_plurality:
                    raise CommandException(
                        'Multiple URL strings are not supported '
                        'with streaming ("-") URLs.')
                yield NameExpansionResult(storage_url, False, False,
                                          storage_url)
                continue

            # Step 1: Expand any explicitly specified wildcards. The output from this
            # step is an iterator of BucketListingRef.
            # Starting with gs://buck*/abc* this step would expand to gs://bucket/abcd

            src_names_bucket = False
            if (storage_url.IsCloudUrl() and storage_url.IsBucket()
                    and not self.recursion_requested):
                # UNIX commands like rm and cp will omit directory references.
                # If url_str refers only to buckets and we are not recursing,
                # then produce references of type BUCKET, because they are guaranteed
                # to pass through Step 2 and be omitted in Step 3.
                post_step1_iter = PluralityCheckableIterator(
                    self.WildcardIterator(url_str).IterBuckets(
                        bucket_fields=['id']))
            else:
                # Get a list of objects and prefixes, expanding the top level for
                # any listed buckets.  If our source is a bucket, however, we need
                # to treat all of the top level expansions as names_container=True.
                post_step1_iter = PluralityCheckableIterator(
                    self.WildcardIterator(url_str).IterAll(
                        bucket_listing_fields=['name'],
                        expand_top_level_buckets=True))
                if storage_url.IsCloudUrl() and storage_url.IsBucket():
                    src_names_bucket = True

            # Step 2: Expand bucket subdirs. The output from this
            # step is an iterator of (names_container, BucketListingRef).
            # Starting with gs://bucket/abcd this step would expand to:
            #   iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]).
            # The wildcard used for subdir expansion is looked up by the
            # recursion flag.
            subdir_exp_wildcard = self._flatness_wildcard[
                self.recursion_requested]
            if self.recursion_requested:
                post_step2_iter = _ImplicitBucketSubdirIterator(
                    self, post_step1_iter, subdir_exp_wildcard)
            else:
                post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter)
            post_step2_iter = PluralityCheckableIterator(post_step2_iter)

            # Because we actually perform and check object listings here, this will
            # raise if url_args includes a non-existent object.  However,
            # plurality_checkable_iterator will buffer the exception for us, not
            # raising it until the iterator is actually asked to yield the first
            # result.
            if post_step2_iter.IsEmpty():
                if self.continue_on_error:
                    # Raise and immediately catch so that sys.exc_info() has a
                    # traceback to hand along with the exception.
                    try:
                        raise CommandException('No URLs matched: %s' % url_str)
                    except CommandException, e:
                        # Yield a specialized tuple of (exception, stack_trace) to
                        # the wrapping PluralityCheckableIterator.
                        yield (e, sys.exc_info()[2])
                    # NOTE: there is no `continue` here, so after the yield
                    # control falls through to Step 3 with the empty iterator.
                else:
                    raise CommandException('No URLs matched: %s' % url_str)

            # Step 3. Omit any directories, buckets, or bucket subdirectories for
            # non-recursive expansions.
            post_step3_iter = PluralityCheckableIterator(
                _OmitNonRecursiveIterator(post_step2_iter,
                                          self.recursion_requested,
                                          self.command_name,
                                          self.cmd_supports_recursion,
                                          self.logger))

            # Plurality is queried before iteration starts. The request counts
            # as multi-source if there are several URL strings or if this one
            # URL expands to more than one entry.
            src_url_expands_to_multi = post_step3_iter.HasPlurality()
            is_multi_source_request = (self.url_strs.has_plurality
                                       or src_url_expands_to_multi)

            # Step 4. Expand directories and buckets. This step yields the iterated
            # values. Starting with gs://bucket this step would expand to:
            #  [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
            # Starting with file://dir this step would expand to:
            #  [dir/a.txt, dir/b.txt, dir/c/]
            for (names_container, blr) in post_step3_iter:
                # A bucket source makes every top-level expansion count as
                # naming a container (see Step 1).
                src_names_container = src_names_bucket or names_container

                if blr.IsObject():
                    yield NameExpansionResult(storage_url,
                                              is_multi_source_request,
                                              src_names_container,
                                              blr.storage_url)
                else:
                    # Use implicit wildcarding to do the enumeration.
                    # At this point we are guaranteed that:
                    # - Recursion has been requested because non-object entries are
                    #   filtered in step 3 otherwise.
                    # - This is a prefix or bucket subdirectory because only
                    #   non-recursive iterations produce bucket references.
                    expanded_url = StorageUrlFromString(blr.url_string)
                    if expanded_url.IsFileUrl():
                        # Convert dir to implicit recursive wildcard.
                        url_to_iterate = '%s%s%s' % (blr, os.sep,
                                                     subdir_exp_wildcard)
                    else:
                        # Convert subdir to implicit recursive wildcard.
                        url_to_iterate = expanded_url.CreatePrefixUrl(
                            wildcard_suffix=subdir_exp_wildcard)

                    wc_iter = PluralityCheckableIterator(
                        self.WildcardIterator(url_to_iterate).IterObjects(
                            bucket_listing_fields=['name']))
                    # Widen the multi-source flag if this container itself
                    # expands to more than one object. Once set, it stays set
                    # for later entries of the same source URL.
                    src_url_expands_to_multi = (src_url_expands_to_multi
                                                or wc_iter.HasPlurality())
                    is_multi_source_request = (self.url_strs.has_plurality
                                               or src_url_expands_to_multi)
                    # This will be a flattened listing of all underlying objects in the
                    # subdir.
                    # NOTE: this rebinds the outer loop's `blr`; the outer value
                    # is not used again before the next outer iteration.
                    for blr in wc_iter:
                        yield NameExpansionResult(storage_url,
                                                  is_multi_source_request,
                                                  True, blr.storage_url)
示例#3
0
    def CatUrlStrings(self,
                      url_strings,
                      show_header=False,
                      start_byte=0,
                      end_byte=None,
                      cat_out_fd=None):
        """Prints each of the url strings to stdout.

    Args:
      url_strings: String iterable.
      show_header: If true, print a header per file.
      start_byte: Starting byte of the file to print, used for constructing
                  range requests.
      end_byte: Ending byte of the file to print; used for constructing range
                requests. If this is negative, the start_byte is ignored and
                and end range is sent over HTTP (such as range: bytes -9)
      cat_out_fd: File descriptor to which output should be written. Defaults to
                 stdout if no file descriptor is supplied.
    Returns:
      0 on success.

    Raises:
      CommandException if no URLs can be found.
    """
        printed_one = False
        # This should refer to whatever sys.stdin refers to when this method is
        # run, not when this method is defined, so we do the initialization here
        # rather than define sys.stdin as the cat_out_fd parameter's default value.
        if cat_out_fd is None:
            cat_out_fd = sys.stdout
        # We manipulate the stdout so that all other data other than the Object
        # contents go to stderr.
        old_stdout = sys.stdout
        sys.stdout = sys.stderr
        try:
            if url_strings and url_strings[0] in ('-', 'file://-'):
                self._WriteBytesBufferedFileToFile(sys.stdin, cat_out_fd)
            else:
                for url_str in url_strings:
                    did_some_work = False
                    # TODO: Get only the needed fields here.
                    for blr in self.command_obj.WildcardIterator(
                            url_str
                    ).IterObjects(
                            bucket_listing_fields=_CAT_BUCKET_LISTING_FIELDS):
                        decryption_keywrapper = None
                        if (blr.root_object
                                and blr.root_object.customerEncryption and
                                blr.root_object.customerEncryption.keySha256):
                            decryption_key = FindMatchingCSEKInBotoConfig(
                                blr.root_object.customerEncryption.keySha256,
                                config)
                            if not decryption_key:
                                raise EncryptionException(
                                    'Missing decryption key with SHA256 hash %s. No decryption '
                                    'key matches object %s' %
                                    (blr.root_object.customerEncryption.
                                     keySha256, blr.url_string))
                            decryption_keywrapper = CryptoKeyWrapperFromKey(
                                decryption_key)

                        did_some_work = True
                        if show_header:
                            if printed_one:
                                print
                            print '==> %s <==' % blr
                            printed_one = True
                        cat_object = blr.root_object
                        storage_url = StorageUrlFromString(blr.url_string)
                        if storage_url.IsCloudUrl():
                            compressed_encoding = ObjectIsGzipEncoded(
                                cat_object)
                            self.command_obj.gsutil_api.GetObjectMedia(
                                cat_object.bucket,
                                cat_object.name,
                                cat_out_fd,
                                compressed_encoding=compressed_encoding,
                                start_byte=start_byte,
                                end_byte=end_byte,
                                object_size=cat_object.size,
                                generation=storage_url.generation,
                                decryption_tuple=decryption_keywrapper,
                                provider=storage_url.scheme)
                        else:
                            with open(storage_url.object_name, 'rb') as f:
                                self._WriteBytesBufferedFileToFile(
                                    f, cat_out_fd)
                    if not did_some_work:
                        raise CommandException(NO_URLS_MATCHED_TARGET %
                                               url_str)
        finally:
            sys.stdout = old_stdout

        return 0
示例#4
0
  def CatUrlStrings(self, url_strings, show_header=False, start_byte=0,
                    end_byte=None):
    """Prints each of the url strings to stdout.

    Args:
      url_strings: String iterable.
      show_header: If true, print a header per file.
      start_byte: Starting byte of the file to print, used for constructing
                  range requests.
      end_byte: Ending byte of the file to print; used for constructing range
                requests. If this is negative, the start_byte is ignored and
                and end range is sent over HTTP (such as range: bytes -9)
    Returns:
      0 on success.

    Raises:
      CommandException if no URLs can be found.
    """
    printed_one = False
    # We manipulate the stdout so that all other data other than the Object
    # contents go to stderr.
    cat_outfd = sys.stdout
    sys.stdout = sys.stderr
    try:
      for url_str in url_strings:
        did_some_work = False
        # TODO: Get only the needed fields here.
        for blr in self.command_obj.WildcardIterator(url_str).IterObjects():

          decryption_tuple = None
          if (blr.root_object and
              blr.root_object.customerEncryption and
              blr.root_object.customerEncryption.keySha256):
            decryption_key = FindMatchingCryptoKey(
                blr.root_object.customerEncryption.keySha256)
            if not decryption_key:
              raise EncryptionException(
                  'Missing decryption key with SHA256 hash %s. No decryption '
                  'key matches object %s'
                  % (blr.root_object.customerEncryption.keySha256,
                     blr.url_string))
            decryption_tuple = CryptoTupleFromKey(decryption_key)

          did_some_work = True
          if show_header:
            if printed_one:
              print
            print '==> %s <==' % blr
            printed_one = True
          cat_object = blr.root_object
          storage_url = StorageUrlFromString(blr.url_string)
          if storage_url.IsCloudUrl():
            compressed_encoding = ObjectIsGzipEncoded(cat_object)
            self.command_obj.gsutil_api.GetObjectMedia(
                cat_object.bucket, cat_object.name, cat_outfd,
                compressed_encoding=compressed_encoding,
                start_byte=start_byte, end_byte=end_byte,
                object_size=cat_object.size, generation=storage_url.generation,
                decryption_tuple=decryption_tuple, provider=storage_url.scheme)
          else:
            cat_outfd.write(open(storage_url.object_name, 'rb').read())
        if not did_some_work:
          raise CommandException(NO_URLS_MATCHED_TARGET % url_str)
      sys.stdout = cat_outfd
    finally:
      sys.stdout = cat_outfd

    return 0