Example no. 1
0
 def testContainsWildcardMultipleMatches(self, mock_CreateWildcardIterator,
                                         mock_gsutil_api):
   """A wildcard that expands to several objects must raise CommandException."""
   expansion = iter([
       BucketListingObject(StorageUrlFromString('gs://test/helloworld')),
       BucketListingObject(StorageUrlFromString('gs://test/helloworld2')),
   ])
   mock_CreateWildcardIterator.return_value = expansion
   with self.assertRaises(CommandException):
     ExpandUrlToSingleBlr('gs://test/hello*/', mock_gsutil_api, 'project_id',
                          False, CreateOrGetGsutilLogger('copy_test'))
Example no. 2
0
    def __iter__(self, bucket_listing_fields=None):
        """Iterator that gets called when iterating over the file wildcard.

    In the case where no wildcard is present, returns a single matching file
    or directory.

    Args:
      bucket_listing_fields: Iterable fields to include in listings.
          Ex. ['size']. Currently only 'size' is supported.
          If present, will populate yielded BucketListingObject.root_object
          with the file name and size.

    Raises:
      WildcardException: if invalid wildcard found.

    Yields:
      BucketListingRef of type OBJECT (for files) or PREFIX (for directories)
    """
        # 'size' is the only supported listing field; any other entry in
        # bucket_listing_fields is silently ignored.
        include_size = (bucket_listing_fields
                        and 'size' in set(bucket_listing_fields))

        wildcard = self.wildcard_url.object_name
        match = FLAT_LIST_REGEX.match(wildcard)
        if match:
            # Recursive wildcarding request ('.../**/...').
            # Example input: wildcard = '/tmp/tmp2pQJAX/**/*'
            base_dir = match.group('before')[:-1]
            remaining_wildcard = match.group('after')
            # At this point for the above example base_dir = '/tmp/tmp2pQJAX' and
            # remaining_wildcard = '/*'
            if remaining_wildcard.startswith('*'):
                # Three or more consecutive '*'s (e.g. '***') are rejected.
                raise WildcardException(
                    'Invalid wildcard with more than 2 consecutive '
                    '*s (%s)' % wildcard)
            # If there was no remaining wildcard past the recursive wildcard,
            # treat it as if it were a '*'. For example, file://tmp/** is equivalent
            # to file://tmp/**/*
            if not remaining_wildcard:
                remaining_wildcard = '*'
            # Skip slash(es).
            remaining_wildcard = remaining_wildcard.lstrip(os.sep)
            filepaths = self._IterDir(base_dir, remaining_wildcard)
        else:
            # Not a recursive wildcarding request.
            filepaths = glob.iglob(wildcard)
        for filepath in filepaths:
            expanded_url = StorageUrlFromString(filepath)
            try:
                # Optionally skip symlinked paths.
                if self.ignore_symlinks and os.path.islink(filepath):
                    continue
                if os.path.isdir(filepath):
                    yield BucketListingPrefix(expanded_url)
                else:
                    # root_object is only populated when 'size' was requested.
                    blr_object = _GetFileObject(
                        filepath) if include_size else None
                    yield BucketListingObject(expanded_url,
                                              root_object=blr_object)
            except UnicodeEncodeError:
                # Non-encodable file name; surface a readable error message.
                raise CommandException('\n'.join(
                    textwrap.wrap(_UNICODE_EXCEPTION_TEXT % repr(filepath))))
Example no. 3
0
  def RunCommand(self):
    """Command entry point for the compose command.

    Expands the source arguments (with wildcard support), validates that every
    component lives in the destination bucket, then issues a single compose
    request whose content type is copied from the first source object.

    Raises:
      CommandException: for a versioned destination, cross-bucket sources,
          too many components, or no components.
    """
    # The last positional argument is the destination; the rest are sources.
    target_url_str = self.args[-1]
    self.args = self.args[:-1]
    target_url = StorageUrlFromString(target_url_str)
    self.CheckProvider(target_url)
    # Composing onto a specific object generation is not supported.
    if target_url.HasGeneration():
      raise CommandException('A version-specific URL (%s) cannot be '
                             'the destination for gsutil compose - abort.'
                             % target_url)

    dst_obj_metadata = apitools_messages.Object(name=target_url.object_name,
                                                bucket=target_url.bucket_name)

    components = []
    # Remember the first source object so we can get its content type.
    first_src_url = None
    for src_url_str in self.args:
      # Expand wildcard sources; plain URLs are wrapped as single-item lists.
      if ContainsWildcard(src_url_str):
        src_url_iter = self.WildcardIterator(src_url_str).IterObjects()
      else:
        src_url_iter = [BucketListingObject(StorageUrlFromString(src_url_str))]
      for blr in src_url_iter:
        src_url = blr.storage_url
        self.CheckProvider(src_url)

        # All components must live in the destination bucket.
        if src_url.bucket_name != target_url.bucket_name:
          raise CommandException(
              'GCS does not support inter-bucket composing.')

        if not first_src_url:
          first_src_url = src_url
        src_obj_metadata = (
            apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
                name=src_url.object_name))
        # Preserve an explicit source generation if one was given.
        if src_url.HasGeneration():
          src_obj_metadata.generation = src_url.generation
        components.append(src_obj_metadata)
        # Avoid expanding too many components, and sanity check each name
        # expansion result.
        if len(components) > MAX_COMPOSE_ARITY:
          raise CommandException('"compose" called with too many component '
                                 'objects. Limit is %d.' % MAX_COMPOSE_ARITY)

    if not components:
      raise CommandException('"compose" requires at least 1 component object.')

    # The composed object inherits its content type from the first source.
    dst_obj_metadata.contentType = self.gsutil_api.GetObjectMetadata(
        first_src_url.bucket_name, first_src_url.object_name,
        provider=first_src_url.scheme, fields=['contentType']).contentType

    preconditions = PreconditionsFromHeaders(self.headers or {})

    self.logger.info(
        'Composing %s from %d component object(s).',
        target_url, len(components))
    self.gsutil_api.ComposeObject(
        components, dst_obj_metadata, preconditions=preconditions,
        provider=target_url.scheme,
        encryption_tuple=GetEncryptionKeyWrapper(config))
Example no. 4
0
 def RunCommand(self):
     """Command entry point for stat command.

     Returns:
       0 if every URL argument matched at least one object, 1 otherwise.

     Raises:
       CommandException: if an argument is not an object URL.
     """
     # Request encryption-dependent fields (hashes) plus the full set of
     # unencrypted listing fields.
     stat_fields = ENCRYPTED_FIELDS + UNENCRYPTED_FULL_LISTING_FIELDS
     found_nonmatching_arg = False
     for url_str in self.args:
         arg_matches = 0
         url = StorageUrlFromString(url_str)
         if not url.IsObject():
             raise CommandException(
                 'The stat command only works with object URLs')
         try:
             if ContainsWildcard(url_str):
                 blr_iter = self.WildcardIterator(url_str).IterObjects(
                     bucket_listing_fields=stat_fields)
             else:
                 # Non-wildcard URL: fetch the single object's metadata
                 # directly instead of listing.
                 try:
                     single_obj = self.gsutil_api.GetObjectMetadata(
                         url.bucket_name,
                         url.object_name,
                         generation=url.generation,
                         provider=url.scheme,
                         fields=stat_fields)
                 except EncryptionException:
                     # Retry without requesting hashes.
                     single_obj = self.gsutil_api.GetObjectMetadata(
                         url.bucket_name,
                         url.object_name,
                         generation=url.generation,
                         provider=url.scheme,
                         fields=UNENCRYPTED_FULL_LISTING_FIELDS)
                 blr_iter = [
                     BucketListingObject(url, root_object=single_obj)
                 ]
             for blr in blr_iter:
                 if blr.IsObject():
                     arg_matches += 1
                     # TODO: Request fewer fields if we're not printing the object.
                     if logging.getLogger().isEnabledFor(logging.INFO):
                         PrintFullInfoAboutObject(blr, incl_acl=False)
         except AccessDeniedException:
             # Lack of permission on one URL doesn't abort the others.
             if logging.getLogger().isEnabledFor(logging.INFO):
                 sys.stderr.write(
                     'You aren\'t authorized to read %s - skipping' %
                     url_str)
         except InvalidUrlError:
             raise
         except NotFoundException:
             # Missing objects are reported via the no-match path below.
             pass
         if not arg_matches:
             if logging.getLogger().isEnabledFor(logging.INFO):
                 sys.stderr.write(NO_URLS_MATCHED_TARGET % url_str)
             found_nonmatching_arg = True
     if found_nonmatching_arg:
         return 1
     return 0
Example no. 5
0
  def testContainsWildcardMatchesObject(self, mock_CreateWildcardIterator,
                                        mock_gsutil_api):
    """A wildcard that expands to exactly one object resolves to its URL."""
    single_url = StorageUrlFromString('gs://test/helloworld')
    mock_CreateWildcardIterator.return_value = iter(
        [BucketListingObject(single_url)])
    exp_url, have_existing_dst_container = ExpandUrlToSingleBlr(
        'gs://test/hello*/', mock_gsutil_api, 'project_id', False,
        CreateOrGetGsutilLogger('copy_test'))

    self.assertFalse(have_existing_dst_container)
    self.assertEqual(exp_url, single_url)
Example no. 6
0
 def RunCommand(self):
     """Command entry point for stat command."""
     # Object metadata fields requested and displayed for each match.
     stat_fields = [
         'updated', 'cacheControl', 'contentDisposition', 'contentEncoding',
         'contentLanguage', 'size', 'contentType', 'componentCount',
         'metadata', 'crc32c', 'md5Hash', 'etag', 'generation',
         'metageneration'
     ]
     exit_code = 0
     for url_str in self.args:
         url = StorageUrlFromString(url_str)
         if not url.IsObject():
             raise CommandException(
                 'The stat command only works with object URLs')
         matched = 0
         try:
             if ContainsWildcard(url_str):
                 listing = self.WildcardIterator(url_str).IterObjects(
                     bucket_listing_fields=stat_fields)
             else:
                 # Non-wildcard URL: fetch the single object's metadata.
                 metadata = self.gsutil_api.GetObjectMetadata(
                     url.bucket_name,
                     url.object_name,
                     generation=url.generation,
                     provider=url.scheme,
                     fields=stat_fields)
                 listing = [BucketListingObject(url, root_object=metadata)]
             for blr in listing:
                 if not blr.IsObject():
                     continue
                 matched += 1
                 if logging.getLogger().isEnabledFor(logging.INFO):
                     PrintFullInfoAboutObject(blr, incl_acl=False)
         except AccessDeniedException:
             # Permission failure on one URL doesn't abort the others.
             if logging.getLogger().isEnabledFor(logging.INFO):
                 sys.stderr.write(
                     'You aren\'t authorized to read %s - skipping' %
                     url_str)
         except InvalidUrlError:
             raise
         except NotFoundException:
             pass
         if not matched:
             if logging.getLogger().isEnabledFor(logging.INFO):
                 sys.stderr.write('No URLs matched %s' % url_str)
             exit_code = 1
     return exit_code
Example no. 7
0
    def __iter__(self):
        """Yields a BucketListingRef for each path matching the file wildcard.

    In the case where no wildcard is present, yields the single matching
    file or directory.

    Raises:
      WildcardException: if invalid wildcard found.

    Yields:
      BucketListingRef of type OBJECT (for files) or PREFIX (for directories)
    """
        wildcard = self.wildcard_url.object_name
        recursive_match = FLAT_LIST_REGEX.match(wildcard)
        if not recursive_match:
            # Plain (non-recursive) wildcard; glob expands it lazily.
            paths = glob.iglob(wildcard)
        else:
            # Recursive '**' request, e.g. wildcard = '/tmp/tmp2pQJAX/**/*',
            # which splits into base_dir = '/tmp/tmp2pQJAX' and a remaining
            # suffix wildcard of '/*'.
            base_dir = recursive_match.group('before')[:-1]
            suffix_wildcard = recursive_match.group('after')
            if suffix_wildcard.startswith('*'):
                raise WildcardException(
                    'Invalid wildcard with more than 2 consecutive '
                    '*s (%s)' % wildcard)
            # A bare trailing '**' (empty suffix) behaves like '**/*':
            # file://tmp/** is equivalent to file://tmp/**/*.
            if not suffix_wildcard:
                suffix_wildcard = '*'
            # Drop the leading path separator(s).
            suffix_wildcard = suffix_wildcard.lstrip(os.sep)
            paths = self._IterDir(base_dir, suffix_wildcard)
        for path in paths:
            expanded_url = StorageUrlFromString(path)
            try:
                if os.path.isdir(path):
                    yield BucketListingPrefix(expanded_url)
                else:
                    yield BucketListingObject(expanded_url)
            except UnicodeEncodeError:
                raise CommandException('\n'.join(
                    textwrap.wrap(_UNICODE_EXCEPTION_TEXT % repr(path))))
Example no. 8
0
def _LocalDirIterator(base_url):
  """Yields a BucketListingObject for each plain file in a local directory.

     This is used instead of WildcardIterator for listing a local directory
     without recursion, because the glob-based iteration underlying
     WildcardIterator skips "dot" files (which we don't want to do when
     synchronizing to or from a local directory).

  Args:
    base_url: URL for the directory over which to iterate.

  Yields:
    BucketListingObject for each file in the directory.
  """
  directory = base_url.object_name
  for entry in os.listdir(directory):
    path = os.path.join(directory, entry)
    # Subdirectories (and anything else that isn't a regular file) are
    # skipped; this listing is deliberately non-recursive.
    if os.path.isfile(path):
      yield BucketListingObject(StorageUrlFromString(path), None)
Example no. 9
0
    def _GetObjectRef(self, bucket_url_string, gcs_object, with_version=False):
        """Builds an OBJECT-type BucketListingRef from the arguments.

    Args:
      bucket_url_string: Wildcardless string describing the containing bucket.
      gcs_object: gsutil_api root Object for populating the BucketListingRef.
      with_version: If true, return a reference with a versioned string.

    Returns:
      BucketListingRef of type OBJECT.
    """
        url_string = '%s%s' % (bucket_url_string, gcs_object.name)
        # Generation can be None in test mocks, so just return the
        # live object for simplicity.
        if with_version and gcs_object.generation is not None:
            url_string += '#%s' % GenerationFromUrlAndString(
                self.wildcard_url, gcs_object.generation)
        return BucketListingObject(StorageUrlFromString(url_string),
                                   root_object=gcs_object)
Example no. 10
0
    def RunCommand(self):
        """Command entry point for the du command.

        Parses sub-options, then sums object sizes for each URL argument via
        LsHelper, printing per-object lines and/or summary lines depending on
        the flags given.

        Returns:
          0 on success.

        Raises:
          CommandException: for file URLs, or if a wildcard URL matched no
              objects.
        """
        # Defaults for all option-driven state.
        self.line_ending = '\n'
        self.all_versions = False
        self.produce_total = False
        self.human_readable = False
        self.summary_only = False
        self.exclude_patterns = []
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-0':
                    # NUL-terminated output lines.
                    self.line_ending = '\0'
                elif o == '-a':
                    self.all_versions = True
                elif o == '-c':
                    self.produce_total = True
                elif o == '-e':
                    self.exclude_patterns.append(a)
                elif o == '-h':
                    self.human_readable = True
                elif o == '-s':
                    self.summary_only = True
                elif o == '-X':
                    # Read exclude patterns from a file; '-' means stdin.
                    if a == '-':
                        f = sys.stdin
                    else:
                        f = open(a, 'r')
                    try:
                        # One pattern per non-blank line.
                        for line in f:
                            line = line.strip()
                            if line:
                                self.exclude_patterns.append(line)
                    finally:
                        # NOTE(review): this also closes sys.stdin when
                        # '-X -' is used — confirm that is intended.
                        f.close()

        if not self.args:
            # Default to listing all gs buckets.
            self.args = ['gs://']

        total_bytes = 0
        got_nomatch_errors = False

        # Callbacks handed to LsHelper to control what gets printed.
        def _PrintObjectLong(blr):
            return self._PrintInfoAboutBucketListingRef(blr)

        def _PrintNothing(unused_blr=None):
            pass

        def _PrintDirectory(num_bytes, name):
            # Per-directory subtotals are suppressed in summary-only mode.
            if not self.summary_only:
                self._PrintSummaryLine(num_bytes, name)

        for url_arg in self.args:
            top_level_storage_url = StorageUrlFromString(url_arg)
            if top_level_storage_url.IsFileUrl():
                raise CommandException('Only cloud URLs are supported for %s' %
                                       self.command_name)
            # Only object sizes are needed for the du computation.
            bucket_listing_fields = ['size']

            ls_helper = LsHelper(self.WildcardIterator,
                                 self.logger,
                                 print_object_func=_PrintObjectLong,
                                 print_dir_func=_PrintNothing,
                                 print_dir_header_func=_PrintNothing,
                                 print_dir_summary_func=_PrintDirectory,
                                 print_newline_func=_PrintNothing,
                                 all_versions=self.all_versions,
                                 should_recurse=True,
                                 exclude_patterns=self.exclude_patterns,
                                 fields=bucket_listing_fields)

            # ls_helper expands to objects and prefixes, so perform a top-level
            # expansion first.
            if top_level_storage_url.IsProvider():
                # Provider URL: use bucket wildcard to iterate over all buckets.
                top_level_iter = self.WildcardIterator(
                    '%s://*' % top_level_storage_url.scheme).IterBuckets(
                        bucket_fields=['id'])
            elif top_level_storage_url.IsBucket():
                top_level_iter = self.WildcardIterator(
                    '%s://%s' %
                    (top_level_storage_url.scheme,
                     top_level_storage_url.bucket_name)).IterBuckets(
                         bucket_fields=['id'])
            else:
                top_level_iter = [BucketListingObject(top_level_storage_url)]

            for blr in top_level_iter:
                storage_url = blr.storage_url
                if storage_url.IsBucket() and self.summary_only:
                    # Summarize a whole bucket with a recursive wildcard.
                    storage_url = StorageUrlFromString(
                        storage_url.CreatePrefixUrl(wildcard_suffix='**'))
                _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(
                    storage_url)
                # A wildcard that expanded to zero objects (with no excludes
                # applied) is an error.
                if (storage_url.IsObject() and exp_objs == 0
                        and ContainsWildcard(url_arg)
                        and not self.exclude_patterns):
                    got_nomatch_errors = True
                total_bytes += exp_bytes

                if self.summary_only:
                    self._PrintSummaryLine(exp_bytes,
                                           blr.url_string.rstrip('/'))

        if self.produce_total:
            self._PrintSummaryLine(total_bytes, 'total')

        if got_nomatch_errors:
            raise CommandException('One or more URLs matched no objects.')

        return 0