示例#1
0
    def RunCommand(self):
        """Command entry point for the setmeta command."""
        header_values = []
        for opt, arg in self.sub_opts or []:
            if opt == '-n':
                # -n is deprecated; it is now the default behavior.
                self.logger.warning(
                    'Warning: gsutil setmeta -n is now on by default, and will be '
                    'removed in the future.\nPlease use gsutil acl set ... to set '
                    'canned ACLs.')
            elif opt == '-h':
                if 'x-goog-acl' in arg or 'x-amz-acl' in arg:
                    raise CommandException(
                        'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
                        'set ... to set canned ACLs.')
                header_values.append(arg)

        metadata_minus, metadata_plus = self._ParseMetadataHeaders(header_values)

        # Removals are represented as empty-string values in the change dict.
        self.metadata_change = metadata_plus
        for removed_header in metadata_minus:
            self.metadata_change[removed_header] = ''

        if len(self.args) == 1 and not self.recursion_requested:
            storage_url = StorageUrlFromString(self.args[0])
            if not (storage_url.IsCloudUrl() and storage_url.IsObject()):
                raise CommandException('URL (%s) must name an object' %
                                       self.args[0])

        # Tracks whether any object's metadata failed to be set.
        self.everything_set_okay = True

        name_expansion_iterator = NameExpansionIterator(
            self.command_name,
            self.debug,
            self.logger,
            self.gsutil_api,
            self.args,
            self.recursion_requested,
            all_versions=self.all_versions,
            continue_on_error=self.parallel_operations)

        try:
            # Run in parallel (-m) mode if requested, using the configured
            # number of processes and threads; otherwise run the requests
            # sequentially in this process.
            self.Apply(_SetMetadataFuncWrapper,
                       name_expansion_iterator,
                       _SetMetadataExceptionHandler,
                       fail_on_error=True)
        except AccessDeniedException as access_error:
            if access_error.status == 403:
                self._WarnServiceAccounts()
            raise

        if not self.everything_set_okay:
            raise CommandException(
                'Metadata for some objects could not be set.')

        return 0
示例#2
0
  def RunCommand(self):
    """Composes the component objects named in args into the target object."""
    # The final argument names the composition target; everything before it
    # is a component source.
    target_uri = self.args[-1]
    self.args = self.args[:-1]
    target_suri = self.suri_builder.StorageUri(target_uri)
    self.CheckSUriProvider(target_suri)
    if target_suri.is_version_specific:
      raise CommandException('A version-specific URI\n(%s)\ncannot be '
                             'the destination for gsutil compose - abort.'
                             % target_suri)

    max_components = self.command_spec[MAX_ARGS] - 1
    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.proj_id_handler, self.headers, self.debug,
        self.logger, self.bucket_storage_uri_class, self.args, False,
        cmd_supports_recursion=False)
    components = []
    for expansion_result in name_expansion_iterator:
      component_suri = self.suri_builder.StorageUri(
          expansion_result.GetExpandedUriStr())
      self.CheckSUriProvider(component_suri)
      components.append(component_suri)
      # Bail out as soon as the limit is exceeded rather than expanding an
      # unbounded number of names, and sanity check each expansion result.
      if len(components) > max_components:
        raise CommandException('"compose" called with too many component '
                               'arguments. Limit is %d.' % max_components)

    self.logger.info(
        'Composing %s from %d component objects.' %
        (target_suri, len(components)))
    target_suri.compose(components)
示例#3
0
    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested initialized in command.py (so can be checked
        # in parent class for all commands).
        self.continue_on_error = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                elif o == '-v':
                    self.logger.info(
                        'WARNING: The %s -v option is no longer'
                        ' needed, and will eventually be removed.\n' %
                        self.command_name)

        # Used to track if any files failed to be removed.
        self.everything_removed_okay = True

        remove_func = self._MkRemoveFunc()
        exception_handler = self._MkRemoveExceptionHandler()

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URIs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.proj_id_handler,
                self.headers,
                self.debug,
                self.logger,
                self.bucket_storage_uri_class,
                self.args,
                self.recursion_requested,
                flat=self.recursion_requested,
                all_versions=self.all_versions)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(remove_func, name_expansion_iterator, exception_handler)

        # Assuming the bucket has versioning enabled, uri's that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException:
            # Fix: the bound exception was never used, so drop the binding.
            if not self.continue_on_error:
                raise
        except GSResponseError:
            # Fix: was `except GSResponseError, e:` — Python-2-only syntax,
            # and `e` was unused. The no-binding form is valid on both
            # Python 2 and Python 3.
            if not self.continue_on_error:
                raise
示例#4
0
 def _GetObjectNameExpansionIterator(self, url_args):
   """Returns a NameExpansionIterator for expanding the given object URLs."""
   # Only generation/metageneration are requested in the listing results.
   listing_fields = ['generation', 'metageneration']
   return NameExpansionIterator(
       self.command_name, self.debug, self.logger, self.gsutil_api, url_args,
       self.recursion_requested, all_versions=self.all_versions,
       continue_on_error=self.parallel_operations,
       bucket_listing_fields=listing_fields)
示例#5
0
  def RunCommand(self):
    """Command entry point for the rm command."""
    # self.recursion_requested initialized in command.py (so can be checked
    # in parent class for all commands).
    continue_on_error = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-f':
          continue_on_error = True
        elif o == '-r' or o == '-R':
          self.recursion_requested = True

    # Used to track if any files failed to be removed.
    self.everything_removed_okay = True

    def _RemoveExceptionHandler(e):
      """Simple exception handler to allow post-completion status."""
      self.THREADED_LOGGER.error(str(e))
      self.everything_removed_okay = False

    def _RemoveFunc(name_expansion_result):
      """Deletes a single expanded object URI; refuses to remove buckets."""
      exp_src_uri = self.suri_builder.StorageUri(
          name_expansion_result.GetExpandedUriStr())
      if exp_src_uri.names_container():
        if exp_src_uri.is_cloud_uri():
          # Before offering advice about how to do rm + rb, ensure those
          # commands won't fail because of bucket naming problems.
          boto.s3.connection.check_lowercase_bucketname(exp_src_uri.bucket_name)
        uri_str = exp_src_uri.object_name.rstrip('/')
        raise CommandException('"rm" command will not remove buckets. To '
                               'delete this/these bucket(s) do:\n\tgsutil rm '
                               '%s/*\n\tgsutil rb %s' % (uri_str, uri_str))
      self.THREADED_LOGGER.info('Removing %s...', exp_src_uri)
      try:
        exp_src_uri.delete_key(validate=False, headers=self.headers)
      except Exception:
        # Fix: was a bare `except:`, which also caught SystemExit and
        # KeyboardInterrupt, turning a Ctrl-C into a counted "failure".
        # Narrowed to Exception so interrupts still abort the command.
        if continue_on_error:
          self.everything_removed_okay = False
        else:
          raise

    # Expand wildcards, dirs, buckets, and bucket subdirs in URIs.
    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.proj_id_handler, self.headers, self.debug,
        self.bucket_storage_uri_class, self.args, self.recursion_requested,
        flat=self.recursion_requested)

    # Perform remove requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(_RemoveFunc, name_expansion_iterator, _RemoveExceptionHandler)

    if not self.everything_removed_okay:
      raise CommandException('Some files could not be removed.')
示例#6
0
文件: setmeta.py 项目: jkff/gsutil
    def RunCommand(self):
        """Command entry point for the setmeta command."""
        # A single non-recursive argument must refer to an object, not a
        # bucket or provider.
        if (len(self.args) == 1 and not self.recursion_requested and
                not self.suri_builder.StorageUri(self.args[0]).names_object()):
            raise CommandException('URI (%s) must name an object' %
                                   self.args[0])

        # Tracks whether setting metadata failed for any object.
        self.everything_set_okay = True

        name_expansion_iterator = NameExpansionIterator(
            self.command_name, self.proj_id_handler, self.headers, self.debug,
            self.logger, self.bucket_storage_uri_class, self.args,
            self.recursion_requested, flat=self.recursion_requested)
        try:
            # Run in parallel (-m) mode if requested, using the configured
            # number of processes and threads; otherwise run the requests
            # sequentially in this process.
            self.Apply(_SetMetadataFuncWrapper, name_expansion_iterator,
                       _SetMetadataExceptionHandler, fail_on_error=True)
        except GSResponseError as e:
            access_denied = (e.code == 'AccessDenied' and
                             e.reason == 'Forbidden' and e.status == 403)
            if access_denied:
                self._WarnServiceAccounts()
            raise

        if not self.everything_set_okay:
            raise CommandException(
                'Metadata for some objects could not be set.')

        return 0
示例#7
0
文件: iam.py 项目: vjeffz/gsutil
    def _PatchIam(self):
        """Patches IAM policy bindings on the URLs given in self.args."""
        self.continue_on_error = False
        self.recursion_requested = False

        patch_bindings_tuples = []

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o in ['-r', '-R']:
                    self.recursion_requested = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-d':
                    patch_bindings_tuples.append(BindingStringToTuple(
                        False, a))

        patterns = []

        # N.B.: self.sub_opts stops taking in options at the first non-flagged
        # token. The rest of the tokens are sent to self.args. Thus, in order to
        # handle input of the form "-d <binding> <binding> <url>", we will have to
        # parse self.args for a mix of both bindings and CloudUrls. We are not
        # expecting to come across the -r, -f flags here.
        it = iter(self.args)
        for token in it:
            if STORAGE_URI_REGEX.match(token):
                patterns.append(token)
                break
            if token == '-d':
                # Fix: use the builtin next() instead of the Python-2-only
                # iterator method it.next(); next() works on Python 2.6+ and 3.
                patch_bindings_tuples.append(
                    BindingStringToTuple(False, next(it)))
            else:
                patch_bindings_tuples.append(BindingStringToTuple(True, token))
        if not patch_bindings_tuples:
            raise CommandException('Must specify at least one binding.')

        # All following arguments are urls.
        for token in it:
            patterns.append(token)

        self.everything_set_okay = True
        self.tried_ch_on_resource_with_conditions = False
        threaded_wildcards = []
        for pattern in patterns:
            surl = StorageUrlFromString(pattern)
            try:
                if surl.IsBucket():
                    if self.recursion_requested:
                        surl.object = '*'
                        threaded_wildcards.append(surl.url_string)
                    else:
                        self.PatchIamHelper(surl, patch_bindings_tuples)
                else:
                    threaded_wildcards.append(surl.url_string)
            except AttributeError:
                error_msg = 'Invalid Cloud URL "%s".' % surl.object_name
                if set(surl.object_name).issubset(set('-Rrf')):
                    error_msg += (
                        ' This resource handle looks like a flag, which must appear '
                        'before all bindings. See "gsutil help iam ch" for more details.'
                    )
                raise CommandException(error_msg)

        if threaded_wildcards:
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations,
                bucket_listing_fields=['name'])

            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions)

            serialized_bindings_tuples_it = itertools.repeat(
                [SerializeBindingsTuple(t) for t in patch_bindings_tuples])
            # NOTE(review): itertools.izip is Python-2-only; under Python 3
            # this would need the builtin zip. Left as-is to match the rest of
            # this codebase — confirm target interpreter before changing.
            self.Apply(_PatchIamWrapper,
                       itertools.izip(serialized_bindings_tuples_it,
                                      name_expansion_iterator),
                       _PatchIamExceptionHandler,
                       fail_on_error=not self.continue_on_error,
                       seek_ahead_iterator=seek_ahead_iterator)

            self.everything_set_okay &= not GetFailureCount() > 0

        # TODO: Add an error counter for files and objects.
        if not self.everything_set_okay:
            msg = 'Some IAM policies could not be patched.'
            if self.tried_ch_on_resource_with_conditions:
                msg += '\n'
                msg += '\n'.join(
                    textwrap.wrap(
                        'Some resources had conditions present in their IAM policy '
                        'bindings, which is not supported by "iam ch". %s' %
                        (IAM_CH_CONDITIONS_WORKAROUND_MSG)))
            raise CommandException(msg)
示例#8
0
    def RunCommand(self):
        """Command entry point for the rewrite command."""
        self.continue_on_error = self.parallel_operations
        self.dest_storage_class = None
        self.no_preserve_acl = False
        self.read_args_from_stdin = False
        self.supported_transformation_flags = ['-k', '-s']
        self.transform_types = set()

        self.op_failure_count = 0
        self.boto_file_encryption_tuple, self.boto_file_encryption_sha256 = (
            GetEncryptionTupleAndSha256Hash())

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-f':
                    self.continue_on_error = True
                elif o == '-k':
                    self.transform_types.add(_TransformTypes.CRYPTO_KEY)
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-O':
                    self.no_preserve_acl = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True
                elif o == '-s':
                    self.transform_types.add(_TransformTypes.STORAGE_CLASS)
                    self.dest_storage_class = NormalizeStorageClass(a)

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rewrite command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        if not self.transform_types:
            raise CommandException(
                'rewrite command requires at least one transformation flag. '
                'Currently supported transformation flags: %s' %
                self.supported_transformation_flags)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        url_strs_generator = GenerationCheckGenerator(url_strs)

        # Convert recursive flag to flat wildcard to avoid performing multiple
        # listings.
        if self.recursion_requested:
            url_strs_generator = ConvertRecursiveToFlatWildcard(
                url_strs_generator)

        # Expand the source argument(s).
        name_expansion_iterator = NameExpansionIterator(
            self.command_name,
            self.debug,
            self.logger,
            self.gsutil_api,
            url_strs_generator,
            self.recursion_requested,
            project_id=self.project_id,
            continue_on_error=self.continue_on_error
            or self.parallel_operations,
            bucket_listing_fields=['name', 'size'])

        seek_ahead_iterator = None
        # Cannot seek ahead with stdin args, since we can only iterate them
        # once without buffering in memory.
        if not self.read_args_from_stdin:
            # Perform the same recursive-to-flat conversion on original url_strs so
            # that it is as true to the original iterator as possible.
            seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                seek_ahead_url_strs,
                self.recursion_requested,
                all_versions=self.all_versions,
                project_id=self.project_id)

        # Perform rewrite requests in parallel (-m) mode, if requested.
        self.Apply(_RewriteFuncWrapper,
                   name_expansion_iterator,
                   _RewriteExceptionHandler,
                   fail_on_error=(not self.continue_on_error),
                   shared_attrs=['op_failure_count'],
                   seek_ahead_iterator=seek_ahead_iterator)

        if self.op_failure_count:
            # Bug fix: was `'s' if self.op_failure_count else ''`, which is
            # always true inside this branch, so one failure printed
            # "1 files/objects". Pluralize only for counts greater than one.
            plural_str = 's' if self.op_failure_count > 1 else ''
            raise CommandException(
                '%d file%s/object%s could not be rewritten.' %
                (self.op_failure_count, plural_str, plural_str))

        return 0
示例#9
0
    def RunCommand(self):
        """Command entry point for the rewrite command."""
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.no_preserve_acl = False
        self.supported_transformation_flags = ['-k']
        self.transform_types = []

        self.op_failure_count = 0
        self.current_encryption_tuple, self.current_encryption_sha256 = (
            GetEncryptionTupleAndSha256Hash())

        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-f':
                    self.continue_on_error = True
                elif o == '-k':
                    self.transform_types.append(_TransformTypes.CRYPTO_KEY)
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-O':
                    self.no_preserve_acl = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rewrite command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        url_strs = GenerationCheckGenerator(url_strs)

        if not self.transform_types:
            raise CommandException(
                'rewrite command requires at least one transformation flag. '
                'Currently supported transformation flags: %s' %
                self.supported_transformation_flags)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        # Convert recursive flag to flat wildcard to avoid performing multiple
        # listings.
        if self.recursion_requested:
            url_strs = ConvertRecursiveToFlatWildcard(url_strs)

        # Expand the source argument(s).
        name_expansion_iterator = NameExpansionIterator(
            self.command_name,
            self.debug,
            self.logger,
            self.gsutil_api,
            url_strs,
            self.recursion_requested,
            project_id=self.project_id,
            continue_on_error=self.continue_on_error
            or self.parallel_operations)

        # Perform rewrite requests in parallel (-m) mode, if requested.
        self.Apply(_RewriteFuncWrapper,
                   name_expansion_iterator,
                   _RewriteExceptionHandler,
                   fail_on_error=(not self.continue_on_error),
                   shared_attrs=['op_failure_count'])

        if self.op_failure_count:
            # Bug fix: was `'s' if self.op_failure_count else ''`, which is
            # always true inside this branch, so one failure printed
            # "1 files/objects". Pluralize only for counts greater than one.
            plural_str = 's' if self.op_failure_count > 1 else ''
            raise CommandException(
                '%d file%s/object%s could not be rewritten.' %
                (self.op_failure_count, plural_str, plural_str))

        return 0
示例#10
0
  def RunCommand(self):
    """Command entry point for the rewrite command."""
    self.continue_on_error = self.parallel_operations
    self.csek_hash_to_keywrapper = {}
    self.dest_storage_class = None
    self.no_preserve_acl = False
    self.read_args_from_stdin = False
    self.supported_transformation_flags = ['-k', '-s']
    self.transform_types = set()

    self.op_failure_count = 0
    self.boto_file_encryption_keywrapper = GetEncryptionKeyWrapper(config)
    self.boto_file_encryption_sha256 = (
        self.boto_file_encryption_keywrapper.crypto_key_sha256
        if self.boto_file_encryption_keywrapper else None)

    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-f':
          self.continue_on_error = True
        elif o == '-k':
          self.transform_types.add(_TransformTypes.CRYPTO_KEY)
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o == '-O':
          self.no_preserve_acl = True
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
          self.all_versions = True
        elif o == '-s':
          self.transform_types.add(_TransformTypes.STORAGE_CLASS)
          self.dest_storage_class = NormalizeStorageClass(a)

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      url_strs = StdinIterator()
    else:
      if not self.args:
        raise CommandException('The rewrite command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args

    if not self.transform_types:
      raise CommandException(
          'rewrite command requires at least one transformation flag. '
          'Currently supported transformation flags: %s' %
          self.supported_transformation_flags)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    url_strs_generator = GenerationCheckGenerator(url_strs)

    # Convert recursive flag to flat wildcard to avoid performing multiple
    # listings.
    if self.recursion_requested:
      url_strs_generator = ConvertRecursiveToFlatWildcard(url_strs_generator)

    # Expand the source argument(s).
    name_expansion_iterator = NameExpansionIterator(
        self.command_name,
        self.debug,
        self.logger,
        self.gsutil_api,
        url_strs_generator,
        self.recursion_requested,
        project_id=self.project_id,
        continue_on_error=self.continue_on_error or self.parallel_operations,
        bucket_listing_fields=['name', 'size'])

    seek_ahead_iterator = None
    # Cannot seek ahead with stdin args, since we can only iterate them
    # once without buffering in memory.
    if not self.read_args_from_stdin:
      # Perform the same recursive-to-flat conversion on original url_strs so
      # that it is as true to the original iterator as possible.
      seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
      seek_ahead_iterator = SeekAheadNameExpansionIterator(
          self.command_name,
          self.debug,
          self.GetSeekAheadGsutilApi(),
          seek_ahead_url_strs,
          self.recursion_requested,
          all_versions=self.all_versions,
          project_id=self.project_id)

    # Rather than have each worker repeatedly calculate the sha256 hash for each
    # decryption_key in the boto config, do this once now and cache the results.
    for i in range(0, MAX_DECRYPTION_KEYS):
      key_number = i + 1
      keywrapper = CryptoKeyWrapperFromKey(
          config.get('GSUtil', 'decryption_key%s' % str(key_number), None))
      if keywrapper is None:
        # Stop at first attribute absence in lexicographical iteration.
        break
      if keywrapper.crypto_type == CryptoKeyType.CSEK:
        self.csek_hash_to_keywrapper[keywrapper.crypto_key_sha256] = keywrapper
    # Also include the encryption_key, since it should be used to decrypt and
    # then encrypt if the object's CSEK should remain the same.
    if self.boto_file_encryption_sha256 is not None:
      self.csek_hash_to_keywrapper[self.boto_file_encryption_sha256] = (
          self.boto_file_encryption_keywrapper)

    if self.boto_file_encryption_keywrapper is None:
      msg = '\n'.join(
          textwrap.wrap(
              'NOTE: No encryption_key was specified in the boto configuration '
              'file, so gsutil will not provide an encryption key in its rewrite '
              'API requests. This will decrypt the objects unless they are in '
              'buckets with a default KMS key set, in which case the service '
              'will automatically encrypt the rewritten objects with that key.')
      )
      print('%s\n' % msg, file=sys.stderr)

    # Perform rewrite requests in parallel (-m) mode, if requested.
    self.Apply(_RewriteFuncWrapper,
               name_expansion_iterator,
               _RewriteExceptionHandler,
               fail_on_error=(not self.continue_on_error),
               shared_attrs=['op_failure_count'],
               seek_ahead_iterator=seek_ahead_iterator)

    if self.op_failure_count:
      # Bug fix: was `'s' if self.op_failure_count else ''`, which is always
      # true inside this branch, so one failure printed "1 files/objects".
      # Pluralize only for counts greater than one.
      plural_str = 's' if self.op_failure_count > 1 else ''
      raise CommandException('%d file%s/object%s could not be rewritten.' %
                             (self.op_failure_count, plural_str, plural_str))

    return 0
示例#11
0
    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = False
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        bucket_urls_to_delete = []
        bucket_strings_to_delete = []
        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        # Used to track if any files failed to be removed.
        self.everything_removed_okay = True

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(_RemoveFuncWrapper,
                       name_expansion_iterator,
                       _RemoveExceptionHandler,
                       fail_on_error=(not self.continue_on_error))

        # Assuming the bucket has versioning enabled, url's that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if not bucket_urls_to_delete and not self.continue_on_error:
                raise
            # Reset the failure count if we failed due to an empty bucket that we're
            # going to delete.
            msg = 'No URLs matched: '
            if msg in str(e):
                parts = str(e).split(msg)
                if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
                    ResetFailureCount()
        except ServiceException:
            # Fix: was `except ServiceException, e:` — Python-2-only syntax,
            # and `e` was unused. The no-binding form is valid on both
            # Python 2 and Python 3.
            if not self.continue_on_error:
                raise
示例#12
0
    def RunCommand(self):
        """Command entry point for the setmeta command (legacy boto-based version).

        Parses -n/-h sub-options, derives the metadata additions/removals
        (either from -h headers or from a metadata-spec first argument),
        then applies the change to each expanded object URI, retrying on
        precondition (HTTP 412) collisions.

        Returns:
          0 on success.

        Raises:
          CommandException: if a single URI argument does not name an object,
            or if metadata could not be set on some objects.
        """
        headers = []
        # -n disables ACL preservation when rewriting object metadata.
        preserve_acl = True
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-n':
                    preserve_acl = False
                elif o == '-h':
                    headers.append(a)

        if headers:
            # Metadata changes supplied via repeated -h flags; all positional
            # args are URIs.
            (metadata_minus,
             metadata_plus) = self._ParseMetadataHeaders(headers)
            uri_args = self.args
        else:
            # Older calling convention: first positional arg is a metadata
            # spec string; the remaining args are the URIs to operate on.
            (metadata_minus,
             metadata_plus) = self._ParseMetadataSpec(self.args[0])
            uri_args = self.args[1:]

        if (len(uri_args) == 1 and
                not self.suri_builder.StorageUri(uri_args[0]).names_object()):
            raise CommandException('URI (%s) must name an object' %
                                   uri_args[0])

        # Used to track if any objects' metadata failed to be set.
        self.everything_set_okay = True

        def _SetMetadataExceptionHandler(e):
            """Simple exception handler to allow post-completion status."""
            self.THREADED_LOGGER.error(str(e))
            self.everything_set_okay = False

        def _SetMetadataFunc(name_expansion_result, retry=3):
            # Applies the metadata change to one expanded object URI, using
            # generation/metageneration preconditions to detect concurrent
            # writers; retries recursively up to `retry` times on HTTP 412.
            exp_src_uri = self.suri_builder.StorageUri(
                name_expansion_result.GetExpandedUriStr())
            self.THREADED_LOGGER.info('Setting metadata on %s...', exp_src_uri)

            key = exp_src_uri.get_key()
            # NOTE(review): boto keys usually expose `metageneration`, not
            # `meta_generation` (cf. the getattr-based variant elsewhere in
            # this file) -- confirm this attribute name against the key
            # implementation in use.
            meta_generation = key.meta_generation
            generation = key.generation

            headers = {}
            if generation:
                headers['x-goog-if-generation-match'] = generation
            if meta_generation:
                headers['x-goog-if-metageneration-match'] = meta_generation

            try:
                exp_src_uri.set_metadata(metadata_plus,
                                         metadata_minus,
                                         preserve_acl,
                                         headers=headers)
            except GSResponseError as response_error:
                # HTTP error 412 is "Precondition Failed."
                if response_error.status == 412:
                    if retry <= 0:
                        self.THREADED_LOGGER.error(
                            'Exhausted retries. Giving up.')
                        raise
                    self.THREADED_LOGGER.warn('Collision - %d tries left.',
                                              retry)
                    # Random jitter de-synchronizes concurrent writers before
                    # the retry.
                    time.sleep(random.uniform(0.5, 1.0))
                    _SetMetadataFunc(name_expansion_result, retry - 1)
                else:
                    raise

        name_expansion_iterator = NameExpansionIterator(
            self.command_name, self.proj_id_handler, self.headers, self.debug,
            self.bucket_storage_uri_class, uri_args, self.recursion_requested,
            self.recursion_requested)

        # Perform requests in parallel (-m) mode, if requested, using
        # configured number of parallel processes and threads. Otherwise,
        # perform requests with sequential function calls in current process.
        self.Apply(_SetMetadataFunc, name_expansion_iterator,
                   _SetMetadataExceptionHandler)

        if not self.everything_set_okay:
            raise CommandException(
                'Metadata for some objects could not be set.')

        return 0
示例#13
0
    def RunCommand(self):
        """Sets or removes metadata on the objects named by the command args.

        -h values are split into metadata additions and removals; -n turns
        off ACL preservation.  Each expanded object URI is updated under
        generation/metageneration preconditions; precondition failures
        surface as GSResponseError and are retried by the @Retry decorator.

        Returns:
          0 on success.

        Raises:
          CommandException: if a single URI argument does not name an object,
            or if metadata could not be set on some objects.
        """
        keep_acl = True
        header_values = []
        for flag, value in (self.sub_opts or []):
            if flag == '-n':
                keep_acl = False
            elif flag == '-h':
                header_values.append(value)

        metadata_minus, metadata_plus = self._ParseMetadataHeaders(
            header_values)

        if (len(self.args) == 1 and
                not self.suri_builder.StorageUri(self.args[0]).names_object()):
            raise CommandException('URI (%s) must name an object' %
                                   self.args[0])

        # Flipped to False by the exception handler if any update fails.
        self.everything_set_okay = True

        def _RecordFailure(e):
            """Logs the error and records that at least one update failed."""
            self.logger.error(str(e))
            self.everything_set_okay = False

        @Retry(GSResponseError, tries=3, delay=1, backoff=2)
        def _UpdateObjectMetadata(name_expansion_result):
            """Applies the metadata change to one expanded object URI."""
            target_uri = self.suri_builder.StorageUri(
                name_expansion_result.GetExpandedUriStr())
            self.logger.info('Setting metadata on %s...', target_uri)

            key = target_uri.get_key()
            preconditions = {}
            generation = getattr(key, 'generation', None)
            if generation:
                preconditions['x-goog-if-generation-match'] = generation
            metageneration = getattr(key, 'metageneration', None)
            if metageneration:
                preconditions['x-goog-if-metageneration-match'] = metageneration

            # If this fails because of a precondition, it raises a
            # GSResponseError for @Retry (above) to handle.
            target_uri.set_metadata(metadata_plus,
                                    metadata_minus,
                                    keep_acl,
                                    headers=preconditions)

        name_expansion_iterator = NameExpansionIterator(
            self.command_name, self.proj_id_handler, self.headers, self.debug,
            self.logger, self.bucket_storage_uri_class, self.args,
            self.recursion_requested, self.recursion_requested)

        try:
            # Run updates in parallel (-m) mode if requested; otherwise
            # sequentially in the current process.
            self.Apply(_UpdateObjectMetadata, name_expansion_iterator,
                       _RecordFailure)
        except GSResponseError as e:
            if (e.code == 'AccessDenied' and e.reason == 'Forbidden'
                    and e.status == 403):
                self._WarnServiceAccounts()
            raise

        if not self.everything_set_okay:
            raise CommandException(
                'Metadata for some objects could not be set.')

        return 0
示例#14
0
File: rm.py  Project: unhooked/gsutil
class RmCommand(Command):
    """Implementation of gsutil rm command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'rm',
        command_name_aliases=['del', 'delete', 'remove'],
        usage_synopsis=_SYNOPSIS,
        min_args=0,
        max_args=NO_MAX,
        supported_sub_args='afIrR',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=0,
        gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='rm',
        help_name_aliases=['del', 'delete', 'remove'],
        help_type='command_help',
        help_one_line_summary='Remove objects',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    def RunCommand(self):
        """Command entry point for the rm command.

        Parses sub-options, expands the URL arguments (or stdin with -I),
        removes the matched objects in parallel or sequentially, cleans up
        web-UI "_$folder$" placeholder objects, and finally deletes any
        buckets that were named for recursive removal.

        Returns:
          0 on success.

        Raises:
          CommandException: on invalid flag combinations, missing buckets,
            or (unless suppressed by -f) failed removals.
        """
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        # Tracks number of object deletes that failed.
        self.op_failure_count = 0

        # Tracks if any buckets were missing.
        self.bucket_not_found_count = 0

        # Tracks buckets that are slated for recursive deletion.
        bucket_urls_to_delete = []
        self.bucket_strings_to_delete = []

        if self.recursion_requested:
            bucket_fields = ['id']
            # NOTE(review): url_strs is iterated again further below; when it
            # is a StdinIterator that second pass would see an exhausted
            # stream -- confirm against StdinIterator's semantics.
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        self.bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            seek_ahead_iterator = None
            # Cannot seek ahead with stdin args, since we can only iterate them
            # once without buffering in memory.
            if not self.read_args_from_stdin:
                seek_ahead_iterator = SeekAheadNameExpansionIterator(
                    self.command_name,
                    self.debug,
                    self.GetSeekAheadGsutilApi(),
                    url_strs,
                    self.recursion_requested,
                    all_versions=self.all_versions,
                    project_id=self.project_id)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(
                _RemoveFuncWrapper,
                name_expansion_iterator,
                _RemoveExceptionHandler,
                fail_on_error=(not self.continue_on_error),
                shared_attrs=['op_failure_count', 'bucket_not_found_count'],
                seek_ahead_iterator=seek_ahead_iterator)

        # Assuming the bucket has versioning enabled, url's that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete,
                                               e):
                DecrementFailureCount()
            else:
                raise
        # The bound exception was unused, and the old "except X, e" comma
        # syntax is Python-2-only; use the bare form.
        except ServiceException:
            if not self.continue_on_error:
                raise

        if self.bucket_not_found_count:
            raise CommandException(
                'Encountered non-existent bucket during listing')

        if self.op_failure_count and not self.continue_on_error:
            raise CommandException('Some files could not be removed.')

        # If this was a gsutil rm -r command covering any bucket subdirs,
        # remove any dir_$folder$ objects (which are created by various web UI
        # tools to simulate folders).
        if self.recursion_requested:
            folder_object_wildcards = []
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsObject():
                    folder_object_wildcards.append('%s**_$folder$' % url_str)
            if folder_object_wildcards:
                self.continue_on_error = True
                try:
                    name_expansion_iterator = NameExpansionIterator(
                        self.command_name,
                        self.debug,
                        self.logger,
                        self.gsutil_api,
                        folder_object_wildcards,
                        self.recursion_requested,
                        project_id=self.project_id,
                        all_versions=self.all_versions)
                    # When we're removing folder objects, always continue on error
                    self.Apply(_RemoveFuncWrapper,
                               name_expansion_iterator,
                               _RemoveFoldersExceptionHandler,
                               fail_on_error=False)
                except CommandException as e:
                    # Ignore exception from name expansion due to an absent folder file.
                    if not e.reason.startswith(NO_URLS_MATCHED_GENERIC):
                        raise

        # Now that all data has been deleted, delete any bucket URLs.
        for url in bucket_urls_to_delete:
            self.logger.info('Removing %s...', url)

            @Retry(NotEmptyException, tries=3, timeout_secs=1)
            def BucketDeleteWithRetry():
                self.gsutil_api.DeleteBucket(url.bucket_name,
                                             provider=url.scheme)

            BucketDeleteWithRetry()

        if self.op_failure_count:
            # Bug fix: the plural suffix must depend on whether MORE THAN ONE
            # operation failed; the old "if self.op_failure_count" test was
            # always true inside this branch, yielding "1 files/objects".
            plural_str = 's' if self.op_failure_count > 1 else ''
            raise CommandException(
                '%d file%s/object%s could not be removed.' %
                (self.op_failure_count, plural_str, plural_str))

        return 0
示例#15
0
  def RunCommand(self):
    """Command entry point for the setmeta command.

    Collects -h header values (rejecting canned-ACL headers), converts them
    into a single metadata-change dict (removals become empty-string values),
    and applies that change to every expanded object URL.

    Returns:
      0 on success.

    Raises:
      CommandException: for canned-ACL headers, a missing -h flag, a non-object
        URL argument, or objects whose metadata could not be set.
    """
    header_values = []
    for flag, value in (self.sub_opts or []):
      if flag == '-h':
        if 'x-goog-acl' in value or 'x-amz-acl' in value:
          raise CommandException(
              'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
              'set ... to set canned ACLs.')
        header_values.append(value)

    metadata_minus, metadata_plus = self._ParseMetadataHeaders(header_values)

    # Removals are encoded as empty-string values in the change dict.
    self.metadata_change = metadata_plus
    for removed_header in metadata_minus:
      self.metadata_change[removed_header] = ''

    if not self.metadata_change:
      raise CommandException(
          'gsutil setmeta requires one or more headers to be provided with the'
          ' -h flag. See "gsutil help setmeta" for more information.')

    if len(self.args) == 1 and not self.recursion_requested:
      target = StorageUrlFromString(self.args[0])
      if not (target.IsCloudUrl() and target.IsObject()):
        raise CommandException('URL (%s) must name an object' % self.args[0])

    # Flipped by the exception handler if setting metadata fails anywhere.
    self.everything_set_okay = True

    self.preconditions = PreconditionsFromHeaders(self.headers)

    name_expansion_iterator = NameExpansionIterator(
        self.command_name,
        self.debug,
        self.logger,
        self.gsutil_api,
        self.args,
        self.recursion_requested,
        all_versions=self.all_versions,
        continue_on_error=self.parallel_operations,
        bucket_listing_fields=['generation', 'metadata', 'metageneration'])

    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name,
        self.debug,
        self.GetSeekAheadGsutilApi(),
        self.args,
        self.recursion_requested,
        all_versions=self.all_versions,
        project_id=self.project_id)

    try:
      # Apply the metadata change in parallel (-m) mode, if requested, using
      # the configured number of processes/threads; otherwise sequentially.
      self.Apply(_SetMetadataFuncWrapper,
                 name_expansion_iterator,
                 _SetMetadataExceptionHandler,
                 fail_on_error=True,
                 seek_ahead_iterator=seek_ahead_iterator)
    except AccessDeniedException as e:
      if e.status == 403:
        self._WarnServiceAccounts()
      raise

    if not self.everything_set_okay:
      raise CommandException('Metadata for some objects could not be set.')

    return 0
示例#16
0
    def RunCommand(self):
        """Command entry point for the rm command (legacy boto-based version).

        Parses sub-options, rejects rm -R on versioned buckets without -a,
        expands the URI arguments, and removes each matched object.

        Raises:
          CommandException: for the versioned-bucket misuse case, or (unless
            -f was given and no bucket deletions are pending) on removal
            failure.
        """
        # self.recursion_requested initialized in command.py (so can be checked
        # in parent class for all commands).
        self.continue_on_error = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                elif o == '-v':
                    self.logger.info(
                        'WARNING: The %s -v option is no longer'
                        ' needed, and will eventually be removed.\n' %
                        self.command_name)

        # rm -R without -a on a versioned bucket would leave archived object
        # generations behind, so reject that combination up front.
        if self.recursion_requested and not self.all_versions:
            for uri_str in self.args:
                # WildcardIterator returns BucketListingRefs.
                for blr in self.WildcardIterator(uri_str):
                    uri = blr.GetUri()
                    if uri.names_bucket() and uri.get_versioning_config():
                        raise CommandException(
                            'Running gsutil rm -R on a bucket-only URI (%s)\nwith '
                            'versioning enabled will not work without specifying the -a '
                            'flag. Please try\nagain, using:\n\tgsutil rm -Ra %s'
                            % (uri_str, ' '.join(self.args)))

        # Used to track if any files failed to be removed.
        self.everything_removed_okay = True

        remove_func = self._MkRemoveFunc()
        exception_handler = self._MkRemoveExceptionHandler()

        # Buckets named by the args are collected so an empty-bucket
        # "no URIs matched" failure below can be tolerated.
        bucket_uris_to_delete = []
        if self.recursion_requested:
            for uri_str in self.args:
                for blr in self.WildcardIterator(uri_str):
                    uri = blr.GetUri()
                    if uri.names_bucket():
                        bucket_uris_to_delete.append(uri)

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URIs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.proj_id_handler,
                self.headers,
                self.debug,
                self.logger,
                self.bucket_storage_uri_class,
                self.args,
                self.recursion_requested,
                flat=self.recursion_requested,
                all_versions=self.all_versions)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(remove_func, name_expansion_iterator, exception_handler)

        # Assuming the bucket has versioning enabled, uri's that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if not bucket_uris_to_delete and not self.continue_on_error:
                raise
        # The bound exception was unused, and the old "except X, e" comma
        # syntax is Python-2-only; use the bare form (matches the
        # CommandException handler above).
        except GSResponseError:
            if not self.continue_on_error:
                raise
示例#17
0
class RmCommand(Command):
    """Implementation of gsutil rm command."""

    # Command specification (processed by parent class).
    command_spec = {
        # Name of command.
        COMMAND_NAME: 'rm',
        # List of command name aliases.
        COMMAND_NAME_ALIASES: ['del', 'delete', 'remove'],
        # Min number of args required by this command.
        MIN_ARGS: 1,
        # Max number of args required by this command, or NO_MAX.
        MAX_ARGS: NO_MAX,
        # Getopt-style string specifying acceptable sub args.
        SUPPORTED_SUB_ARGS: 'afrRv',
        # True if file URIs acceptable for this command.
        FILE_URIS_OK: False,
        # True if provider-only URIs acceptable for this command.
        PROVIDER_URIS_OK: False,
        # Index in args of first URI arg.
        URIS_START_ARG: 0,
    }
    help_spec = {
        # Name of command or auxiliary help info for which this help applies.
        HELP_NAME: 'rm',
        # List of help name aliases.
        HELP_NAME_ALIASES: ['del', 'delete', 'remove'],
        # Type of help:
        HELP_TYPE: HelpType.COMMAND_HELP,
        # One line summary of this help.
        HELP_ONE_LINE_SUMMARY: 'Remove objects',
        # The full help text.
        HELP_TEXT: _detailed_help_text,
    }

    # Command entry point.
    def RunCommand(self):
        """Command entry point for the rm command.

        Parses sub-options, rejects rm -R on versioned buckets without -a,
        removes all matched objects, cleans up web-UI "_$folder$" placeholder
        objects, then deletes any buckets named for recursive removal.

        Returns:
          0 on success.

        Raises:
          CommandException: for the versioned-bucket misuse case or, unless
            suppressed by -f, on removal failure.
        """
        # self.recursion_requested initialized in command.py (so can be checked
        # in parent class for all commands).
        self.continue_on_error = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                elif o == '-v':
                    self.logger.info(
                        'WARNING: The %s -v option is no longer'
                        ' needed, and will eventually be removed.\n' %
                        self.command_name)

        # rm -R without -a on a versioned bucket would leave archived object
        # generations behind, so reject that combination up front.
        if self.recursion_requested and not self.all_versions:
            for uri_str in self.args:
                # WildcardIterator returns BucketListingRefs.
                for blr in self.WildcardIterator(uri_str):
                    uri = blr.GetUri()
                    if uri.names_bucket() and uri.get_versioning_config():
                        raise CommandException(
                            'Running gsutil rm -R on a bucket-only URI (%s)\nwith '
                            'versioning enabled will not work without specifying the -a '
                            'flag. Please try\nagain, using:\n\tgsutil rm -Ra %s'
                            % (uri_str, ' '.join(self.args)))

        # Used to track if any files failed to be removed.
        self.everything_removed_okay = True

        remove_func = self._MkRemoveFunc()
        exception_handler = self._MkRemoveExceptionHandler()

        # Buckets named by the args are slated for deletion after their
        # contents have been removed.
        bucket_uris_to_delete = []
        if self.recursion_requested:
            for uri_str in self.args:
                for blr in self.WildcardIterator(uri_str):
                    uri = blr.GetUri()
                    if uri.names_bucket():
                        bucket_uris_to_delete.append(uri)

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URIs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.proj_id_handler,
                self.headers,
                self.debug,
                self.logger,
                self.bucket_storage_uri_class,
                self.args,
                self.recursion_requested,
                flat=self.recursion_requested,
                all_versions=self.all_versions)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(remove_func, name_expansion_iterator, exception_handler)

        # Assuming the bucket has versioning enabled, uri's that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if not bucket_uris_to_delete and not self.continue_on_error:
                raise
        # The bound exception was unused, and the old "except X, e" comma
        # syntax is Python-2-only; use the bare form.
        except GSResponseError:
            if not self.continue_on_error:
                raise

        if not self.everything_removed_okay and not self.continue_on_error:
            raise CommandException('Some files could not be removed.')

        # If this was a gsutil rm -r command covering any bucket subdirs,
        # remove any dir_$folder$ objects (which are created by various web UI
        # tools to simulate folders).
        if self.recursion_requested:
            folder_object_wildcards = []
            for uri_str in self.args:
                uri = self.suri_builder.StorageUri(uri_str)
                # Bug fix: names_object is a method; the old paren-less
                # "if uri.names_object:" tested the truthiness of the bound
                # method (always True), so folder-cleanup wildcards were
                # appended even for bucket-only URIs.
                if uri.names_object():
                    folder_object_wildcards.append('%s**_$folder$' % uri)
            if folder_object_wildcards:
                self.continue_on_error = True
                try:
                    name_expansion_iterator = NameExpansionIterator(
                        self.command_name,
                        self.proj_id_handler,
                        self.headers,
                        self.debug,
                        self.logger,
                        self.bucket_storage_uri_class,
                        folder_object_wildcards,
                        self.recursion_requested,
                        flat=True,
                        all_versions=self.all_versions)
                    self.Apply(remove_func, name_expansion_iterator,
                               exception_handler)
                except CommandException as e:
                    # Ignore exception from name expansion due to an absent folder file.
                    if not e.reason.startswith('No URIs matched:'):
                        raise

        # Now that all data has been deleted, delete any bucket URIs.
        for uri in bucket_uris_to_delete:
            self.logger.info('Removing %s...', uri)
            uri.delete_bucket(self.headers)

        return 0
示例#18
0
class RmCommand(Command):
    """Implementation of gsutil rm command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'rm',
        command_name_aliases=['del', 'delete', 'remove'],
        min_args=1,
        max_args=NO_MAX,
        supported_sub_args='afrR',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=0,
        gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
    )
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='rm',
        help_name_aliases=['del', 'delete', 'remove'],
        help_type='command_help',
        help_one_line_summary='Remove objects',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    def RunCommand(self):
        """Command entry point for the rm command.

        Removes the objects named by self.args (recursively with -r/-R),
        cleans up any dir_$folder$ placeholder objects, then deletes any
        buckets that were named for recursive removal.

        Returns:
          0 on success.

        Raises:
          CommandException: if any object could not be removed and -f was
              not specified.
        """
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-r' or o == '-R':
                    # Recursive removal also removes all object versions.
                    self.recursion_requested = True
                    self.all_versions = True

        # Buckets (or whole providers) named with -r are slated for deletion
        # after their contents have been removed.
        bucket_urls_to_delete = []
        bucket_strings_to_delete = []
        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in self.args:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        bucket_strings_to_delete.append(url_str)

        # Used to track if any files failed to be removed.
        self.everything_removed_okay = True

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                self.args,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(_RemoveFuncWrapper,
                       name_expansion_iterator,
                       _RemoveExceptionHandler,
                       fail_on_error=(not self.continue_on_error))

        # Assuming the bucket has versioning enabled, url's that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if not bucket_urls_to_delete and not self.continue_on_error:
                raise
            # Reset the failure count if we failed due to an empty bucket that we're
            # going to delete.
            msg = 'No URLs matched: '
            if msg in str(e):
                parts = str(e).split(msg)
                if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
                    ResetFailureCount()
        # NOTE: was Python 2-only "except ServiceException, e:" syntax; the
        # bound exception was unused, so no name is bound here.
        except ServiceException:
            if not self.continue_on_error:
                raise

        if not self.everything_removed_okay and not self.continue_on_error:
            raise CommandException('Some files could not be removed.')

        # If this was a gsutil rm -r command covering any bucket subdirs,
        # remove any dir_$folder$ objects (which are created by various web UI
        # tools to simulate folders).
        if self.recursion_requested:
            had_previous_failures = GetFailureCount() > 0
            folder_object_wildcards = []
            for url_str in self.args:
                url = StorageUrlFromString(url_str)
                if url.IsObject():
                    folder_object_wildcards.append('%s**_$folder$' % url_str)
            if folder_object_wildcards:
                self.continue_on_error = True
                try:
                    name_expansion_iterator = NameExpansionIterator(
                        self.command_name,
                        self.debug,
                        self.logger,
                        self.gsutil_api,
                        folder_object_wildcards,
                        self.recursion_requested,
                        project_id=self.project_id,
                        all_versions=self.all_versions)
                    # When we're removing folder objects, always continue on error
                    self.Apply(_RemoveFuncWrapper,
                               name_expansion_iterator,
                               _RemoveFoldersExceptionHandler,
                               fail_on_error=False)
                except CommandException as e:
                    # Ignore exception from name expansion due to an absent folder file.
                    if not e.reason.startswith('No URLs matched:'):
                        raise
                if not had_previous_failures:
                    ResetFailureCount()

        # Now that all data has been deleted, delete any bucket URLs.
        for url in bucket_urls_to_delete:
            self.logger.info('Removing %s...', url)

            # Defined inside the loop so the decorator captures the current
            # url; invoked immediately below, within the same iteration.
            @Retry(NotEmptyException, tries=3, timeout_secs=1)
            def BucketDeleteWithRetry():
                self.gsutil_api.DeleteBucket(url.bucket_name,
                                             provider=url.scheme)

            BucketDeleteWithRetry()

        return 0
示例#19
0
                uri = self.suri_builder.StorageUri(uri_args[i])
                if uri.names_bucket():
                    uri_args[i] = uri.clone_replace_name('*').uri
        else:
            # Handle bucket ACL setting operations single-threaded, because
            # our threading machinery currently assumes it's working with objects
            # (name_expansion_iterator), and normally we wouldn't expect users to need
            # to set ACLs on huge numbers of buckets at once anyway.
            for i in range(len(uri_args)):
                uri_str = uri_args[i]
                if self.suri_builder.StorageUri(uri_str).names_bucket():
                    self._RunSingleThreadedSetAcl(acl_arg, uri_args)
                    return

        name_expansion_iterator = NameExpansionIterator(
            self.command_name, self.proj_id_handler, self.headers, self.debug,
            self.bucket_storage_uri_class, uri_args, self.recursion_requested,
            self.recursion_requested)

        # Perform requests in parallel (-m) mode, if requested, using
        # configured number of parallel processes and threads. Otherwise,
        # perform requests with sequential function calls in current process.
        self.Apply(_SetAclFunc, name_expansion_iterator,
                   _SetAclExceptionHandler)

        if not self.everything_set_okay:
            raise CommandException('ACLs for some objects could not be set.')

    def _RunSingleThreadedSetAcl(self, acl_arg, uri_args):
        some_matched = False
        for uri_str in uri_args:
            for blr in self.WildcardIterator(uri_str):
示例#20
0
File: iam.py  Project: vjeffz/gsutil
    def _SetIam(self):
        """Set IAM policy for given wildcards on the command line.

        Expects self.args[0] to be a local JSON policy file path and the
        remaining args to be bucket/object URL patterns. Raises
        ArgumentException if the policy file is missing or invalid, and
        CommandException if any policy could not be set.
        """

        self.continue_on_error = False
        self.recursion_requested = False
        self.all_versions = False
        force_etag = False
        etag = ''
        if self.sub_opts:
            for o, arg in self.sub_opts:
                if o in ['-r', '-R']:
                    self.recursion_requested = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-a':
                    self.all_versions = True
                elif o == '-e':
                    # -e supplies an explicit etag that overrides any etag
                    # present in the policy file (see force_etag check below).
                    etag = str(arg)
                    force_etag = True
                else:
                    self.RaiseInvalidArgumentException()

        file_url = self.args[0]
        patterns = self.args[1:]

        # Load the IAM policy file and raise error if the file is invalid JSON or
        # does not exist.
        try:
            with open(file_url, 'r') as fp:
                policy = json.loads(fp.read())
        except IOError:
            raise ArgumentException(
                'Specified IAM policy file "%s" does not exist.' % file_url)
        except ValueError as e:
            self.logger.debug('Invalid IAM policy file, ValueError:\n', e)
            raise ArgumentException('Invalid IAM policy file "%s".' % file_url)

        bindings = policy.get('bindings', [])
        if not force_etag:
            etag = policy.get('etag', '')

        # Round-trip the bindings/etag through JSON to build the apitools
        # Policy message; DecodeError means the policy is structurally invalid.
        policy_json = json.dumps({'bindings': bindings, 'etag': etag})
        try:
            policy = protojson.decode_message(apitools_messages.Policy,
                                              policy_json)
        except DecodeError:
            raise ArgumentException(
                'Invalid IAM policy file "%s" or etag "%s".' %
                (file_url, etag))

        self.everything_set_okay = True

        # This list of wildcard strings will be handled by NameExpansionIterator.
        threaded_wildcards = []

        for pattern in patterns:
            surl = StorageUrlFromString(pattern)
            if surl.IsBucket():
                if self.recursion_requested:
                    # With -r, turn the bucket URL into a bucket-wide object
                    # wildcard so the policy is applied to contained objects.
                    surl.object_name = '*'
                    threaded_wildcards.append(surl.url_string)
                else:
                    # Bucket policies are set directly, outside Apply().
                    self.SetIamHelper(surl, policy)
            else:
                threaded_wildcards.append(surl.url_string)

        # N.B.: If threaded_wildcards contains a non-existent bucket
        # (e.g. ["gs://non-existent", "gs://existent"]), NameExpansionIterator
        # will raise an exception in iter.next. This halts all iteration, even
        # when -f is set. This behavior is also evident in acl set. This behavior
        # also appears for any exception that will be raised when iterating over
        # wildcard expansions (access denied if bucket cannot be listed, etc.).
        if threaded_wildcards:
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations,
                bucket_listing_fields=['name'])

            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions)

            # Serialize the policy and repeat it so it can be zipped with each
            # name expansion result for the worker function.
            policy_it = itertools.repeat(protojson.encode_message(policy))
            self.Apply(_SetIamWrapper,
                       itertools.izip(policy_it, name_expansion_iterator),
                       _SetIamExceptionHandler,
                       fail_on_error=not self.continue_on_error,
                       seek_ahead_iterator=seek_ahead_iterator)

            # Any failure recorded during Apply marks the whole run as failed.
            self.everything_set_okay &= not GetFailureCount() > 0

        # TODO: Add an error counter for files and objects.
        if not self.everything_set_okay:
            raise CommandException('Some IAM policies could not be set.')
示例#21
0
    def RunCommand(self):
        """Command entry point for the setmeta command.

        Parses metadata changes either from -h headers or from a metadata
        spec given as the first positional arg, then applies the additions
        and removals to each named object.
        """
        headers = []
        preserve_acl = True
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-n':
                    preserve_acl = False
                elif o == '-h':
                    headers.append(a)

        if headers:
            # -h flags supplied: all positional args are URIs.
            (metadata_minus,
             metadata_plus) = self._ParseMetadataHeaders(headers)
            uri_args = self.args
        else:
            # No -h flags: first positional arg is the metadata spec.
            (metadata_minus,
             metadata_plus) = self._ParseMetadataSpec(self.args[0])
            uri_args = self.args[1:]

        if (len(uri_args) == 1 and
                not self.suri_builder.StorageUri(uri_args[0]).names_object()):
            raise CommandException('URI (%s) must name an object' %
                                   uri_args[0])

        # Used to track if any objects' metadata failed to be set.
        self.everything_set_okay = True

        def _SetMetadataExceptionHandler(e):
            """Simple exception handler to allow post-completion status."""
            self.THREADED_LOGGER.error(str(e))
            self.everything_set_okay = False

        def _SetMetadataFunc(name_expansion_result):
            # Applies the metadata changes to one expanded object URI.
            exp_src_uri = self.suri_builder.StorageUri(
                name_expansion_result.GetExpandedUriStr())
            self.THREADED_LOGGER.info('Setting metadata on %s...' %
                                      exp_src_uri)
            # Start from the object's existing metadata, apply additions,
            # then drop the headers slated for removal.
            metadata = self._ExtractMetadata(exp_src_uri)
            metadata.update(metadata_plus)
            for h in metadata_minus:
                if h in metadata:
                    del metadata[h]
            src_bucket = exp_src_uri.get_bucket()
            # Boto prepends the meta prefix when adding headers, so strip prefix in
            # metadata before sending back in to copy_key() call.
            rewritten_metadata = {}
            for h in metadata:
                if _IsCustomMeta(h):
                    h_pref_stripped = (h.replace('x-goog-meta-', '').replace(
                        'x-amz-meta-', ''))
                    rewritten_metadata[h_pref_stripped] = metadata[h]
                else:
                    rewritten_metadata[h] = metadata[h]
            metadata = rewritten_metadata
            # Copy the object onto itself with the updated metadata; this is
            # how the metadata-only update is performed.
            src_bucket.copy_key(exp_src_uri.object_name,
                                exp_src_uri.bucket_name,
                                exp_src_uri.object_name,
                                metadata=metadata,
                                preserve_acl=preserve_acl)

        name_expansion_iterator = NameExpansionIterator(
            self.command_name, self.proj_id_handler, self.headers, self.debug,
            self.bucket_storage_uri_class, uri_args, self.recursion_requested,
            self.recursion_requested)

        # Perform requests in parallel (-m) mode, if requested, using
        # configured number of parallel processes and threads. Otherwise,
        # perform requests with sequential function calls in current process.
        self.Apply(_SetMetadataFunc, name_expansion_iterator,
                   _SetMetadataExceptionHandler)

        if not self.everything_set_okay:
            raise CommandException(
                'Metadata for some objects could not be set.')
示例#22
0
    def SetAclCommandHelper(self):
        """Common logic for setting ACLs.

        Sets the standard ACL or the default object ACL depending on
        self.command_name. self.args[0] is either a path to a file containing
        XML ACL text or the name of a canned ACL; the remaining args are the
        URIs to operate on (all must share one provider).
        """

        acl_arg = self.args[0]
        uri_args = self.args[1:]
        # Disallow multi-provider setacl requests, because there are differences in
        # the ACL models.
        storage_uri = self.UrisAreForSingleProvider(uri_args)
        if not storage_uri:
            raise CommandException(
                '"%s" command spanning providers not allowed.' %
                self.command_name)

        # Determine whether acl_arg names a file containing XML ACL text vs. the
        # string name of a canned ACL.
        if os.path.isfile(acl_arg):
            with codecs.open(acl_arg, 'r', 'utf-8') as f:
                acl_arg = f.read()
            self.canned = False
        else:
            # No file exists, so expect a canned ACL string.
            canned_acls = storage_uri.canned_acls()
            if acl_arg not in canned_acls:
                raise CommandException('Invalid canned ACL "%s".' % acl_arg)
            self.canned = True

        # Used to track if any ACLs failed to be set.
        self.everything_set_okay = True

        def _SetAclExceptionHandler(e):
            """Simple exception handler to allow post-completion status."""
            self.logger.error(str(e))
            self.everything_set_okay = False

        def _SetAclFunc(name_expansion_result):
            # Sets the ACL on one expanded object URI.
            exp_src_uri = self.suri_builder.StorageUri(
                name_expansion_result.GetExpandedUriStr())
            # We don't do bucket operations multi-threaded (see comment below).
            assert self.command_name != 'setdefacl'
            self.logger.info('Setting ACL on %s...' %
                             name_expansion_result.expanded_uri_str)
            try:
                # Canned ACLs are applied by name; otherwise acl_arg holds the
                # XML ACL document text read from the file above.
                if self.canned:
                    exp_src_uri.set_acl(acl_arg, exp_src_uri.object_name,
                                        False, self.headers)
                else:
                    exp_src_uri.set_xml_acl(acl_arg, exp_src_uri.object_name,
                                            False, self.headers)
            except GSResponseError as e:
                # With -f (continue_on_error), record the failure and keep
                # going; otherwise propagate.
                if self.continue_on_error:
                    exc_name, error_detail = util.ExtractErrorDetail(e)
                    self.everything_set_okay = False
                    if error_detail:
                        sys.stderr.write(
                            '%s: status=%d, code=%s, reason=%s, detail=%s.\n' %
                            (exc_name, e.status, e.code, e.reason,
                             error_detail))
                else:
                    raise

        # If user specified -R option, convert any bucket args to bucket wildcards
        # (e.g., gs://bucket/*), to prevent the operation from being  applied to
        # the buckets themselves.
        if self.recursion_requested:
            for i in range(len(uri_args)):
                uri = self.suri_builder.StorageUri(uri_args[i])
                if uri.names_bucket():
                    uri_args[i] = uri.clone_replace_name('*').uri
        else:
            # Handle bucket ACL setting operations single-threaded, because
            # our threading machinery currently assumes it's working with objects
            # (name_expansion_iterator), and normally we wouldn't expect users to need
            # to set ACLs on huge numbers of buckets at once anyway.
            for i in range(len(uri_args)):
                uri_str = uri_args[i]
                if self.suri_builder.StorageUri(uri_str).names_bucket():
                    self._RunSingleThreadedSetAcl(acl_arg, uri_args)
                    return

        name_expansion_iterator = NameExpansionIterator(
            self.command_name,
            self.proj_id_handler,
            self.headers,
            self.debug,
            self.logger,
            self.bucket_storage_uri_class,
            uri_args,
            self.recursion_requested,
            self.recursion_requested,
            all_versions=self.all_versions)
        # Perform requests in parallel (-m) mode, if requested, using
        # configured number of parallel processes and threads. Otherwise,
        # perform requests with sequential function calls in current process.
        self.Apply(_SetAclFunc, name_expansion_iterator,
                   _SetAclExceptionHandler)

        if not self.everything_set_okay and not self.continue_on_error:
            raise CommandException('ACLs for some objects could not be set.')
示例#23
0
File: rm.py  Project: unhooked/gsutil
    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        # Tracks number of object deletes that failed.
        self.op_failure_count = 0

        # Tracks if any buckets were missing.
        self.bucket_not_found_count = 0

        # Tracks buckets that are slated for recursive deletion.
        bucket_urls_to_delete = []
        self.bucket_strings_to_delete = []

        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        self.bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            seek_ahead_iterator = None
            # Cannot seek ahead with stdin args, since we can only iterate them
            # once without buffering in memory.
            if not self.read_args_from_stdin:
                seek_ahead_iterator = SeekAheadNameExpansionIterator(
                    self.command_name,
                    self.debug,
                    self.GetSeekAheadGsutilApi(),
                    url_strs,
                    self.recursion_requested,
                    all_versions=self.all_versions,
                    project_id=self.project_id)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(
                _RemoveFuncWrapper,
                name_expansion_iterator,
                _RemoveExceptionHandler,
                fail_on_error=(not self.continue_on_error),
                shared_attrs=['op_failure_count', 'bucket_not_found_count'],
                seek_ahead_iterator=seek_ahead_iterator)

        # Assuming the bucket has versioning enabled, url's that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete,
                                               e):
                DecrementFailureCount()
            else:
                raise
        except ServiceException, e:
            if not self.continue_on_error:
                raise
示例#24
0
File: iam.py  Project: bopopescu/JoinMi
    def _PatchIam(self):
        """Patches IAM policy bindings on URLs given on the command line.

        Parses binding removals (-d) from self.sub_opts, then parses
        self.args as a mix of additional bindings followed by URL patterns,
        and applies the resulting binding patches to each matching bucket
        or object.

        Raises:
          CommandException: if no bindings were specified, a URL token looks
              like a misplaced flag, or any policy could not be patched.
        """
        self.continue_on_error = False
        self.recursion_requested = False

        patch_bindings_tuples = []

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o in ['-r', '-R']:
                    self.recursion_requested = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-d':
                    # -d marks a binding for removal (False => removal).
                    patch_bindings_tuples.append(BindingStringToTuple(
                        False, a))

        patterns = []

        # N.B.: self.sub_opts stops taking in options at the first non-flagged
        # token. The rest of the tokens are sent to self.args. Thus, in order to
        # handle input of the form "-d <binding> <binding> <url>", we will have to
        # parse self.args for a mix of both bindings and CloudUrls. We are not
        # expecting to come across the -r, -f flags here.
        it = iter(self.args)
        for token in it:
            if token == '-d':
                patch_bindings_tuples.append(
                    BindingStringToTuple(False, it.next()))
            else:
                try:
                    patch_bindings_tuples.append(
                        BindingStringToTuple(True, token))
                # All following arguments are urls.
                except (ArgumentException, CommandException):
                    patterns.append(token)
                    for token in it:
                        patterns.append(token)

        # We must have some bindings to process, else this is pointless.
        if not patch_bindings_tuples:
            raise CommandException('Must specify at least one binding.')

        self.everything_set_okay = True
        threaded_wildcards = []
        for pattern in patterns:
            surl = StorageUrlFromString(pattern)
            try:
                if surl.IsBucket():
                    if self.recursion_requested:
                        # With -r, turn the bucket URL into a bucket-wide
                        # object wildcard. Set object_name (not the
                        # nonexistent "object" attribute) so the wildcard is
                        # reflected in surl.url_string below -- consistent
                        # with _SetIam.
                        surl.object_name = '*'
                        threaded_wildcards.append(surl.url_string)
                    else:
                        # Bucket policies are patched directly.
                        self.PatchIamHelper(surl, patch_bindings_tuples)
                else:
                    threaded_wildcards.append(surl.url_string)
            except AttributeError:
                error_msg = 'Invalid Cloud URL "%s".' % surl.object_name
                if set(surl.object_name).issubset(set('-Rrf')):
                    error_msg += (
                        ' This resource handle looks like a flag, which must appear '
                        'before all bindings. See "gsutil help iam ch" for more details.'
                    )
                raise CommandException(error_msg)

        if threaded_wildcards:
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations,
                bucket_listing_fields=['name'])

            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions)

            # N.B.: Python2.6 support means we can't use a partial function here to
            # curry the bindings tuples into the wrapper function. We instead pass
            # the bindings along by zipping them with each name_expansion_iterator
            # result. See http://bugs.python.org/issue5228.
            serialized_bindings_tuples_it = itertools.repeat(
                [SerializeBindingsTuple(t) for t in patch_bindings_tuples])
            self.Apply(_PatchIamWrapper,
                       itertools.izip(serialized_bindings_tuples_it,
                                      name_expansion_iterator),
                       _PatchIamExceptionHandler,
                       fail_on_error=not self.continue_on_error,
                       seek_ahead_iterator=seek_ahead_iterator)

            # Any failure recorded during Apply marks the whole run as failed.
            self.everything_set_okay &= not GetFailureCount() > 0

        # TODO: Add an error counter for files and objects.
        if not self.everything_set_okay:
            raise CommandException('Some IAM policies could not be patched.')