def RunCommand(self):
  """Command entry point for the setmeta command.

  Parses -n/-h sub-options, validates that no canned ACL headers were
  supplied, builds the metadata change dict, and applies it (possibly in
  parallel) to every expanded object URL.

  Returns:
    0 on success.

  Raises:
    CommandException: on canned-ACL headers, a non-object URL, or if any
        object's metadata could not be set.
  """
  headers = []
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-n':
        # -n is now the default behavior; accept the flag but only warn.
        self.logger.warning(
            'Warning: gsutil setmeta -n is now on by default, and will be '
            'removed in the future.\nPlease use gsutil acl set ... to set '
            'canned ACLs.')
      elif o == '-h':
        if 'x-goog-acl' in a or 'x-amz-acl' in a:
          raise CommandException(
              'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
              'set ... to set canned ACLs.')
        headers.append(a)

  (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)

  self.metadata_change = metadata_plus
  for header in metadata_minus:
    # An empty value marks the header for removal.
    self.metadata_change[header] = ''

  if len(self.args) == 1 and not self.recursion_requested:
    url = StorageUrlFromString(self.args[0])
    if not (url.IsCloudUrl() and url.IsObject()):
      raise CommandException('URL (%s) must name an object' % self.args[0])

  # Used to track if any objects' metadata failed to be set.
  self.everything_set_okay = True

  name_expansion_iterator = NameExpansionIterator(
      self.command_name, self.debug, self.logger, self.gsutil_api,
      self.args, self.recursion_requested, all_versions=self.all_versions,
      continue_on_error=self.parallel_operations)

  try:
    # Perform requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(_SetMetadataFuncWrapper, name_expansion_iterator,
               _SetMetadataExceptionHandler, fail_on_error=True)
  except AccessDeniedException as e:
    if e.status == 403:
      self._WarnServiceAccounts()
    raise

  if not self.everything_set_okay:
    raise CommandException('Metadata for some objects could not be set.')

  return 0
def RunCommand(self):
  """Command entry point for the compose command.

  The last URI argument is the composite destination; all preceding
  arguments (after wildcard expansion) become its components.

  Raises:
    CommandException: if the destination is version-specific or too many
        component arguments were supplied.
  """
  target_uri = self.args[-1]
  self.args = self.args[:-1]
  target_suri = self.suri_builder.StorageUri(target_uri)
  self.CheckSUriProvider(target_suri)
  if target_suri.is_version_specific:
    raise CommandException('A version-specific URI\n(%s)\ncannot be '
                           'the destination for gsutil compose - abort.'
                           % target_suri)

  name_expansion_iterator = NameExpansionIterator(
      self.command_name, self.proj_id_handler, self.headers, self.debug,
      self.logger, self.bucket_storage_uri_class, self.args, False,
      cmd_supports_recursion=False)
  components = []
  for ne_result in name_expansion_iterator:
    suri = self.suri_builder.StorageUri(ne_result.GetExpandedUriStr())
    self.CheckSUriProvider(suri)
    components.append(suri)
    # Avoid expanding too many components, and sanity check each name
    # expansion result.
    if len(components) > self.command_spec[MAX_ARGS] - 1:
      raise CommandException('"compose" called with too many component '
                             'arguments. Limit is %d.'
                             % (self.command_spec[MAX_ARGS] - 1))

  self.logger.info(
      'Composing %s from %d component objects.'
      % (target_suri, len(components)))
  target_suri.compose(components)
def RunCommand(self):
  """Command entry point for the rm command.

  Parses -a/-f/-r/-v sub-options, then removes the expanded URIs (possibly
  in parallel), suppressing errors when -f (continue_on_error) was given.

  Fix: the second exception handler used the Python 2-only
  'except GSResponseError, e:' syntax, inconsistent with the
  'except CommandException as e:' handler right above it. Both now use
  'as', which works on Python 2.6+ and Python 3.
  """
  # self.recursion_requested initialized in command.py (so can be checked
  # in parent class for all commands).
  self.continue_on_error = False
  self.all_versions = False
  if self.sub_opts:
    for o, unused_a in self.sub_opts:
      if o == '-a':
        self.all_versions = True
      elif o == '-f':
        self.continue_on_error = True
      elif o == '-r' or o == '-R':
        self.recursion_requested = True
      elif o == '-v':
        self.logger.info('WARNING: The %s -v option is no longer'
                         ' needed, and will eventually be removed.\n'
                         % self.command_name)

  # Used to track if any files failed to be removed.
  self.everything_removed_okay = True

  # Tracks if any URIs matched the given args.
  remove_func = self._MkRemoveFunc()
  exception_handler = self._MkRemoveExceptionHandler()

  try:
    # Expand wildcards, dirs, buckets, and bucket subdirs in URIs.
    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.proj_id_handler, self.headers, self.debug,
        self.logger, self.bucket_storage_uri_class, self.args,
        self.recursion_requested, flat=self.recursion_requested,
        all_versions=self.all_versions)

    # Perform remove requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(remove_func, name_expansion_iterator, exception_handler)

  # Assuming the bucket has versioning enabled, uri's that don't map to
  # objects should throw an error even with all_versions, since the prior
  # round of deletes only sends objects to a history table.
  # This assumption that rm -a is only called for versioned buckets should
  # be corrected, but the fix is non-trivial.
  except CommandException as e:
    if not self.continue_on_error:
      raise
  except GSResponseError as e:
    if not self.continue_on_error:
      raise
def _GetObjectNameExpansionIterator(self, url_args):
  """Builds a NameExpansionIterator over the given object URL strings.

  Args:
    url_args: iterable of URL strings to expand.

  Returns:
    A NameExpansionIterator configured from this command's state, fetching
    only the generation and metageneration listing fields.
  """
  listing_fields = ['generation', 'metageneration']
  expansion_iterator = NameExpansionIterator(
      self.command_name,
      self.debug,
      self.logger,
      self.gsutil_api,
      url_args,
      self.recursion_requested,
      all_versions=self.all_versions,
      continue_on_error=self.parallel_operations,
      bucket_listing_fields=listing_fields)
  return expansion_iterator
def RunCommand(self):
  """Command entry point for the rm command (legacy boto-based version).

  Parses -f/-r sub-options, then deletes each expanded object URI, refusing
  to remove buckets (it suggests rm + rb instead).

  Fix: the delete_key error handler used a bare 'except:', which under -f
  would also swallow KeyboardInterrupt and SystemExit; narrowed to
  'except Exception:'.

  Raises:
    CommandException: if a bucket URI is given, or if any file could not be
        removed.
  """
  # self.recursion_requested initialized in command.py (so can be checked
  # in parent class for all commands).
  continue_on_error = False
  if self.sub_opts:
    for o, unused_a in self.sub_opts:
      if o == '-f':
        continue_on_error = True
      elif o == '-r' or o == '-R':
        self.recursion_requested = True

  # Used to track if any files failed to be removed.
  self.everything_removed_okay = True

  def _RemoveExceptionHandler(e):
    """Simple exception handler to allow post-completion status."""
    self.THREADED_LOGGER.error(str(e))
    self.everything_removed_okay = False

  def _RemoveFunc(name_expansion_result):
    """Deletes a single expanded URI; refuses to delete buckets."""
    exp_src_uri = self.suri_builder.StorageUri(
        name_expansion_result.GetExpandedUriStr())
    if exp_src_uri.names_container():
      if exp_src_uri.is_cloud_uri():
        # Before offering advice about how to do rm + rb, ensure those
        # commands won't fail because of bucket naming problems.
        boto.s3.connection.check_lowercase_bucketname(exp_src_uri.bucket_name)
      uri_str = exp_src_uri.object_name.rstrip('/')
      raise CommandException('"rm" command will not remove buckets. To '
                             'delete this/these bucket(s) do:\n\tgsutil rm '
                             '%s/*\n\tgsutil rb %s' % (uri_str, uri_str))
    self.THREADED_LOGGER.info('Removing %s...', exp_src_uri)
    try:
      exp_src_uri.delete_key(validate=False, headers=self.headers)
    except Exception:  # Was a bare except; must not swallow SystemExit etc.
      if continue_on_error:
        self.everything_removed_okay = False
      else:
        raise

  # Expand wildcards, dirs, buckets, and bucket subdirs in URIs.
  name_expansion_iterator = NameExpansionIterator(
      self.command_name, self.proj_id_handler, self.headers, self.debug,
      self.bucket_storage_uri_class, self.args, self.recursion_requested,
      flat=self.recursion_requested)

  # Perform remove requests in parallel (-m) mode, if requested, using
  # configured number of parallel processes and threads. Otherwise,
  # perform requests with sequential function calls in current process.
  self.Apply(_RemoveFunc, name_expansion_iterator, _RemoveExceptionHandler)

  if not self.everything_removed_okay:
    raise CommandException('Some files could not be removed.')
def RunCommand(self):
  """Command entry point for the setmeta command (legacy gsutil3 version).

  Validates that a single non-recursive argument names an object, then
  applies metadata changes (possibly in parallel) via
  _SetMetadataFuncWrapper.

  Returns:
    0 on success.

  Raises:
    CommandException: if the URI does not name an object, or if metadata
        could not be set on some objects.
  """
  if (len(self.args) == 1 and not self.recursion_requested
      and not self.suri_builder.StorageUri(self.args[0]).names_object()):
    raise CommandException('URI (%s) must name an object' % self.args[0])

  # Used to track if any objects' metadata failed to be set.
  self.everything_set_okay = True

  name_expansion_iterator = NameExpansionIterator(
      self.command_name, self.proj_id_handler, self.headers, self.debug,
      self.logger, self.bucket_storage_uri_class, self.args,
      self.recursion_requested, flat=self.recursion_requested)

  try:
    # Perform requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(_SetMetadataFuncWrapper, name_expansion_iterator,
               _SetMetadataExceptionHandler, fail_on_error=True)
  except GSResponseError as e:
    # Warn about service-account limitations on a 403 before re-raising.
    if e.code == 'AccessDenied' and e.reason == 'Forbidden' \
        and e.status == 403:
      self._WarnServiceAccounts()
    raise

  if not self.everything_set_okay:
    raise CommandException('Metadata for some objects could not be set.')

  return 0
def _PatchIam(self):
  """Patches IAM policy bindings for the "iam ch" subcommand.

  Parses a mix of -d/binding tokens and URL patterns out of self.sub_opts
  and self.args, patches buckets directly (or via wildcard expansion when
  recursing), and fans object patches out through self.Apply.

  Raises:
    CommandException: if no bindings were given, a URL is invalid, or some
        IAM policies could not be patched.
  """
  self.continue_on_error = False
  self.recursion_requested = False

  patch_bindings_tuples = []

  if self.sub_opts:
    for o, a in self.sub_opts:
      if o in ['-r', '-R']:
        self.recursion_requested = True
      elif o == '-f':
        self.continue_on_error = True
      elif o == '-d':
        patch_bindings_tuples.append(BindingStringToTuple(False, a))

  patterns = []

  # N.B.: self.sub_opts stops taking in options at the first non-flagged
  # token. The rest of the tokens are sent to self.args. Thus, in order to
  # handle input of the form "-d <binding> <binding> <url>", we will have to
  # parse self.args for a mix of both bindings and CloudUrls. We are not
  # expecting to come across the -r, -f flags here.
  it = iter(self.args)
  for token in it:
    if STORAGE_URI_REGEX.match(token):
      patterns.append(token)
      break
    if token == '-d':
      # -d consumes the next token as the binding to remove.
      patch_bindings_tuples.append(BindingStringToTuple(False, it.next()))
    else:
      patch_bindings_tuples.append(BindingStringToTuple(True, token))
  if not patch_bindings_tuples:
    raise CommandException('Must specify at least one binding.')

  # All following arguments are urls.
  for token in it:
    patterns.append(token)

  self.everything_set_okay = True
  self.tried_ch_on_resource_with_conditions = False
  threaded_wildcards = []
  for pattern in patterns:
    surl = StorageUrlFromString(pattern)
    try:
      if surl.IsBucket():
        if self.recursion_requested:
          # Recursion on a bucket means patching all its objects.
          surl.object = '*'
          threaded_wildcards.append(surl.url_string)
        else:
          self.PatchIamHelper(surl, patch_bindings_tuples)
      else:
        threaded_wildcards.append(surl.url_string)
    except AttributeError:
      error_msg = 'Invalid Cloud URL "%s".' % surl.object_name
      if set(surl.object_name).issubset(set('-Rrf')):
        error_msg += (
            ' This resource handle looks like a flag, which must appear '
            'before all bindings. See "gsutil help iam ch" for more details.')
      raise CommandException(error_msg)

  if threaded_wildcards:
    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api,
        threaded_wildcards, self.recursion_requested,
        all_versions=self.all_versions,
        continue_on_error=self.continue_on_error or self.parallel_operations,
        bucket_listing_fields=['name'])

    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
        threaded_wildcards, self.recursion_requested,
        all_versions=self.all_versions)

    # Pair each expansion result with the (serialized) bindings to apply.
    serialized_bindings_tuples_it = itertools.repeat(
        [SerializeBindingsTuple(t) for t in patch_bindings_tuples])
    self.Apply(_PatchIamWrapper,
               itertools.izip(serialized_bindings_tuples_it,
                              name_expansion_iterator),
               _PatchIamExceptionHandler,
               fail_on_error=not self.continue_on_error,
               seek_ahead_iterator=seek_ahead_iterator)

    self.everything_set_okay &= not GetFailureCount() > 0

  # TODO: Add an error counter for files and objects.
  if not self.everything_set_okay:
    msg = 'Some IAM policies could not be patched.'
    if self.tried_ch_on_resource_with_conditions:
      msg += '\n'
      msg += '\n'.join(
          textwrap.wrap(
              'Some resources had conditions present in their IAM policy '
              'bindings, which is not supported by "iam ch". %s' %
              (IAM_CH_CONDITIONS_WORKAROUND_MSG)))
    raise CommandException(msg)
def RunCommand(self):
  """Command entry point for the rewrite command.

  Parses sub-options, collects URLs from args or stdin (-I), requires at
  least one transformation flag (-k and/or -s), then performs rewrite
  requests (possibly in parallel).

  Fix: the pluralization condition at the end was evaluated inside
  'if self.op_failure_count:', so it was always true and a single failure
  printed "1 files/objects"; it now checks for a count greater than one.

  Returns:
    0 on success.

  Raises:
    CommandException: on bad flag combinations or if any object could not
        be rewritten.
  """
  self.continue_on_error = self.parallel_operations
  self.dest_storage_class = None
  self.no_preserve_acl = False
  self.read_args_from_stdin = False
  self.supported_transformation_flags = ['-k', '-s']
  self.transform_types = set()

  self.op_failure_count = 0
  self.boto_file_encryption_tuple, self.boto_file_encryption_sha256 = (
      GetEncryptionTupleAndSha256Hash())

  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-f':
        self.continue_on_error = True
      elif o == '-k':
        self.transform_types.add(_TransformTypes.CRYPTO_KEY)
      elif o == '-I':
        self.read_args_from_stdin = True
      elif o == '-O':
        self.no_preserve_acl = True
      elif o == '-r' or o == '-R':
        self.recursion_requested = True
        self.all_versions = True
      elif o == '-s':
        self.transform_types.add(_TransformTypes.STORAGE_CLASS)
        self.dest_storage_class = NormalizeStorageClass(a)

  if self.read_args_from_stdin:
    if self.args:
      raise CommandException('No arguments allowed with the -I flag.')
    url_strs = StdinIterator()
  else:
    if not self.args:
      raise CommandException('The rewrite command (without -I) expects at '
                             'least one URL.')
    url_strs = self.args

  if not self.transform_types:
    raise CommandException(
        'rewrite command requires at least one transformation flag. '
        'Currently supported transformation flags: %s' %
        self.supported_transformation_flags)

  self.preconditions = PreconditionsFromHeaders(self.headers or {})

  url_strs_generator = GenerationCheckGenerator(url_strs)

  # Convert recursive flag to flat wildcard to avoid performing multiple
  # listings.
  if self.recursion_requested:
    url_strs_generator = ConvertRecursiveToFlatWildcard(url_strs_generator)

  # Expand the source argument(s).
  name_expansion_iterator = NameExpansionIterator(
      self.command_name, self.debug, self.logger, self.gsutil_api,
      url_strs_generator, self.recursion_requested,
      project_id=self.project_id,
      continue_on_error=self.continue_on_error or self.parallel_operations,
      bucket_listing_fields=['name', 'size'])

  seek_ahead_iterator = None
  # Cannot seek ahead with stdin args, since we can only iterate them
  # once without buffering in memory.
  if not self.read_args_from_stdin:
    # Perform the same recursive-to-flat conversion on original url_strs so
    # that it is as true to the original iterator as possible.
    seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
        seek_ahead_url_strs, self.recursion_requested,
        all_versions=self.all_versions, project_id=self.project_id)

  # Perform rewrite requests in parallel (-m) mode, if requested.
  self.Apply(_RewriteFuncWrapper, name_expansion_iterator,
             _RewriteExceptionHandler,
             fail_on_error=(not self.continue_on_error),
             shared_attrs=['op_failure_count'],
             seek_ahead_iterator=seek_ahead_iterator)

  if self.op_failure_count:
    # Only pluralize when more than one operation actually failed.
    plural_str = 's' if self.op_failure_count > 1 else ''
    raise CommandException('%d file%s/object%s could not be rewritten.' %
                           (self.op_failure_count, plural_str, plural_str))

  return 0
def RunCommand(self):
  """Command entry point for the rewrite command (older -k only version).

  Parses sub-options, collects URLs from args or stdin (-I), requires at
  least one transformation flag (-k), then performs rewrite requests
  (possibly in parallel).

  Returns:
    0 on success.

  Raises:
    CommandException: on bad flag combinations or if any object could not
        be rewritten.
  """
  self.continue_on_error = self.parallel_operations
  self.read_args_from_stdin = False
  self.no_preserve_acl = False
  self.supported_transformation_flags = ['-k']
  self.transform_types = []

  self.op_failure_count = 0
  self.current_encryption_tuple, self.current_encryption_sha256 = (
      GetEncryptionTupleAndSha256Hash())

  if self.sub_opts:
    for o, unused_a in self.sub_opts:
      if o == '-f':
        self.continue_on_error = True
      elif o == '-k':
        self.transform_types.append(_TransformTypes.CRYPTO_KEY)
      elif o == '-I':
        self.read_args_from_stdin = True
      elif o == '-O':
        self.no_preserve_acl = True
      elif o == '-r' or o == '-R':
        self.recursion_requested = True
        self.all_versions = True

  if self.read_args_from_stdin:
    if self.args:
      raise CommandException('No arguments allowed with the -I flag.')
    url_strs = StdinIterator()
  else:
    if not self.args:
      raise CommandException('The rewrite command (without -I) expects at '
                             'least one URL.')
    url_strs = self.args

  url_strs = GenerationCheckGenerator(url_strs)

  if not self.transform_types:
    raise CommandException(
        'rewrite command requires at least one transformation flag. '
        'Currently supported transformation flags: %s' %
        self.supported_transformation_flags)

  self.preconditions = PreconditionsFromHeaders(self.headers or {})

  # Convert recursive flag to flat wildcard to avoid performing multiple
  # listings.
  if self.recursion_requested:
    url_strs = ConvertRecursiveToFlatWildcard(url_strs)

  # Expand the source argument(s).
  name_expansion_iterator = NameExpansionIterator(
      self.command_name, self.debug, self.logger, self.gsutil_api,
      url_strs, self.recursion_requested, project_id=self.project_id,
      continue_on_error=self.continue_on_error or self.parallel_operations)

  # Perform rewrite requests in parallel (-m) mode, if requested.
  self.Apply(_RewriteFuncWrapper, name_expansion_iterator,
             _RewriteExceptionHandler,
             fail_on_error=(not self.continue_on_error),
             shared_attrs=['op_failure_count'])

  if self.op_failure_count:
    # NOTE(review): this condition is always true inside this branch, so a
    # single failure prints "1 files/objects"; it likely should be
    # self.op_failure_count > 1 — confirm before changing the message.
    plural_str = 's' if self.op_failure_count else ''
    raise CommandException('%d file%s/object%s could not be rewritten.' %
                           (self.op_failure_count, plural_str, plural_str))

  return 0
def RunCommand(self):
  """Command entry point for the rewrite command (keywrapper version).

  Parses sub-options, collects URLs from args or stdin (-I), requires at
  least one transformation flag (-k and/or -s), caches decryption-key
  hashes from the boto config, and performs rewrite requests (possibly in
  parallel).

  Returns:
    0 on success.

  Raises:
    CommandException: on bad flag combinations or if any object could not
        be rewritten.
  """
  self.continue_on_error = self.parallel_operations
  self.csek_hash_to_keywrapper = {}
  self.dest_storage_class = None
  self.no_preserve_acl = False
  self.read_args_from_stdin = False
  self.supported_transformation_flags = ['-k', '-s']
  self.transform_types = set()

  self.op_failure_count = 0
  self.boto_file_encryption_keywrapper = GetEncryptionKeyWrapper(config)
  self.boto_file_encryption_sha256 = (
      self.boto_file_encryption_keywrapper.crypto_key_sha256
      if self.boto_file_encryption_keywrapper else None)

  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-f':
        self.continue_on_error = True
      elif o == '-k':
        self.transform_types.add(_TransformTypes.CRYPTO_KEY)
      elif o == '-I':
        self.read_args_from_stdin = True
      elif o == '-O':
        self.no_preserve_acl = True
      elif o == '-r' or o == '-R':
        self.recursion_requested = True
        self.all_versions = True
      elif o == '-s':
        self.transform_types.add(_TransformTypes.STORAGE_CLASS)
        self.dest_storage_class = NormalizeStorageClass(a)

  if self.read_args_from_stdin:
    if self.args:
      raise CommandException('No arguments allowed with the -I flag.')
    url_strs = StdinIterator()
  else:
    if not self.args:
      raise CommandException('The rewrite command (without -I) expects at '
                             'least one URL.')
    url_strs = self.args

  if not self.transform_types:
    raise CommandException(
        'rewrite command requires at least one transformation flag. '
        'Currently supported transformation flags: %s' %
        self.supported_transformation_flags)

  self.preconditions = PreconditionsFromHeaders(self.headers or {})

  url_strs_generator = GenerationCheckGenerator(url_strs)

  # Convert recursive flag to flat wildcard to avoid performing multiple
  # listings.
  if self.recursion_requested:
    url_strs_generator = ConvertRecursiveToFlatWildcard(url_strs_generator)

  # Expand the source argument(s).
  name_expansion_iterator = NameExpansionIterator(
      self.command_name, self.debug, self.logger, self.gsutil_api,
      url_strs_generator, self.recursion_requested,
      project_id=self.project_id,
      continue_on_error=self.continue_on_error or self.parallel_operations,
      bucket_listing_fields=['name', 'size'])

  seek_ahead_iterator = None
  # Cannot seek ahead with stdin args, since we can only iterate them
  # once without buffering in memory.
  if not self.read_args_from_stdin:
    # Perform the same recursive-to-flat conversion on original url_strs so
    # that it is as true to the original iterator as possible.
    seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
        seek_ahead_url_strs, self.recursion_requested,
        all_versions=self.all_versions, project_id=self.project_id)

  # Rather than have each worker repeatedly calculate the sha256 hash for
  # each decryption_key in the boto config, do this once now and cache the
  # results.
  for i in range(0, MAX_DECRYPTION_KEYS):
    key_number = i + 1
    keywrapper = CryptoKeyWrapperFromKey(
        config.get('GSUtil', 'decryption_key%s' % str(key_number), None))
    if keywrapper is None:
      # Stop at first attribute absence in lexicographical iteration.
      break
    if keywrapper.crypto_type == CryptoKeyType.CSEK:
      self.csek_hash_to_keywrapper[keywrapper.crypto_key_sha256] = keywrapper
  # Also include the encryption_key, since it should be used to decrypt and
  # then encrypt if the object's CSEK should remain the same.
  if self.boto_file_encryption_sha256 is not None:
    self.csek_hash_to_keywrapper[self.boto_file_encryption_sha256] = (
        self.boto_file_encryption_keywrapper)

  if self.boto_file_encryption_keywrapper is None:
    msg = '\n'.join(
        textwrap.wrap(
            'NOTE: No encryption_key was specified in the boto configuration '
            'file, so gsutil will not provide an encryption key in its '
            'rewrite API requests. This will decrypt the objects unless they '
            'are in buckets with a default KMS key set, in which case the '
            'service will automatically encrypt the rewritten objects with '
            'that key.'))
    print('%s\n' % msg, file=sys.stderr)

  # Perform rewrite requests in parallel (-m) mode, if requested.
  self.Apply(_RewriteFuncWrapper, name_expansion_iterator,
             _RewriteExceptionHandler,
             fail_on_error=(not self.continue_on_error),
             shared_attrs=['op_failure_count'],
             seek_ahead_iterator=seek_ahead_iterator)

  if self.op_failure_count:
    # NOTE(review): this condition is always true inside this branch, so a
    # single failure prints "1 files/objects"; it likely should be
    # self.op_failure_count > 1 — confirm before changing the message.
    plural_str = 's' if self.op_failure_count else ''
    raise CommandException('%d file%s/object%s could not be rewritten.' %
                           (self.op_failure_count, plural_str, plural_str))

  return 0
def RunCommand(self):
  """Command entry point for the rm command.

  Parses -a/-f/-I/-r sub-options, collects URLs from args or stdin,
  pre-lists buckets slated for recursive deletion, then performs remove
  requests (possibly in parallel).

  Fix: the last handler used the Python 2-only 'except ServiceException, e:'
  syntax, inconsistent with the 'except CommandException as e:' handler in
  the same try block. Both now use 'as'.
  """
  # self.recursion_requested is initialized in command.py (so it can be
  # checked in parent class for all commands).
  self.continue_on_error = False
  self.read_args_from_stdin = False
  self.all_versions = False
  if self.sub_opts:
    for o, unused_a in self.sub_opts:
      if o == '-a':
        self.all_versions = True
      elif o == '-f':
        self.continue_on_error = True
      elif o == '-I':
        self.read_args_from_stdin = True
      elif o == '-r' or o == '-R':
        self.recursion_requested = True
        self.all_versions = True

  if self.read_args_from_stdin:
    if self.args:
      raise CommandException('No arguments allowed with the -I flag.')
    url_strs = StdinIterator()
  else:
    if not self.args:
      raise CommandException('The rm command (without -I) expects at '
                             'least one URL.')
    url_strs = self.args

  bucket_urls_to_delete = []
  bucket_strings_to_delete = []
  if self.recursion_requested:
    bucket_fields = ['id']
    for url_str in url_strs:
      url = StorageUrlFromString(url_str)
      if url.IsBucket() or url.IsProvider():
        for blr in self.WildcardIterator(url_str).IterBuckets(
            bucket_fields=bucket_fields):
          bucket_urls_to_delete.append(blr.storage_url)
          bucket_strings_to_delete.append(url_str)

  self.preconditions = PreconditionsFromHeaders(self.headers or {})

  # Used to track if any files failed to be removed.
  self.everything_removed_okay = True

  try:
    # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api,
        url_strs, self.recursion_requested, project_id=self.project_id,
        all_versions=self.all_versions,
        continue_on_error=self.continue_on_error or self.parallel_operations)

    # Perform remove requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
               _RemoveExceptionHandler,
               fail_on_error=(not self.continue_on_error))

  # Assuming the bucket has versioning enabled, url's that don't map to
  # objects should throw an error even with all_versions, since the prior
  # round of deletes only sends objects to a history table.
  # This assumption that rm -a is only called for versioned buckets should
  # be corrected, but the fix is non-trivial.
  except CommandException as e:
    # Don't raise if there are buckets to delete -- it's valid to say:
    #   gsutil rm -r gs://some_bucket
    # if the bucket is empty.
    if not bucket_urls_to_delete and not self.continue_on_error:
      raise
    # Reset the failure count if we failed due to an empty bucket that we're
    # going to delete.
    msg = 'No URLs matched: '
    if msg in str(e):
      parts = str(e).split(msg)
      if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
        ResetFailureCount()
  except ServiceException as e:
    if not self.continue_on_error:
      raise
def RunCommand(self):
  """Command entry point for the setmeta command (manual-retry version).

  Accepts metadata either as -h headers or as a metadata spec in the first
  positional argument, then applies the change to each expanded object URI,
  retrying up to 3 times on HTTP 412 precondition collisions.

  Returns:
    0 on success.

  Raises:
    CommandException: if the URI does not name an object, or if metadata
        could not be set on some objects.
  """
  headers = []
  preserve_acl = True
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-n':
        preserve_acl = False
      elif o == '-h':
        headers.append(a)

  if headers:
    (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)
    uri_args = self.args
  else:
    # Legacy form: first positional argument is a metadata spec string.
    (metadata_minus, metadata_plus) = self._ParseMetadataSpec(self.args[0])
    uri_args = self.args[1:]

  if (len(uri_args) == 1
      and not self.suri_builder.StorageUri(uri_args[0]).names_object()):
    raise CommandException('URI (%s) must name an object' % uri_args[0])

  # Used to track if any objects' metadata failed to be set.
  self.everything_set_okay = True

  def _SetMetadataExceptionHandler(e):
    """Simple exception handler to allow post-completion status."""
    self.THREADED_LOGGER.error(str(e))
    self.everything_set_okay = False

  def _SetMetadataFunc(name_expansion_result, retry=3):
    """Sets metadata on one object, retrying on generation collisions."""
    exp_src_uri = self.suri_builder.StorageUri(
        name_expansion_result.GetExpandedUriStr())
    self.THREADED_LOGGER.info('Setting metadata on %s...', exp_src_uri)

    key = exp_src_uri.get_key()
    meta_generation = key.meta_generation
    generation = key.generation

    # Use generation preconditions so concurrent writers are detected.
    headers = {}
    if generation:
      headers['x-goog-if-generation-match'] = generation
    if meta_generation:
      headers['x-goog-if-metageneration-match'] = meta_generation

    try:
      exp_src_uri.set_metadata(metadata_plus, metadata_minus, preserve_acl,
                               headers=headers)
    except GSResponseError as response_error:
      # HTTP error 412 is "Precondition Failed."
      if response_error.status == 412:
        if retry <= 0:
          self.THREADED_LOGGER.error('Exhausted retries. Giving up.')
          raise
        self.THREADED_LOGGER.warn('Collision - %d tries left.', retry)
        time.sleep(random.uniform(0.5, 1.0))
        _SetMetadataFunc(name_expansion_result, retry - 1)
      else:
        raise

  name_expansion_iterator = NameExpansionIterator(
      self.command_name, self.proj_id_handler, self.headers, self.debug,
      self.bucket_storage_uri_class, uri_args, self.recursion_requested,
      self.recursion_requested)

  # Perform requests in parallel (-m) mode, if requested, using
  # configured number of parallel processes and threads. Otherwise,
  # perform requests with sequential function calls in current process.
  self.Apply(_SetMetadataFunc, name_expansion_iterator,
             _SetMetadataExceptionHandler)

  if not self.everything_set_okay:
    raise CommandException('Metadata for some objects could not be set.')

  return 0
def RunCommand(self):
  """Command entry point for the setmeta command (@Retry version).

  Parses -n/-h sub-options, then applies metadata changes to each expanded
  object URI, relying on the @Retry decorator to retry precondition
  failures.

  Returns:
    0 on success.

  Raises:
    CommandException: if the URI does not name an object, or if metadata
        could not be set on some objects.
  """
  headers = []
  preserve_acl = True
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-n':
        preserve_acl = False
      elif o == '-h':
        headers.append(a)

  (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)

  if (len(self.args) == 1
      and not self.suri_builder.StorageUri(self.args[0]).names_object()):
    raise CommandException('URI (%s) must name an object' % self.args[0])

  # Used to track if any objects' metadata failed to be set.
  self.everything_set_okay = True

  def _SetMetadataExceptionHandler(e):
    """Simple exception handler to allow post-completion status."""
    self.logger.error(str(e))
    self.everything_set_okay = False

  @Retry(GSResponseError, tries=3, delay=1, backoff=2)
  def _SetMetadataFunc(name_expansion_result):
    exp_src_uri = self.suri_builder.StorageUri(
        name_expansion_result.GetExpandedUriStr())
    self.logger.info('Setting metadata on %s...', exp_src_uri)

    key = exp_src_uri.get_key()
    metageneration = getattr(key, 'metageneration', None)
    generation = getattr(key, 'generation', None)

    # Use generation preconditions so concurrent writers are detected.
    headers = {}
    if generation:
      headers['x-goog-if-generation-match'] = generation
    if metageneration:
      headers['x-goog-if-metageneration-match'] = metageneration

    # If this fails because of a precondition, it will raise a
    # GSResponseError for @Retry to handle.
    exp_src_uri.set_metadata(metadata_plus, metadata_minus, preserve_acl,
                             headers=headers)

  name_expansion_iterator = NameExpansionIterator(
      self.command_name, self.proj_id_handler, self.headers, self.debug,
      self.logger, self.bucket_storage_uri_class, self.args,
      self.recursion_requested, self.recursion_requested)

  try:
    # Perform requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(_SetMetadataFunc, name_expansion_iterator,
               _SetMetadataExceptionHandler)
  except GSResponseError as e:
    # Warn about service-account limitations on a 403 before re-raising.
    if e.code == 'AccessDenied' and e.reason == 'Forbidden' \
        and e.status == 403:
      self._WarnServiceAccounts()
    raise

  if not self.everything_set_okay:
    raise CommandException('Metadata for some objects could not be set.')

  return 0
class RmCommand(Command):
  """Implementation of gsutil rm command.

  Fixes applied in review:
    - 'except ServiceException, e:' used Python 2-only syntax, inconsistent
      with 'except CommandException as e:' in the same try block; changed
      to 'as'.
    - The final pluralization condition was always true inside its guarding
      'if self.op_failure_count:' branch, so a single failure printed
      "1 files/objects"; it now checks for a count greater than one.
  """

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'rm',
      command_name_aliases=['del', 'delete', 'remove'],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='afIrR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='rm',
      help_name_aliases=['del', 'delete', 'remove'],
      help_type='command_help',
      help_one_line_summary='Remove objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the rm command."""
    # self.recursion_requested is initialized in command.py (so it can be
    # checked in parent class for all commands).
    self.continue_on_error = self.parallel_operations
    self.read_args_from_stdin = False
    self.all_versions = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
          self.all_versions = True

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      url_strs = StdinIterator()
    else:
      if not self.args:
        raise CommandException('The rm command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args

    # Tracks number of object deletes that failed.
    self.op_failure_count = 0

    # Tracks if any buckets were missing.
    self.bucket_not_found_count = 0

    # Tracks buckets that are slated for recursive deletion.
    bucket_urls_to_delete = []
    self.bucket_strings_to_delete = []

    if self.recursion_requested:
      bucket_fields = ['id']
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsBucket() or url.IsProvider():
          for blr in self.WildcardIterator(url_str).IterBuckets(
              bucket_fields=bucket_fields):
            bucket_urls_to_delete.append(blr.storage_url)
            self.bucket_strings_to_delete.append(url_str)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    try:
      # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
      name_expansion_iterator = NameExpansionIterator(
          self.command_name, self.debug, self.logger, self.gsutil_api,
          url_strs, self.recursion_requested, project_id=self.project_id,
          all_versions=self.all_versions,
          continue_on_error=(self.continue_on_error
                             or self.parallel_operations))

      seek_ahead_iterator = None
      # Cannot seek ahead with stdin args, since we can only iterate them
      # once without buffering in memory.
      if not self.read_args_from_stdin:
        seek_ahead_iterator = SeekAheadNameExpansionIterator(
            self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
            url_strs, self.recursion_requested,
            all_versions=self.all_versions, project_id=self.project_id)

      # Perform remove requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(
          _RemoveFuncWrapper,
          name_expansion_iterator,
          _RemoveExceptionHandler,
          fail_on_error=(not self.continue_on_error),
          shared_attrs=['op_failure_count', 'bucket_not_found_count'],
          seek_ahead_iterator=seek_ahead_iterator)

    # Assuming the bucket has versioning enabled, url's that don't map to
    # objects should throw an error even with all_versions, since the prior
    # round of deletes only sends objects to a history table.
    # This assumption that rm -a is only called for versioned buckets should
    # be corrected, but the fix is non-trivial.
    except CommandException as e:
      # Don't raise if there are buckets to delete -- it's valid to say:
      #   gsutil rm -r gs://some_bucket
      # if the bucket is empty.
      if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete, e):
        DecrementFailureCount()
      else:
        raise
    except ServiceException as e:
      if not self.continue_on_error:
        raise

    if self.bucket_not_found_count:
      raise CommandException(
          'Encountered non-existent bucket during listing')

    if self.op_failure_count and not self.continue_on_error:
      raise CommandException('Some files could not be removed.')

    # If this was a gsutil rm -r command covering any bucket subdirs,
    # remove any dir_$folder$ objects (which are created by various web UI
    # tools to simulate folders).
    if self.recursion_requested:
      folder_object_wildcards = []
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsObject():
          folder_object_wildcards.append('%s**_$folder$' % url_str)
      if folder_object_wildcards:
        self.continue_on_error = True
        try:
          name_expansion_iterator = NameExpansionIterator(
              self.command_name, self.debug, self.logger, self.gsutil_api,
              folder_object_wildcards, self.recursion_requested,
              project_id=self.project_id, all_versions=self.all_versions)
          # When we're removing folder objects, always continue on error
          self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                     _RemoveFoldersExceptionHandler, fail_on_error=False)
        except CommandException as e:
          # Ignore exception from name expansion due to an absent folder
          # file.
          if not e.reason.startswith(NO_URLS_MATCHED_GENERIC):
            raise

    # Now that all data has been deleted, delete any bucket URLs.
    for url in bucket_urls_to_delete:
      self.logger.info('Removing %s...', url)

      @Retry(NotEmptyException, tries=3, timeout_secs=1)
      def BucketDeleteWithRetry():
        self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)

      BucketDeleteWithRetry()

    if self.op_failure_count:
      # Only pluralize when more than one operation actually failed.
      plural_str = 's' if self.op_failure_count > 1 else ''
      raise CommandException('%d file%s/object%s could not be removed.' %
                             (self.op_failure_count, plural_str, plural_str))

    return 0
def RunCommand(self):
  """Command entry point for the setmeta command."""
  # Gather every -h header value; canned-ACL headers are rejected here
  # because ACL changes must go through "gsutil acl set" instead.
  header_values = []
  for flag, flag_value in (self.sub_opts or []):
    if flag == '-h':
      if 'x-goog-acl' in flag_value or 'x-amz-acl' in flag_value:
        raise CommandException(
            'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
            'set ... to set canned ACLs.')
      header_values.append(flag_value)

  removals, additions = self._ParseMetadataHeaders(header_values)

  # Removals are represented as empty-string values in the change map.
  self.metadata_change = additions
  self.metadata_change.update(dict.fromkeys(removals, ''))

  if not self.metadata_change:
    raise CommandException(
        'gsutil setmeta requires one or more headers to be provided with the'
        ' -h flag. See "gsutil help setmeta" for more information.')

  # With a single, non-recursive argument we can validate up front that it
  # names a cloud object rather than a bucket or local path.
  if len(self.args) == 1 and not self.recursion_requested:
    target_url = StorageUrlFromString(self.args[0])
    if not (target_url.IsCloudUrl() and target_url.IsObject()):
      raise CommandException('URL (%s) must name an object' % self.args[0])

  # Used to track if any objects' metadata failed to be set.
  self.everything_set_okay = True

  self.preconditions = PreconditionsFromHeaders(self.headers)

  expansion_iter = NameExpansionIterator(
      self.command_name, self.debug, self.logger, self.gsutil_api,
      self.args, self.recursion_requested, all_versions=self.all_versions,
      continue_on_error=self.parallel_operations,
      bucket_listing_fields=['generation', 'metadata', 'metageneration'])

  lookahead_iter = SeekAheadNameExpansionIterator(
      self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
      self.args, self.recursion_requested, all_versions=self.all_versions,
      project_id=self.project_id)

  try:
    # Perform requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(_SetMetadataFuncWrapper, expansion_iter,
               _SetMetadataExceptionHandler, fail_on_error=True,
               seek_ahead_iterator=lookahead_iter)
  except AccessDeniedException as e:
    if e.status == 403:
      self._WarnServiceAccounts()
    raise

  if not self.everything_set_okay:
    raise CommandException('Metadata for some objects could not be set.')

  return 0
def RunCommand(self):
  """Command entry point for the rm command (legacy boto-based code path).

  NOTE(review): this copy of the function appears truncated by file
  mangling -- it ends after the try/except with no success check or
  return statement; compare against the fuller copy elsewhere in this file.
  """
  # self.recursion_requested initialized in command.py (so can be checked
  # in parent class for all commands).
  self.continue_on_error = False
  self.all_versions = False
  if self.sub_opts:
    for o, unused_a in self.sub_opts:
      if o == '-a':
        self.all_versions = True
      elif o == '-f':
        self.continue_on_error = True
      elif o == '-r' or o == '-R':
        self.recursion_requested = True
      elif o == '-v':
        # -v is accepted but ignored for backward compatibility.
        self.logger.info(
            'WARNING: The %s -v option is no longer'
            ' needed, and will eventually be removed.\n' % self.command_name)

  # Refuse rm -R without -a on a versioned bucket, since it would only
  # archive (not fully delete) the object generations.
  if self.recursion_requested and not self.all_versions:
    for uri_str in self.args:
      # WildcardIterator returns BucketListingRefs.
      for blr in self.WildcardIterator(uri_str):
        uri = blr.GetUri()
        if uri.names_bucket() and uri.get_versioning_config():
          raise CommandException(
              'Running gsutil rm -R on a bucket-only URI (%s)\nwith '
              'versioning enabled will not work without specifying the -a '
              'flag. Please try\nagain, using:\n\tgsutil rm -Ra %s'
              % (uri_str, ' '.join(self.args)))

  # Used to track if any files failed to be removed.
  self.everything_removed_okay = True

  # Tracks if any URIs matched the given args.

  remove_func = self._MkRemoveFunc()
  exception_handler = self._MkRemoveExceptionHandler()

  # Buckets named by the args are collected so they can be deleted after
  # their contents (deleting a non-empty bucket fails).
  bucket_uris_to_delete = []
  if self.recursion_requested:
    for uri_str in self.args:
      for blr in self.WildcardIterator(uri_str):
        uri = blr.GetUri()
        if uri.names_bucket():
          bucket_uris_to_delete.append(uri)

  try:
    # Expand wildcards, dirs, buckets, and bucket subdirs in URIs.
    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.proj_id_handler, self.headers, self.debug,
        self.logger, self.bucket_storage_uri_class, self.args,
        self.recursion_requested, flat=self.recursion_requested,
        all_versions=self.all_versions)
    # Perform remove requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(remove_func, name_expansion_iterator, exception_handler)
  # Assuming the bucket has versioning enabled, uri's that don't map to
  # objects should throw an error even with all_versions, since the prior
  # round of deletes only sends objects to a history table.
  # This assumption that rm -a is only called for versioned buckets should be
  # corrected, but the fix is non-trivial.
  except CommandException as e:
    # Don't raise if there are buckets to delete -- it's valid to say:
    #   gsutil rm -r gs://some_bucket
    # if the bucket is empty.
    if not bucket_uris_to_delete and not self.continue_on_error:
      raise
  except GSResponseError, e:
    if not self.continue_on_error:
      raise
class RmCommand(Command): """Implementation of gsutil rm command.""" # Command specification (processed by parent class). command_spec = { # Name of command. COMMAND_NAME: 'rm', # List of command name aliases. COMMAND_NAME_ALIASES: ['del', 'delete', 'remove'], # Min number of args required by this command. MIN_ARGS: 1, # Max number of args required by this command, or NO_MAX. MAX_ARGS: NO_MAX, # Getopt-style string specifying acceptable sub args. SUPPORTED_SUB_ARGS: 'afrRv', # True if file URIs acceptable for this command. FILE_URIS_OK: False, # True if provider-only URIs acceptable for this command. PROVIDER_URIS_OK: False, # Index in args of first URI arg. URIS_START_ARG: 0, } help_spec = { # Name of command or auxiliary help info for which this help applies. HELP_NAME: 'rm', # List of help name aliases. HELP_NAME_ALIASES: ['del', 'delete', 'remove'], # Type of help: HELP_TYPE: HelpType.COMMAND_HELP, # One line summary of this help. HELP_ONE_LINE_SUMMARY: 'Remove objects', # The full help text. HELP_TEXT: _detailed_help_text, } # Command entry point. def RunCommand(self): # self.recursion_requested initialized in command.py (so can be checked # in parent class for all commands). self.continue_on_error = False self.all_versions = False if self.sub_opts: for o, unused_a in self.sub_opts: if o == '-a': self.all_versions = True elif o == '-f': self.continue_on_error = True elif o == '-r' or o == '-R': self.recursion_requested = True elif o == '-v': self.logger.info( 'WARNING: The %s -v option is no longer' ' needed, and will eventually be removed.\n' % self.command_name) if self.recursion_requested and not self.all_versions: for uri_str in self.args: # WildcardIterator returns BucketListingRefs. for blr in self.WildcardIterator(uri_str): uri = blr.GetUri() if uri.names_bucket() and uri.get_versioning_config(): raise CommandException( 'Running gsutil rm -R on a bucket-only URI (%s)\nwith ' 'versioning enabled will not work without specifying the -a ' 'flag. 
Please try\nagain, using:\n\tgsutil rm -Ra %s' % (uri_str, ' '.join(self.args))) # Used to track if any files failed to be removed. self.everything_removed_okay = True # Tracks if any URIs matched the given args. remove_func = self._MkRemoveFunc() exception_handler = self._MkRemoveExceptionHandler() bucket_uris_to_delete = [] if self.recursion_requested: for uri_str in self.args: for blr in self.WildcardIterator(uri_str): uri = blr.GetUri() if uri.names_bucket(): bucket_uris_to_delete.append(uri) try: # Expand wildcards, dirs, buckets, and bucket subdirs in URIs. name_expansion_iterator = NameExpansionIterator( self.command_name, self.proj_id_handler, self.headers, self.debug, self.logger, self.bucket_storage_uri_class, self.args, self.recursion_requested, flat=self.recursion_requested, all_versions=self.all_versions) # Perform remove requests in parallel (-m) mode, if requested, using # configured number of parallel processes and threads. Otherwise, # perform requests with sequential function calls in current process. self.Apply(remove_func, name_expansion_iterator, exception_handler) # Assuming the bucket has versioning enabled, uri's that don't map to # objects should throw an error even with all_versions, since the prior # round of deletes only sends objects to a history table. # This assumption that rm -a is only called for versioned buckets should be # corrected, but the fix is non-trivial. except CommandException as e: # Don't raise if there are buckets to delete -- it's valid to say: # gsutil rm -r gs://some_bucket # if the bucket is empty. if not bucket_uris_to_delete and not self.continue_on_error: raise except GSResponseError, e: if not self.continue_on_error: raise if not self.everything_removed_okay and not self.continue_on_error: raise CommandException('Some files could not be removed.') # If this was a gsutil rm -r command covering any bucket subdirs, # remove any dir_$folder$ objects (which are created by various web UI # tools to simulate folders). 
if self.recursion_requested: folder_object_wildcards = [] for uri_str in self.args: uri = self.suri_builder.StorageUri(uri_str) if uri.names_object: folder_object_wildcards.append('%s**_$folder$' % uri) if len(folder_object_wildcards): self.continue_on_error = True try: name_expansion_iterator = NameExpansionIterator( self.command_name, self.proj_id_handler, self.headers, self.debug, self.logger, self.bucket_storage_uri_class, folder_object_wildcards, self.recursion_requested, flat=True, all_versions=self.all_versions) self.Apply(remove_func, name_expansion_iterator, exception_handler) except CommandException as e: # Ignore exception from name expansion due to an absent folder file. if not e.reason.startswith('No URIs matched:'): raise # Now that all data has been deleted, delete any bucket URIs. for uri in bucket_uris_to_delete: self.logger.info('Removing %s...', uri) uri.delete_bucket(self.headers) return 0
class RmCommand(Command):
  """Implementation of gsutil rm command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'rm',
      command_name_aliases=['del', 'delete', 'remove'],
      min_args=1,
      max_args=NO_MAX,
      supported_sub_args='afrR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='rm',
      help_name_aliases=['del', 'delete', 'remove'],
      help_type='command_help',
      help_one_line_summary='Remove objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the rm command."""
    # self.recursion_requested is initialized in command.py (so it can be
    # checked in parent class for all commands).
    self.continue_on_error = False
    self.all_versions = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-r' or o == '-R':
          # -R implies -a: recursive removal also removes all generations.
          self.recursion_requested = True
          self.all_versions = True

    # Buckets named by the args are collected so they can be deleted after
    # their contents; the matching arg strings are kept in parallel so
    # "No URLs matched" errors can be attributed to an empty bucket below.
    bucket_urls_to_delete = []
    bucket_strings_to_delete = []
    if self.recursion_requested:
      bucket_fields = ['id']
      for url_str in self.args:
        url = StorageUrlFromString(url_str)
        if url.IsBucket() or url.IsProvider():
          for blr in self.WildcardIterator(url_str).IterBuckets(
              bucket_fields=bucket_fields):
            bucket_urls_to_delete.append(blr.storage_url)
            bucket_strings_to_delete.append(url_str)

    # Used to track if any files failed to be removed.
    self.everything_removed_okay = True

    try:
      # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
      name_expansion_iterator = NameExpansionIterator(
          self.command_name, self.debug, self.logger, self.gsutil_api,
          self.args, self.recursion_requested, project_id=self.project_id,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or self.parallel_operations)

      # Perform remove requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                 _RemoveExceptionHandler,
                 fail_on_error=(not self.continue_on_error))

    # Assuming the bucket has versioning enabled, url's that don't map to
    # objects should throw an error even with all_versions, since the prior
    # round of deletes only sends objects to a history table.
    # This assumption that rm -a is only called for versioned buckets should
    # be corrected, but the fix is non-trivial.
    except CommandException as e:
      # Don't raise if there are buckets to delete -- it's valid to say:
      #   gsutil rm -r gs://some_bucket
      # if the bucket is empty.
      if not bucket_urls_to_delete and not self.continue_on_error:
        raise
      # Reset the failure count if we failed due to an empty bucket that we're
      # going to delete.
      msg = 'No URLs matched: '
      if msg in str(e):
        parts = str(e).split(msg)
        if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
          ResetFailureCount()
    except ServiceException, e:
      if not self.continue_on_error:
        raise

    if not self.everything_removed_okay and not self.continue_on_error:
      raise CommandException('Some files could not be removed.')

    # If this was a gsutil rm -r command covering any bucket subdirs,
    # remove any dir_$folder$ objects (which are created by various web UI
    # tools to simulate folders).
    if self.recursion_requested:
      # Remember whether real failures happened before the best-effort folder
      # cleanup, so cleanup misses don't turn a clean run into a failure.
      had_previous_failures = GetFailureCount() > 0
      folder_object_wildcards = []
      for url_str in self.args:
        url = StorageUrlFromString(url_str)
        if url.IsObject():
          folder_object_wildcards.append('%s**_$folder$' % url_str)
      if folder_object_wildcards:
        self.continue_on_error = True
        try:
          name_expansion_iterator = NameExpansionIterator(
              self.command_name, self.debug, self.logger, self.gsutil_api,
              folder_object_wildcards, self.recursion_requested,
              project_id=self.project_id, all_versions=self.all_versions)
          # When we're removing folder objects, always continue on error
          self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                     _RemoveFoldersExceptionHandler, fail_on_error=False)
        except CommandException as e:
          # Ignore exception from name expansion due to an absent folder file.
          # NOTE(review): the placement of the following reset inside this
          # handler is reconstructed from mangled formatting -- confirm
          # against upstream.
          if not e.reason.startswith('No URLs matched:'):
            raise
          if not had_previous_failures:
            ResetFailureCount()

    # Now that all data has been deleted, delete any bucket URLs.
    for url in bucket_urls_to_delete:
      self.logger.info('Removing %s...', url)

      # Bucket deletion can race with eventually-consistent listing of the
      # just-deleted objects, so retry briefly on NotEmptyException.
      @Retry(NotEmptyException, tries=3, timeout_secs=1)
      def BucketDeleteWithRetry():
        self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)

      BucketDeleteWithRetry()

    return 0
uri = self.suri_builder.StorageUri(uri_args[i]) if uri.names_bucket(): uri_args[i] = uri.clone_replace_name('*').uri else: # Handle bucket ACL setting operations single-threaded, because # our threading machinery currently assumes it's working with objects # (name_expansion_iterator), and normally we wouldn't expect users to need # to set ACLs on huge numbers of buckets at once anyway. for i in range(len(uri_args)): uri_str = uri_args[i] if self.suri_builder.StorageUri(uri_str).names_bucket(): self._RunSingleThreadedSetAcl(acl_arg, uri_args) return name_expansion_iterator = NameExpansionIterator( self.command_name, self.proj_id_handler, self.headers, self.debug, self.bucket_storage_uri_class, uri_args, self.recursion_requested, self.recursion_requested) # Perform requests in parallel (-m) mode, if requested, using # configured number of parallel processes and threads. Otherwise, # perform requests with sequential function calls in current process. self.Apply(_SetAclFunc, name_expansion_iterator, _SetAclExceptionHandler) if not self.everything_set_okay: raise CommandException('ACLs for some objects could not be set.') def _RunSingleThreadedSetAcl(self, acl_arg, uri_args): some_matched = False for uri_str in uri_args: for blr in self.WildcardIterator(uri_str):
def _SetIam(self):
  """Set IAM policy for given wildcards on the command line."""

  self.continue_on_error = False
  self.recursion_requested = False
  self.all_versions = False
  force_etag = False
  etag = ''
  if self.sub_opts:
    for o, arg in self.sub_opts:
      if o in ['-r', '-R']:
        self.recursion_requested = True
      elif o == '-f':
        self.continue_on_error = True
      elif o == '-a':
        self.all_versions = True
      elif o == '-e':
        # An explicit -e etag overrides whatever etag the policy file holds.
        etag = str(arg)
        force_etag = True
      else:
        self.RaiseInvalidArgumentException()

  file_url = self.args[0]
  patterns = self.args[1:]

  # Load the IAM policy file and raise error if the file is invalid JSON or
  # does not exist.
  try:
    with open(file_url, 'r') as fp:
      policy = json.loads(fp.read())
  except IOError:
    raise ArgumentException(
        'Specified IAM policy file "%s" does not exist.' % file_url)
  except ValueError as e:
    # Bug fix: the original passed `e` as a lazy %-format argument with no
    # placeholder in the format string, which makes the logging module raise
    # "not all arguments converted" instead of logging the error detail.
    self.logger.debug('Invalid IAM policy file, ValueError:\n%s', e)
    raise ArgumentException('Invalid IAM policy file "%s".' % file_url)

  bindings = policy.get('bindings', [])
  if not force_etag:
    etag = policy.get('etag', '')

  # Round-trip through JSON to validate the policy against the apitools
  # Policy message definition.
  policy_json = json.dumps({'bindings': bindings, 'etag': etag})
  try:
    policy = protojson.decode_message(apitools_messages.Policy, policy_json)
  except DecodeError:
    raise ArgumentException(
        'Invalid IAM policy file "%s" or etag "%s".' % (file_url, etag))

  self.everything_set_okay = True

  # This list of wildcard strings will be handled by NameExpansionIterator.
  threaded_wildcards = []

  for pattern in patterns:
    surl = StorageUrlFromString(pattern)
    if surl.IsBucket():
      if self.recursion_requested:
        # Expand the bucket to all of its objects.
        surl.object_name = '*'
        threaded_wildcards.append(surl.url_string)
      else:
        # Bucket-level policies are set single-threaded.
        self.SetIamHelper(surl, policy)
    else:
      threaded_wildcards.append(surl.url_string)

  # N.B.: If threaded_wildcards contains a non-existent bucket
  # (e.g. ["gs://non-existent", "gs://existent"]), NameExpansionIterator
  # will raise an exception in iter.next. This halts all iteration, even
  # when -f is set. This behavior is also evident in acl set. This behavior
  # also appears for any exception that will be raised when iterating over
  # wildcard expansions (access denied if bucket cannot be listed, etc.).
  if threaded_wildcards:
    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api,
        threaded_wildcards, self.recursion_requested,
        all_versions=self.all_versions,
        continue_on_error=self.continue_on_error or self.parallel_operations,
        bucket_listing_fields=['name'])

    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
        threaded_wildcards, self.recursion_requested,
        all_versions=self.all_versions)

    # The same serialized policy is paired with every expansion result.
    policy_it = itertools.repeat(protojson.encode_message(policy))
    self.Apply(
        _SetIamWrapper,
        itertools.izip(policy_it, name_expansion_iterator),
        _SetIamExceptionHandler,
        fail_on_error=not self.continue_on_error,
        seek_ahead_iterator=seek_ahead_iterator)

    self.everything_set_okay &= not GetFailureCount() > 0

  # TODO: Add an error counter for files and objects.
  if not self.everything_set_okay:
    raise CommandException('Some IAM policies could not be set.')
def RunCommand(self):
  """Command entry point for the setmeta command (legacy boto code path).

  Metadata changes come either from -h headers or from a metadata-spec
  first argument; objects are rewritten in place via bucket copy_key.
  """
  headers = []
  preserve_acl = True
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-n':
        preserve_acl = False
      elif o == '-h':
        headers.append(a)

  # Either all changes come from -h flags (all args are URIs), or the first
  # arg is a metadata spec and the remaining args are URIs.
  if headers:
    (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)
    uri_args = self.args
  else:
    (metadata_minus, metadata_plus) = self._ParseMetadataSpec(self.args[0])
    uri_args = self.args[1:]

  if (len(uri_args) == 1
      and not self.suri_builder.StorageUri(uri_args[0]).names_object()):
    raise CommandException('URI (%s) must name an object' % uri_args[0])

  # Used to track if any objects' metadata failed to be set.
  self.everything_set_okay = True

  def _SetMetadataExceptionHandler(e):
    """Simple exception handler to allow post-completion status."""
    self.THREADED_LOGGER.error(str(e))
    self.everything_set_okay = False

  def _SetMetadataFunc(name_expansion_result):
    """Applies the metadata additions/removals to one expanded object."""
    exp_src_uri = self.suri_builder.StorageUri(
        name_expansion_result.GetExpandedUriStr())
    self.THREADED_LOGGER.info('Setting metadata on %s...' % exp_src_uri)

    # Merge the requested additions over existing metadata, then drop the
    # headers slated for removal.
    metadata = self._ExtractMetadata(exp_src_uri)
    metadata.update(metadata_plus)
    for h in metadata_minus:
      if h in metadata:
        del metadata[h]

    src_bucket = exp_src_uri.get_bucket()
    # Boto prepends the meta prefix when adding headers, so strip prefix in
    # metadata before sending back in to copy_key() call.
    rewritten_metadata = {}
    for h in metadata:
      if _IsCustomMeta(h):
        h_pref_stripped = (
            h.replace('x-goog-meta-', '').replace('x-amz-meta-', ''))
        rewritten_metadata[h_pref_stripped] = metadata[h]
      else:
        rewritten_metadata[h] = metadata[h]
    metadata = rewritten_metadata
    # Copy the object onto itself to rewrite its metadata.
    src_bucket.copy_key(exp_src_uri.object_name, exp_src_uri.bucket_name,
                        exp_src_uri.object_name, metadata=metadata,
                        preserve_acl=preserve_acl)

  name_expansion_iterator = NameExpansionIterator(
      self.command_name, self.proj_id_handler, self.headers, self.debug,
      self.bucket_storage_uri_class, uri_args, self.recursion_requested,
      self.recursion_requested)

  # Perform requests in parallel (-m) mode, if requested, using
  # configured number of parallel processes and threads. Otherwise,
  # perform requests with sequential function calls in current process.
  self.Apply(_SetMetadataFunc, name_expansion_iterator,
             _SetMetadataExceptionHandler)

  if not self.everything_set_okay:
    raise CommandException('Metadata for some objects could not be set.')
def SetAclCommandHelper(self):
  """
  Common logic for setting ACLs. Sets the standard ACL or the default
  object ACL depending on self.command_name.
  """
  acl_arg = self.args[0]
  uri_args = self.args[1:]
  # Disallow multi-provider setacl requests, because there are differences in
  # the ACL models.
  storage_uri = self.UrisAreForSingleProvider(uri_args)
  if not storage_uri:
    raise CommandException(
        '"%s" command spanning providers not allowed.' % self.command_name)

  # Determine whether acl_arg names a file containing XML ACL text vs. the
  # string name of a canned ACL.
  if os.path.isfile(acl_arg):
    with codecs.open(acl_arg, 'r', 'utf-8') as f:
      acl_arg = f.read()
    self.canned = False
  else:
    # No file exists, so expect a canned ACL string.
    canned_acls = storage_uri.canned_acls()
    if acl_arg not in canned_acls:
      raise CommandException('Invalid canned ACL "%s".' % acl_arg)
    self.canned = True

  # Used to track if any ACLs failed to be set.
  self.everything_set_okay = True

  def _SetAclExceptionHandler(e):
    """Simple exception handler to allow post-completion status."""
    self.logger.error(str(e))
    self.everything_set_okay = False

  def _SetAclFunc(name_expansion_result):
    """Sets the canned or XML ACL on one expanded object URI."""
    exp_src_uri = self.suri_builder.StorageUri(
        name_expansion_result.GetExpandedUriStr())
    # We don't do bucket operations multi-threaded (see comment below).
    assert self.command_name != 'setdefacl'
    self.logger.info('Setting ACL on %s...' %
                     name_expansion_result.expanded_uri_str)
    try:
      if self.canned:
        exp_src_uri.set_acl(acl_arg, exp_src_uri.object_name, False,
                            self.headers)
      else:
        exp_src_uri.set_xml_acl(acl_arg, exp_src_uri.object_name, False,
                                self.headers)
    except GSResponseError as e:
      if self.continue_on_error:
        exc_name, error_detail = util.ExtractErrorDetail(e)
        self.everything_set_okay = False
        if error_detail:
          sys.stderr.write(
              '%s: status=%d, code=%s, reason=%s, detail=%s.\n' %
              (exc_name, e.status, e.code, e.reason, error_detail))
      else:
        # NOTE(review): this `else` pairing with the continue_on_error check
        # (re-raise when -f was not given) is reconstructed from mangled
        # formatting -- confirm against upstream.
        raise

  # If user specified -R option, convert any bucket args to bucket wildcards
  # (e.g., gs://bucket/*), to prevent the operation from being applied to
  # the buckets themselves.
  if self.recursion_requested:
    for i in range(len(uri_args)):
      uri = self.suri_builder.StorageUri(uri_args[i])
      if uri.names_bucket():
        uri_args[i] = uri.clone_replace_name('*').uri
  else:
    # Handle bucket ACL setting operations single-threaded, because
    # our threading machinery currently assumes it's working with objects
    # (name_expansion_iterator), and normally we wouldn't expect users to need
    # to set ACLs on huge numbers of buckets at once anyway.
    for i in range(len(uri_args)):
      uri_str = uri_args[i]
      if self.suri_builder.StorageUri(uri_str).names_bucket():
        self._RunSingleThreadedSetAcl(acl_arg, uri_args)
        return

  name_expansion_iterator = NameExpansionIterator(
      self.command_name, self.proj_id_handler, self.headers, self.debug,
      self.logger, self.bucket_storage_uri_class, uri_args,
      self.recursion_requested, self.recursion_requested,
      all_versions=self.all_versions)

  # Perform requests in parallel (-m) mode, if requested, using
  # configured number of parallel processes and threads. Otherwise,
  # perform requests with sequential function calls in current process.
  self.Apply(_SetAclFunc, name_expansion_iterator, _SetAclExceptionHandler)

  if not self.everything_set_okay and not self.continue_on_error:
    raise CommandException('ACLs for some objects could not be set.')
def RunCommand(self): """Command entry point for the rm command.""" # self.recursion_requested is initialized in command.py (so it can be # checked in parent class for all commands). self.continue_on_error = self.parallel_operations self.read_args_from_stdin = False self.all_versions = False if self.sub_opts: for o, unused_a in self.sub_opts: if o == '-a': self.all_versions = True elif o == '-f': self.continue_on_error = True elif o == '-I': self.read_args_from_stdin = True elif o == '-r' or o == '-R': self.recursion_requested = True self.all_versions = True if self.read_args_from_stdin: if self.args: raise CommandException( 'No arguments allowed with the -I flag.') url_strs = StdinIterator() else: if not self.args: raise CommandException( 'The rm command (without -I) expects at ' 'least one URL.') url_strs = self.args # Tracks number of object deletes that failed. self.op_failure_count = 0 # Tracks if any buckets were missing. self.bucket_not_found_count = 0 # Tracks buckets that are slated for recursive deletion. bucket_urls_to_delete = [] self.bucket_strings_to_delete = [] if self.recursion_requested: bucket_fields = ['id'] for url_str in url_strs: url = StorageUrlFromString(url_str) if url.IsBucket() or url.IsProvider(): for blr in self.WildcardIterator(url_str).IterBuckets( bucket_fields=bucket_fields): bucket_urls_to_delete.append(blr.storage_url) self.bucket_strings_to_delete.append(url_str) self.preconditions = PreconditionsFromHeaders(self.headers or {}) try: # Expand wildcards, dirs, buckets, and bucket subdirs in URLs. name_expansion_iterator = NameExpansionIterator( self.command_name, self.debug, self.logger, self.gsutil_api, url_strs, self.recursion_requested, project_id=self.project_id, all_versions=self.all_versions, continue_on_error=self.continue_on_error or self.parallel_operations) seek_ahead_iterator = None # Cannot seek ahead with stdin args, since we can only iterate them # once without buffering in memory. 
if not self.read_args_from_stdin: seek_ahead_iterator = SeekAheadNameExpansionIterator( self.command_name, self.debug, self.GetSeekAheadGsutilApi(), url_strs, self.recursion_requested, all_versions=self.all_versions, project_id=self.project_id) # Perform remove requests in parallel (-m) mode, if requested, using # configured number of parallel processes and threads. Otherwise, # perform requests with sequential function calls in current process. self.Apply( _RemoveFuncWrapper, name_expansion_iterator, _RemoveExceptionHandler, fail_on_error=(not self.continue_on_error), shared_attrs=['op_failure_count', 'bucket_not_found_count'], seek_ahead_iterator=seek_ahead_iterator) # Assuming the bucket has versioning enabled, url's that don't map to # objects should throw an error even with all_versions, since the prior # round of deletes only sends objects to a history table. # This assumption that rm -a is only called for versioned buckets should be # corrected, but the fix is non-trivial. except CommandException as e: # Don't raise if there are buckets to delete -- it's valid to say: # gsutil rm -r gs://some_bucket # if the bucket is empty. if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete, e): DecrementFailureCount() else: raise except ServiceException, e: if not self.continue_on_error: raise
def _PatchIam(self):
  """Patches IAM policy bindings for the URL patterns on the command line."""
  self.continue_on_error = False
  self.recursion_requested = False

  patch_bindings_tuples = []

  if self.sub_opts:
    for o, a in self.sub_opts:
      if o in ['-r', '-R']:
        self.recursion_requested = True
      elif o == '-f':
        self.continue_on_error = True
      elif o == '-d':
        patch_bindings_tuples.append(BindingStringToTuple(False, a))

  patterns = []

  # N.B.: self.sub_opts stops taking in options at the first non-flagged
  # token. The rest of the tokens are sent to self.args. Thus, in order to
  # handle input of the form "-d <binding> <binding> <url>", we will have to
  # parse self.args for a mix of both bindings and CloudUrls. We are not
  # expecting to come across the -r, -f flags here.
  it = iter(self.args)
  for token in it:
    if token == '-d':
      patch_bindings_tuples.append(BindingStringToTuple(False, it.next()))
    else:
      try:
        patch_bindings_tuples.append(BindingStringToTuple(True, token))
      # All following arguments are urls.
      except (ArgumentException, CommandException):
        patterns.append(token)
        for token in it:
          patterns.append(token)

  # We must have some bindings to process, else this is pointless.
  if not patch_bindings_tuples:
    raise CommandException('Must specify at least one binding.')

  self.everything_set_okay = True
  threaded_wildcards = []
  for pattern in patterns:
    surl = StorageUrlFromString(pattern)
    try:
      if surl.IsBucket():
        if self.recursion_requested:
          # Bug fix: the original assigned `surl.object = '*'`, which sets a
          # nonexistent attribute and leaves url_string unchanged, so the
          # wildcard expansion was never applied. _SetIam uses `object_name`
          # for the same recursive-expansion trick.
          surl.object_name = '*'
          threaded_wildcards.append(surl.url_string)
        else:
          # Bucket-level policies are patched single-threaded.
          self.PatchIamHelper(surl, patch_bindings_tuples)
      else:
        threaded_wildcards.append(surl.url_string)
    except AttributeError:
      error_msg = 'Invalid Cloud URL "%s".' % surl.object_name
      if set(surl.object_name).issubset(set('-Rrf')):
        error_msg += (
            ' This resource handle looks like a flag, which must appear '
            'before all bindings. See "gsutil help iam ch" for more details.')
      raise CommandException(error_msg)

  if threaded_wildcards:
    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api,
        threaded_wildcards, self.recursion_requested,
        all_versions=self.all_versions,
        continue_on_error=self.continue_on_error or self.parallel_operations,
        bucket_listing_fields=['name'])

    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
        threaded_wildcards, self.recursion_requested,
        all_versions=self.all_versions)

    # N.B.: Python2.6 support means we can't use a partial function here to
    # curry the bindings tuples into the wrapper function. We instead pass
    # the bindings along by zipping them with each name_expansion_iterator
    # result. See http://bugs.python.org/issue5228.
    serialized_bindings_tuples_it = itertools.repeat(
        [SerializeBindingsTuple(t) for t in patch_bindings_tuples])
    self.Apply(
        _PatchIamWrapper,
        itertools.izip(serialized_bindings_tuples_it,
                       name_expansion_iterator),
        _PatchIamExceptionHandler,
        fail_on_error=not self.continue_on_error,
        seek_ahead_iterator=seek_ahead_iterator)

    self.everything_set_okay &= not GetFailureCount() > 0

  # TODO: Add an error counter for files and objects.
  if not self.everything_set_okay:
    raise CommandException('Some IAM policies could not be patched.')