def RunCommand(self):
  """Command entry point for the compose command.

  The last argument names the destination object; every argument before it
  names a source (wildcard arguments are expanded through WildcardIterator).
  One component entry is collected per source object and a single compose
  request is issued. The destination content type is copied from the first
  source object encountered.

  Raises:
    CommandException: if the destination URL has a generation, if a source
        is in a different bucket than the destination, if more than
        MAX_COMPOSE_ARITY components are collected, or if no component is
        found at all.
  """
  # Split the arguments: final one is the destination, the rest are sources.
  destination_str = self.args[-1]
  self.args = self.args[:-1]
  target_url = StorageUrlFromString(destination_str)
  self.CheckProvider(target_url)
  if target_url.HasGeneration():
    raise CommandException('A version-specific URL (%s) cannot be '
                           'the destination for gsutil compose - abort.'
                           % target_url)

  dst_obj_metadata = apitools_messages.Object(
      name=target_url.object_name, bucket=target_url.bucket_name)

  components = []
  # The first source object seen supplies the destination content type.
  first_src_url = None
  for source_str in self.args:
    if ContainsWildcard(source_str):
      listing_refs = self.WildcardIterator(source_str).IterObjects()
    else:
      listing_refs = [BucketListingObject(StorageUrlFromString(source_str))]

    for listing_ref in listing_refs:
      src_url = listing_ref.storage_url
      self.CheckProvider(src_url)

      if src_url.bucket_name != target_url.bucket_name:
        raise CommandException(
            'GCS does not support inter-bucket composing.')

      first_src_url = first_src_url or src_url

      component = (
          apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
              name=src_url.object_name))
      if src_url.HasGeneration():
        component.generation = src_url.generation
      components.append(component)

      # Checked after every append so an over-broad wildcard is rejected
      # as soon as the limit is exceeded rather than after full expansion.
      if len(components) > MAX_COMPOSE_ARITY:
        raise CommandException('"compose" called with too many component '
                               'objects. Limit is %d.' % MAX_COMPOSE_ARITY)

  if not components:
    raise CommandException('"compose" requires at least 1 component object.')

  first_src_metadata = self.gsutil_api.GetObjectMetadata(
      first_src_url.bucket_name, first_src_url.object_name,
      provider=first_src_url.scheme, fields=['contentType'])
  dst_obj_metadata.contentType = first_src_metadata.contentType

  preconditions = PreconditionsFromHeaders(self.headers or {})

  self.logger.info(
      'Composing %s from %d component object(s).',
      target_url, len(components))
  self.gsutil_api.ComposeObject(
      components, dst_obj_metadata, preconditions=preconditions,
      provider=target_url.scheme,
      encryption_tuple=GetEncryptionKeyWrapper(config))
def RunCommand(self):
  """Command entry point for the setmeta command.

  Collects the values of every -h sub-option, turns them into a metadata
  change via _ParseMetadataHeaders, and applies that change to each object
  produced by expanding the URL arguments.

  Returns:
    0 when the metadata of every object was set.

  Raises:
    CommandException: if a -h value mentions a canned-ACL header, if the
        single non-recursive URL argument is not a cloud object URL, or if
        everything_set_okay was cleared while applying the change.
    AccessDeniedException: re-raised from the apply step (after calling
        _WarnServiceAccounts when its status is 403).
  """
  header_args = []
  for flag, value in self.sub_opts or ():
    if flag != '-h':
      continue
    if 'x-goog-acl' in value or 'x-amz-acl' in value:
      raise CommandException(
          'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
          'set ... to set canned ACLs.')
    header_args.append(value)

  removals, additions = self._ParseMetadataHeaders(header_args)

  # Headers to remove are recorded in the change with an empty value.
  self.metadata_change = additions
  for removed_header in removals:
    self.metadata_change[removed_header] = ''

  is_single_plain_arg = (
      len(self.args) == 1 and not self.recursion_requested)
  if is_single_plain_arg:
    only_url = StorageUrlFromString(self.args[0])
    if not only_url.IsCloudUrl() or not only_url.IsObject():
      raise CommandException('URL (%s) must name an object' % self.args[0])

  # Cleared elsewhere when an object's metadata could not be set.
  self.everything_set_okay = True

  self.preconditions = PreconditionsFromHeaders(self.headers)

  name_expansion_iterator = NameExpansionIterator(
      self.command_name, self.debug, self.logger, self.gsutil_api,
      self.args, self.recursion_requested, all_versions=self.all_versions,
      continue_on_error=self.parallel_operations,
      bucket_listing_fields=['generation', 'metadata', 'metageneration'])

  seek_ahead_iterator = SeekAheadNameExpansionIterator(
      self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
      self.args, self.recursion_requested,
      all_versions=self.all_versions, project_id=self.project_id)

  try:
    # With -m the requests run in parallel using the configured number of
    # processes and threads; otherwise they run sequentially in this
    # process.
    self.Apply(_SetMetadataFuncWrapper, name_expansion_iterator,
               _SetMetadataExceptionHandler, fail_on_error=True,
               seek_ahead_iterator=seek_ahead_iterator)
  except AccessDeniedException as e:
    if e.status == 403:
      self._WarnServiceAccounts()
    raise

  if not self.everything_set_okay:
    raise CommandException('Metadata for some objects could not be set.')

  return 0
def RunCommand(self):
  """Command entry point for the rm command.

  Parses the rm sub-options, records which bucket URL strings are slated
  for recursive deletion, then expands the URL arguments and applies the
  remove function to every expansion result.

  Raises:
    CommandException: if arguments are supplied together with -I, if no URL
        is supplied without -I, or if the expansion/removal step raises a
        CommandException that _ExceptionMatchesBucketToDelete does not
        accept.
    ServiceException: re-raised from the expansion/removal step unless
        continue_on_error is set.
  """
  # self.recursion_requested is initialized in command.py (so it can be
  # checked in parent class for all commands).
  self.continue_on_error = self.parallel_operations
  self.read_args_from_stdin = False
  self.all_versions = False
  if self.sub_opts:
    for o, unused_a in self.sub_opts:
      if o == '-a':
        self.all_versions = True
      elif o == '-f':
        self.continue_on_error = True
      elif o == '-I':
        self.read_args_from_stdin = True
      elif o == '-r' or o == '-R':
        # Recursive removal also implies removing all versions.
        self.recursion_requested = True
        self.all_versions = True

  if self.read_args_from_stdin:
    if self.args:
      raise CommandException(
          'No arguments allowed with the -I flag.')
    url_strs = StdinIterator()
  else:
    if not self.args:
      raise CommandException(
          'The rm command (without -I) expects at '
          'least one URL.')
    url_strs = self.args

  # Tracks number of object deletes that failed.
  self.op_failure_count = 0

  # Tracks if any buckets were missing.
  self.bucket_not_found_count = 0

  # Tracks buckets that are slated for recursive deletion.
  bucket_urls_to_delete = []
  self.bucket_strings_to_delete = []

  if self.recursion_requested:
    bucket_fields = ['id']
    for url_str in url_strs:
      url = StorageUrlFromString(url_str)
      if url.IsBucket() or url.IsProvider():
        for blr in self.WildcardIterator(url_str).IterBuckets(
            bucket_fields=bucket_fields):
          bucket_urls_to_delete.append(blr.storage_url)
          self.bucket_strings_to_delete.append(url_str)

  self.preconditions = PreconditionsFromHeaders(self.headers or {})

  try:
    # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api,
        url_strs, self.recursion_requested, project_id=self.project_id,
        all_versions=self.all_versions,
        continue_on_error=self.continue_on_error or self.parallel_operations)

    seek_ahead_iterator = None
    # Cannot seek ahead with stdin args, since we can only iterate them
    # once without buffering in memory.
    if not self.read_args_from_stdin:
      seek_ahead_iterator = SeekAheadNameExpansionIterator(
          self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
          url_strs, self.recursion_requested,
          all_versions=self.all_versions, project_id=self.project_id)

    # Perform remove requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(
        _RemoveFuncWrapper, name_expansion_iterator,
        _RemoveExceptionHandler,
        fail_on_error=(not self.continue_on_error),
        shared_attrs=['op_failure_count', 'bucket_not_found_count'],
        seek_ahead_iterator=seek_ahead_iterator)

  # Assuming the bucket has versioning enabled, url's that don't map to
  # objects should throw an error even with all_versions, since the prior
  # round of deletes only sends objects to a history table.
  # This assumption that rm -a is only called for versioned buckets should be
  # corrected, but the fix is non-trivial.
  except CommandException as e:
    # Don't raise if there are buckets to delete -- it's valid to say:
    #   gsutil rm -r gs://some_bucket
    # if the bucket is empty.
    if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete, e):
      DecrementFailureCount()
    else:
      raise
  except ServiceException:
    # "except X as e" / bare "except X" parse on both Python 2.6+ and
    # Python 3; the old "except X, e" form is a SyntaxError on Python 3.
    if not self.continue_on_error:
      raise
def RunCommand(self):
  """Command entry point for the rewrite command.

  Parses the rewrite sub-options, validates that at least one
  transformation flag (-k or -s) was given, expands the URL arguments and
  applies the rewrite function to every expansion result.

  Returns:
    0 when no rewrite failure was counted.

  Raises:
    CommandException: if arguments are supplied together with -I, if no URL
        is supplied without -I, if no transformation flag was given, or if
        op_failure_count is non-zero after the apply step.
  """
  self.continue_on_error = self.parallel_operations
  self.dest_storage_class = None
  self.no_preserve_acl = False
  self.read_args_from_stdin = False
  self.supported_transformation_flags = ['-k', '-s']
  self.transform_types = set()

  self.op_failure_count = 0
  self.boto_file_encryption_tuple, self.boto_file_encryption_sha256 = (
      GetEncryptionTupleAndSha256Hash())

  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-f':
        self.continue_on_error = True
      elif o == '-k':
        self.transform_types.add(_TransformTypes.CRYPTO_KEY)
      elif o == '-I':
        self.read_args_from_stdin = True
      elif o == '-O':
        self.no_preserve_acl = True
      elif o == '-r' or o == '-R':
        # Recursive rewrite also implies operating on all versions.
        self.recursion_requested = True
        self.all_versions = True
      elif o == '-s':
        self.transform_types.add(_TransformTypes.STORAGE_CLASS)
        self.dest_storage_class = NormalizeStorageClass(a)

  if self.read_args_from_stdin:
    if self.args:
      raise CommandException(
          'No arguments allowed with the -I flag.')
    url_strs = StdinIterator()
  else:
    if not self.args:
      raise CommandException(
          'The rewrite command (without -I) expects at '
          'least one URL.')
    url_strs = self.args

  if not self.transform_types:
    raise CommandException(
        'rewrite command requires at least one transformation flag. '
        'Currently supported transformation flags: %s' %
        self.supported_transformation_flags)

  self.preconditions = PreconditionsFromHeaders(self.headers or {})

  url_strs_generator = GenerationCheckGenerator(url_strs)

  # Convert recursive flag to flat wildcard to avoid performing multiple
  # listings.
  if self.recursion_requested:
    url_strs_generator = ConvertRecursiveToFlatWildcard(
        url_strs_generator)

  # Expand the source argument(s).
  name_expansion_iterator = NameExpansionIterator(
      self.command_name, self.debug, self.logger, self.gsutil_api,
      url_strs_generator, self.recursion_requested,
      project_id=self.project_id,
      continue_on_error=self.continue_on_error or self.parallel_operations,
      bucket_listing_fields=['name', 'size'])

  seek_ahead_iterator = None
  # Cannot seek ahead with stdin args, since we can only iterate them
  # once without buffering in memory.
  if not self.read_args_from_stdin:
    # Perform the same recursive-to-flat conversion on original url_strs so
    # that it is as true to the original iterator as possible.
    # NOTE(review): unlike the main iterator above, this conversion is
    # applied even when recursion was not requested -- confirm intended.
    seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
        seek_ahead_url_strs, self.recursion_requested,
        all_versions=self.all_versions, project_id=self.project_id)

  # Perform rewrite requests in parallel (-m) mode, if requested.
  self.Apply(_RewriteFuncWrapper, name_expansion_iterator,
             _RewriteExceptionHandler,
             fail_on_error=(not self.continue_on_error),
             shared_attrs=['op_failure_count'],
             seek_ahead_iterator=seek_ahead_iterator)

  if self.op_failure_count:
    # Pluralize only for more than one failure; testing mere truthiness
    # here would always pick the plural because the count is already known
    # to be non-zero.
    plural_str = 's' if self.op_failure_count > 1 else ''
    raise CommandException(
        '%d file%s/object%s could not be rewritten.' %
        (self.op_failure_count, plural_str, plural_str))

  return 0
def RunCommand(self):
  """Command entry point for the rewrite command.

  Parses the rewrite sub-options, validates that at least one
  transformation flag (-k) was given, expands the URL arguments and applies
  the rewrite function to every expansion result.

  Returns:
    0 when no rewrite failure was counted.

  Raises:
    CommandException: if arguments are supplied together with -I, if no URL
        is supplied without -I, if no transformation flag was given, or if
        op_failure_count is non-zero after the apply step.
  """
  self.continue_on_error = self.parallel_operations
  self.read_args_from_stdin = False
  self.no_preserve_acl = False
  self.supported_transformation_flags = ['-k']
  self.transform_types = []

  self.op_failure_count = 0
  self.current_encryption_tuple, self.current_encryption_sha256 = (
      GetEncryptionTupleAndSha256Hash())

  if self.sub_opts:
    for o, unused_a in self.sub_opts:
      if o == '-f':
        self.continue_on_error = True
      elif o == '-k':
        self.transform_types.append(_TransformTypes.CRYPTO_KEY)
      elif o == '-I':
        self.read_args_from_stdin = True
      elif o == '-O':
        self.no_preserve_acl = True
      elif o == '-r' or o == '-R':
        # Recursive rewrite also implies operating on all versions.
        self.recursion_requested = True
        self.all_versions = True

  if self.read_args_from_stdin:
    if self.args:
      raise CommandException(
          'No arguments allowed with the -I flag.')
    url_strs = StdinIterator()
  else:
    if not self.args:
      raise CommandException(
          'The rewrite command (without -I) expects at '
          'least one URL.')
    url_strs = self.args

  url_strs = GenerationCheckGenerator(url_strs)

  if not self.transform_types:
    raise CommandException(
        'rewrite command requires at least one transformation flag. '
        'Currently supported transformation flags: %s' %
        self.supported_transformation_flags)

  self.preconditions = PreconditionsFromHeaders(self.headers or {})

  # Convert recursive flag to flat wildcard to avoid performing multiple
  # listings.
  if self.recursion_requested:
    url_strs = ConvertRecursiveToFlatWildcard(url_strs)

  # Expand the source argument(s).
  name_expansion_iterator = NameExpansionIterator(
      self.command_name, self.debug, self.logger, self.gsutil_api,
      url_strs, self.recursion_requested, project_id=self.project_id,
      continue_on_error=self.continue_on_error or self.parallel_operations)

  # Perform rewrite requests in parallel (-m) mode, if requested.
  self.Apply(_RewriteFuncWrapper, name_expansion_iterator,
             _RewriteExceptionHandler,
             fail_on_error=(not self.continue_on_error),
             shared_attrs=['op_failure_count'])

  if self.op_failure_count:
    # Pluralize only for more than one failure; testing mere truthiness
    # here would always pick the plural because the count is already known
    # to be non-zero.
    plural_str = 's' if self.op_failure_count > 1 else ''
    raise CommandException(
        '%d file%s/object%s could not be rewritten.' %
        (self.op_failure_count, plural_str, plural_str))

  return 0
def RunCommand(self):
  """Command entry point for the rm command.

  Parses the rm sub-options, records which buckets are slated for recursive
  deletion, then expands the URL arguments and applies the remove function
  to every expansion result.

  Raises:
    CommandException: if arguments are supplied together with -I, if no URL
        is supplied without -I, or if the expansion/removal step raises a
        CommandException while no bucket is slated for deletion and
        continue_on_error is not set.
    ServiceException: re-raised from the expansion/removal step unless
        continue_on_error is set.
  """
  # self.recursion_requested is initialized in command.py (so it can be
  # checked in parent class for all commands).
  self.continue_on_error = False
  self.read_args_from_stdin = False
  self.all_versions = False
  if self.sub_opts:
    for o, unused_a in self.sub_opts:
      if o == '-a':
        self.all_versions = True
      elif o == '-f':
        self.continue_on_error = True
      elif o == '-I':
        self.read_args_from_stdin = True
      elif o == '-r' or o == '-R':
        # Recursive removal also implies removing all versions.
        self.recursion_requested = True
        self.all_versions = True

  if self.read_args_from_stdin:
    if self.args:
      raise CommandException(
          'No arguments allowed with the -I flag.')
    url_strs = StdinIterator()
  else:
    if not self.args:
      raise CommandException(
          'The rm command (without -I) expects at '
          'least one URL.')
    url_strs = self.args

  # Buckets (and the argument strings that named them) slated for
  # recursive deletion.
  bucket_urls_to_delete = []
  bucket_strings_to_delete = []
  if self.recursion_requested:
    bucket_fields = ['id']
    for url_str in url_strs:
      url = StorageUrlFromString(url_str)
      if url.IsBucket() or url.IsProvider():
        for blr in self.WildcardIterator(url_str).IterBuckets(
            bucket_fields=bucket_fields):
          bucket_urls_to_delete.append(blr.storage_url)
          bucket_strings_to_delete.append(url_str)

  self.preconditions = PreconditionsFromHeaders(self.headers or {})

  # Used to track if any files failed to be removed.
  self.everything_removed_okay = True

  try:
    # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api,
        url_strs, self.recursion_requested, project_id=self.project_id,
        all_versions=self.all_versions,
        continue_on_error=self.continue_on_error or self.parallel_operations)

    # Perform remove requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
               _RemoveExceptionHandler,
               fail_on_error=(not self.continue_on_error))

  # Assuming the bucket has versioning enabled, url's that don't map to
  # objects should throw an error even with all_versions, since the prior
  # round of deletes only sends objects to a history table.
  # This assumption that rm -a is only called for versioned buckets should be
  # corrected, but the fix is non-trivial.
  except CommandException as e:
    # Don't raise if there are buckets to delete -- it's valid to say:
    #   gsutil rm -r gs://some_bucket
    # if the bucket is empty.
    if not bucket_urls_to_delete and not self.continue_on_error:
      raise
    # Reset the failure count if we failed due to an empty bucket that we're
    # going to delete.
    msg = 'No URLs matched: '
    if msg in str(e):
      parts = str(e).split(msg)
      if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
        ResetFailureCount()
  except ServiceException:
    # "except X as e" / bare "except X" parse on both Python 2.6+ and
    # Python 3; the old "except X, e" form is a SyntaxError on Python 3.
    if not self.continue_on_error:
      raise
def RunCommand(self):
  """Command entry point for the rewrite command.

  Parses the rewrite sub-options, validates that at least one
  transformation flag (-k or -s) was given, expands the URL arguments,
  caches a sha256-to-keywrapper map for the CSEK keys found in the boto
  config, and applies the rewrite function to every expansion result.

  Returns:
    0 when no rewrite failure was counted.

  Raises:
    CommandException: if arguments are supplied together with -I, if no URL
        is supplied without -I, if no transformation flag was given, or if
        op_failure_count is non-zero after the apply step.
  """
  self.continue_on_error = self.parallel_operations
  self.csek_hash_to_keywrapper = {}
  self.dest_storage_class = None
  self.no_preserve_acl = False
  self.read_args_from_stdin = False
  self.supported_transformation_flags = ['-k', '-s']
  self.transform_types = set()

  self.op_failure_count = 0
  self.boto_file_encryption_keywrapper = GetEncryptionKeyWrapper(config)
  self.boto_file_encryption_sha256 = (
      self.boto_file_encryption_keywrapper.crypto_key_sha256
      if self.boto_file_encryption_keywrapper else None)

  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-f':
        self.continue_on_error = True
      elif o == '-k':
        self.transform_types.add(_TransformTypes.CRYPTO_KEY)
      elif o == '-I':
        self.read_args_from_stdin = True
      elif o == '-O':
        self.no_preserve_acl = True
      elif o == '-r' or o == '-R':
        # Recursive rewrite also implies operating on all versions.
        self.recursion_requested = True
        self.all_versions = True
      elif o == '-s':
        self.transform_types.add(_TransformTypes.STORAGE_CLASS)
        self.dest_storage_class = NormalizeStorageClass(a)

  if self.read_args_from_stdin:
    if self.args:
      raise CommandException(
          'No arguments allowed with the -I flag.')
    url_strs = StdinIterator()
  else:
    if not self.args:
      raise CommandException(
          'The rewrite command (without -I) expects at '
          'least one URL.')
    url_strs = self.args

  if not self.transform_types:
    raise CommandException(
        'rewrite command requires at least one transformation flag. '
        'Currently supported transformation flags: %s' %
        self.supported_transformation_flags)

  self.preconditions = PreconditionsFromHeaders(self.headers or {})

  url_strs_generator = GenerationCheckGenerator(url_strs)

  # Convert recursive flag to flat wildcard to avoid performing multiple
  # listings.
  if self.recursion_requested:
    url_strs_generator = ConvertRecursiveToFlatWildcard(
        url_strs_generator)

  # Expand the source argument(s).
  name_expansion_iterator = NameExpansionIterator(
      self.command_name, self.debug, self.logger, self.gsutil_api,
      url_strs_generator, self.recursion_requested,
      project_id=self.project_id,
      continue_on_error=self.continue_on_error or self.parallel_operations,
      bucket_listing_fields=['name', 'size'])

  seek_ahead_iterator = None
  # Cannot seek ahead with stdin args, since we can only iterate them
  # once without buffering in memory.
  if not self.read_args_from_stdin:
    # Perform the same recursive-to-flat conversion on original url_strs so
    # that it is as true to the original iterator as possible.
    # NOTE(review): unlike the main iterator above, this conversion is
    # applied even when recursion was not requested -- confirm intended.
    seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
        seek_ahead_url_strs, self.recursion_requested,
        all_versions=self.all_versions, project_id=self.project_id)

  # Rather than have each worker repeatedly calculate the sha256 hash for each
  # decryption_key in the boto config, do this once now and cache the results.
  # Config attributes are numbered from 1 (decryption_key1, ...).
  for key_number in range(1, MAX_DECRYPTION_KEYS + 1):
    keywrapper = CryptoKeyWrapperFromKey(
        config.get('GSUtil', 'decryption_key%s' % key_number, None))
    if keywrapper is None:
      # Stop at first attribute absence in lexicographical iteration.
      break
    if keywrapper.crypto_type == CryptoKeyType.CSEK:
      self.csek_hash_to_keywrapper[
          keywrapper.crypto_key_sha256] = keywrapper
  # Also include the encryption_key, since it should be used to decrypt and
  # then encrypt if the object's CSEK should remain the same.
  if self.boto_file_encryption_sha256 is not None:
    self.csek_hash_to_keywrapper[self.boto_file_encryption_sha256] = (
        self.boto_file_encryption_keywrapper)

  # Perform rewrite requests in parallel (-m) mode, if requested.
  self.Apply(_RewriteFuncWrapper, name_expansion_iterator,
             _RewriteExceptionHandler,
             fail_on_error=(not self.continue_on_error),
             shared_attrs=['op_failure_count'],
             seek_ahead_iterator=seek_ahead_iterator)

  if self.op_failure_count:
    # Pluralize only for more than one failure; testing mere truthiness
    # here would always pick the plural because the count is already known
    # to be non-zero.
    plural_str = 's' if self.op_failure_count > 1 else ''
    raise CommandException(
        '%d file%s/object%s could not be rewritten.' %
        (self.op_failure_count, plural_str, plural_str))

  return 0