def RunCommand(self):
    """Command entry point for the rb command.

    Deletes every bucket matched by the URL strings in ``self.args``. The
    ``-f`` sub-option sets ``self.continue_on_error``; when it is set,
    listing and deletion failures are recorded and skipped instead of being
    re-raised.

    Returns:
        1 if any listing or deletion failed, otherwise 0.

    Raises:
        CommandException: if a URL names an object, if deleting a bucket
            fails with a ``VersionedBucketNotEmpty`` reason (without ``-f``),
            or if the loop never completed a deletion attempt for any bucket.
    """
    self.continue_on_error = False
    if self.sub_opts:
        for o, unused_a in self.sub_opts:
            if o == '-f':
                self.continue_on_error = True

    did_some_work = False
    some_failed = False
    for url_str in self.args:
        wildcard_url = StorageUrlFromString(url_str)
        if wildcard_url.IsObject():
            raise CommandException('"rb" command requires a provider or '
                                   'bucket URL')
        # Wrap WildcardIterator call in try/except so we can avoid printing
        # errors with -f option if a non-existent URL listed, permission
        # denial happens while listing, etc.
        try:
            # Materialize iterator results into a list to catch exceptions.
            # Since this is listing buckets this list shouldn't be too large
            # to fit in memory at once.
            # Also, avoid listing all fields to avoid performing unnecessary
            # bucket metadata GETs. These would also be problematic when
            # billing is disabled, as deletes are allowed but GetBucket is
            # not.
            blrs = list(
                self.WildcardIterator(url_str).IterBuckets(
                    bucket_fields=['id']))
        # Catch Exception rather than using a bare except so that -f never
        # swallows KeyboardInterrupt or SystemExit.
        except Exception:  # pylint: disable=broad-except
            some_failed = True
            if self.continue_on_error:
                continue
            raise
        for blr in blrs:
            url = blr.storage_url
            self.logger.info('Removing %s...', url)
            try:
                self.gsutil_api.DeleteBucket(url.bucket_name,
                                             provider=url.scheme)
            except NotEmptyException as e:
                some_failed = True
                if self.continue_on_error:
                    continue
                elif 'VersionedBucketNotEmpty' in e.reason:
                    raise CommandException(
                        'Bucket is not empty. Note: this is a '
                        'versioned bucket, so to delete all '
                        'objects\nyou need to use:'
                        '\n\tgsutil rm -r %s' % url)
                else:
                    raise
            # See above: interrupts and interpreter exit must propagate even
            # when continuing on error.
            except Exception:  # pylint: disable=broad-except
                some_failed = True
                if not self.continue_on_error:
                    raise
            did_some_work = True
    if not did_some_work:
        raise CommandException(NO_URLS_MATCHED_TARGET % list(self.args))
    return 1 if some_failed else 0
def RunCommand(self):
    """Command entry point for the setmeta command.

    Collects the ``-h`` header arguments from ``self.sub_opts``, turns them
    into ``self.metadata_change`` (headers to remove are mapped to the empty
    string) and applies the change to every object named by ``self.args``.

    Returns:
        0 on success.

    Raises:
        CommandException: if a ``-h`` argument mentions a canned-ACL header,
            if a single non-recursive URL does not name a cloud object, or
            if ``self.everything_set_okay`` is false after the apply step.
        AccessDeniedException: re-raised from the apply step (after a
            service-account warning when the status is 403).
    """
    header_args = []
    for flag, value in self.sub_opts or ():
        if flag == '-n':
            self.logger.warning(
                'Warning: gsutil setmeta -n is now on by default, and will be '
                'removed in the future.\nPlease use gsutil acl set ... to set '
                'canned ACLs.')
            continue
        if flag != '-h':
            continue
        if 'x-goog-acl' in value or 'x-amz-acl' in value:
            raise CommandException(
                'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
                'set ... to set canned ACLs.')
        header_args.append(value)

    removals, additions = self._ParseMetadataHeaders(header_args)

    # Start from the headers being set, then mark each removed header with
    # an empty value.
    self.metadata_change = additions
    for removed_header in removals:
        self.metadata_change[removed_header] = ''

    if len(self.args) == 1 and not self.recursion_requested:
        only_url = StorageUrlFromString(self.args[0])
        if not only_url.IsCloudUrl() or not only_url.IsObject():
            raise CommandException(
                'URL (%s) must name an object' % self.args[0])

    # Flag read back after Apply to detect per-object failures.
    self.everything_set_okay = True

    expansion = NameExpansionIterator(
        self.command_name,
        self.debug,
        self.logger,
        self.gsutil_api,
        self.args,
        self.recursion_requested,
        all_versions=self.all_versions,
        continue_on_error=self.parallel_operations)

    try:
        # Runs in parallel (-m) when requested, using the configured number
        # of processes and threads; otherwise runs sequentially in this
        # process.
        self.Apply(
            _SetMetadataFuncWrapper,
            expansion,
            _SetMetadataExceptionHandler,
            fail_on_error=True)
    except AccessDeniedException as denied:
        if denied.status == 403:
            self._WarnServiceAccounts()
        raise

    if self.everything_set_okay:
        return 0
    raise CommandException('Metadata for some objects could not be set.')
def RunCommand(self):
    """Command entry point for stat command.

    For each URL string in ``self.args``, fetches object metadata (via a
    wildcard listing when the URL contains a wildcard, otherwise via a
    single metadata GET) and prints it when the root logger is enabled for
    INFO. A single GET that raises ``EncryptionException`` is retried with
    only ``UNENCRYPTED_FULL_LISTING_FIELDS``.

    Returns:
        1 if at least one argument matched no object, otherwise 0.

    Raises:
        CommandException: if an argument is not an object URL.
        InvalidUrlError: propagated unchanged from the lookup.
    """
    stat_fields = ENCRYPTED_FIELDS + UNENCRYPTED_FULL_LISTING_FIELDS

    def _InfoEnabled():
        """Returns True if the root logger would emit INFO records."""
        return logging.getLogger().isEnabledFor(logging.INFO)

    def _WriteStderrLine(message):
        """Writes message to stderr, newline-terminated.

        Without the terminator, the "not authorized" and "no match"
        diagnostics for one or several URLs run together on a single line.
        """
        if not message.endswith('\n'):
            message += '\n'
        sys.stderr.write(message)

    found_nonmatching_arg = False
    for url_str in self.args:
        arg_matches = 0
        url = StorageUrlFromString(url_str)
        if not url.IsObject():
            raise CommandException(
                'The stat command only works with object URLs')
        try:
            if ContainsWildcard(url_str):
                blr_iter = self.WildcardIterator(url_str).IterObjects(
                    bucket_listing_fields=stat_fields)
            else:
                try:
                    single_obj = self.gsutil_api.GetObjectMetadata(
                        url.bucket_name,
                        url.object_name,
                        generation=url.generation,
                        provider=url.scheme,
                        fields=stat_fields)
                except EncryptionException:
                    # Retry without requesting hashes.
                    single_obj = self.gsutil_api.GetObjectMetadata(
                        url.bucket_name,
                        url.object_name,
                        generation=url.generation,
                        provider=url.scheme,
                        fields=UNENCRYPTED_FULL_LISTING_FIELDS)
                blr_iter = [BucketListingObject(url, root_object=single_obj)]
            for blr in blr_iter:
                if blr.IsObject():
                    arg_matches += 1
                    # TODO: Request fewer fields if we're not printing the
                    # object.
                    if _InfoEnabled():
                        PrintFullInfoAboutObject(blr, incl_acl=False)
        except AccessDeniedException:
            if _InfoEnabled():
                _WriteStderrLine(
                    'You aren\'t authorized to read %s - skipping' % url_str)
        except InvalidUrlError:
            raise
        except NotFoundException:
            # A missing object is reported below through arg_matches == 0.
            pass
        if not arg_matches:
            if _InfoEnabled():
                _WriteStderrLine(NO_URLS_MATCHED_TARGET % url_str)
            found_nonmatching_arg = True
    return 1 if found_nonmatching_arg else 0
def RunCommand(self):
    """Command entry point for stat command.

    For each URL string in ``self.args``, fetches object metadata (via a
    wildcard listing when the URL contains a wildcard, otherwise via a
    single metadata GET) and prints it when the root logger is enabled for
    INFO.

    Returns:
        1 if at least one argument matched no object, otherwise 0 (including
        when ``self.args`` is empty).

    Raises:
        CommandException: if an argument is not an object URL.
        InvalidUrlError: propagated unchanged from the lookup.
    """
    # List of fields we'll print for stat objects.
    stat_fields = [
        'updated',
        'cacheControl',
        'contentDisposition',
        'contentEncoding',
        'contentLanguage',
        'size',
        'contentType',
        'componentCount',
        'metadata',
        'crc32c',
        'md5Hash',
        'etag',
        'generation',
        'metageneration',
    ]

    def _InfoEnabled():
        """Returns True if the root logger would emit INFO records."""
        return logging.getLogger().isEnabledFor(logging.INFO)

    found_nonmatching_arg = False
    for url_str in self.args:
        arg_matches = 0
        url = StorageUrlFromString(url_str)
        if not url.IsObject():
            raise CommandException(
                'The stat command only works with object URLs')
        try:
            if ContainsWildcard(url_str):
                blr_iter = self.WildcardIterator(url_str).IterObjects(
                    bucket_listing_fields=stat_fields)
            else:
                single_obj = self.gsutil_api.GetObjectMetadata(
                    url.bucket_name,
                    url.object_name,
                    generation=url.generation,
                    provider=url.scheme,
                    fields=stat_fields)
                blr_iter = [BucketListingObject(url, root_object=single_obj)]
            for blr in blr_iter:
                if blr.IsObject():
                    arg_matches += 1
                    if _InfoEnabled():
                        PrintFullInfoAboutObject(blr, incl_acl=False)
        except AccessDeniedException:
            if _InfoEnabled():
                # Newline-terminated so it does not run into the
                # "No URLs matched" message written just below.
                sys.stderr.write(
                    'You aren\'t authorized to read %s - skipping\n' % url_str)
        except InvalidUrlError:
            raise
        except NotFoundException:
            # A missing object is reported below through arg_matches == 0.
            pass
        if not arg_matches:
            if _InfoEnabled():
                # Newline-terminated so messages for successive URLs stay on
                # separate lines.
                sys.stderr.write('No URLs matched %s\n' % url_str)
            found_nonmatching_arg = True
    return 1 if found_nonmatching_arg else 0
def _SetHold(self, obj_metadata_update_wrapper, url_args,
             sub_command_full_name):
    """Common logic to set or unset Event-Based/Temporary Hold on objects.

    Args:
      obj_metadata_update_wrapper: The function for updating related fields
          in Object metadata.
      url_args: List of object URIs.
      sub_command_full_name: The full name for sub-command:
          "Temporary" / "Event-Based"

    Raises:
      CommandException: if a single non-recursive URL does not name a cloud
          object, or if ``self.everything_set_okay`` is false after the
          apply step.
      AccessDeniedException: re-raised from the apply step (after a
          service-account warning when the status is 403).
    """
    if len(url_args) == 1 and not self.recursion_requested:
        sole_url = StorageUrlFromString(url_args[0])
        if not sole_url.IsCloudUrl() or not sole_url.IsObject():
            raise CommandException(
                'URL ({}) must name an object'.format(url_args[0]))

    objects_to_update = self._GetObjectNameExpansionIterator(url_args)
    seek_ahead = self._GetSeekAheadNameExpansionIterator(url_args)

    # Flag read back after Apply to detect per-object failures.
    self.everything_set_okay = True

    try:
        # TODO: implement '-c' flag to continue_on_error
        # Runs in parallel (-m) when requested, using the configured number
        # of processes and threads; otherwise runs sequentially in this
        # process.
        self.Apply(
            obj_metadata_update_wrapper,
            objects_to_update,
            UpdateObjectMetadataExceptionHandler,
            fail_on_error=True,
            seek_ahead_iterator=seek_ahead)
    except AccessDeniedException as denied:
        if denied.status == 403:
            self._WarnServiceAccounts()
        raise

    if self.everything_set_okay:
        return
    raise CommandException(
        '{} Hold for some objects could not be set.'.format(
            sub_command_full_name))
def RunCommand(self):
    """Command entry point for the ls command.

    Parses the sub-options (-a, -e, -b, -h, -l, -L, -p, -r/-R), then lists
    each URL in ``self.args`` (defaulting to ``gs://``): provider URLs list
    buckets, bucket URLs with -b list bucket info, and everything else is
    expanded and printed through an ``LsHelper``.

    All output uses the parenthesized single-argument ``print(...)`` form,
    which prints the same text as the Python 2 print statement while also
    being valid Python 3 syntax.

    Returns:
        0 on success.

    Raises:
        CommandException: for file URLs, an unknown listing style, or when an
            object URL matched no objects or prefixes.
        NotFoundException: when a non-wildcard ``-b`` bucket URL matched no
            buckets.
    """
    got_nomatch_errors = False
    got_bucket_nomatch_errors = False
    listing_style = ListingStyle.SHORT
    get_bucket_info = False
    self.recursion_requested = False
    self.all_versions = False
    self.include_etag = False
    self.human_readable = False
    if self.sub_opts:
        for o, a in self.sub_opts:
            if o == '-a':
                self.all_versions = True
            elif o == '-e':
                self.include_etag = True
            elif o == '-b':
                get_bucket_info = True
            elif o == '-h':
                self.human_readable = True
            elif o == '-l':
                listing_style = ListingStyle.LONG
            elif o == '-L':
                listing_style = ListingStyle.LONG_LONG
            elif o == '-p':
                self.project_id = a
            elif o == '-r' or o == '-R':
                self.recursion_requested = True

    if not self.args:
        # default to listing all gs buckets
        self.args = ['gs://']

    total_objs = 0
    total_bytes = 0

    def MaybePrintBucketHeader(blr):
        """Prints a '<url>:' header, but only when several URLs are listed."""
        if len(self.args) > 1:
            print('%s:' % blr.url_string.encode(UTF8))

    def _PrintPrefixLong(blr):
        """Prints a prefix indented by 33 columns, for long listings."""
        print('%-33s%s' % ('', blr.url_string.encode(UTF8)))

    for url_str in self.args:
        storage_url = StorageUrlFromString(url_str)
        if storage_url.IsFileUrl():
            raise CommandException('Only cloud URLs are supported for %s' %
                                   self.command_name)
        bucket_fields = None
        if (listing_style == ListingStyle.SHORT or
                listing_style == ListingStyle.LONG):
            bucket_fields = ['id']
        elif listing_style == ListingStyle.LONG_LONG:
            bucket_fields = [
                'location',
                'storageClass',
                'versioning',
                'acl',
                'defaultObjectAcl',
                'website',
                'logging',
                'cors',
                'lifecycle',
            ]
        if storage_url.IsProvider():
            # Provider URL: use bucket wildcard to list buckets.
            for blr in self.WildcardIterator(
                    '%s://*' % storage_url.scheme).IterBuckets(
                        bucket_fields=bucket_fields):
                self._PrintBucketInfo(blr, listing_style)
        elif storage_url.IsBucket() and get_bucket_info:
            # ls -b bucket listing request: List info about bucket(s).
            total_buckets = 0
            for blr in self.WildcardIterator(url_str).IterBuckets(
                    bucket_fields=bucket_fields):
                if not ContainsWildcard(url_str) and not blr.root_object:
                    # Iterator does not make an HTTP call for non-wildcarded
                    # listings with fields=='id'. Ensure the bucket exists by
                    # calling GetBucket.
                    self.gsutil_api.GetBucket(blr.storage_url.bucket_name,
                                              fields=['id'],
                                              provider=storage_url.scheme)
                self._PrintBucketInfo(blr, listing_style)
                total_buckets += 1
            if not ContainsWildcard(url_str) and not total_buckets:
                got_bucket_nomatch_errors = True
        else:
            # URL names a bucket, object, or object subdir ->
            # list matching object(s) / subdirs.
            if listing_style == ListingStyle.SHORT:
                # ls helper by default readies us for a short listing.
                ls_helper = LsHelper(
                    self.WildcardIterator,
                    self.logger,
                    all_versions=self.all_versions,
                    print_bucket_header_func=MaybePrintBucketHeader,
                    should_recurse=self.recursion_requested)
            elif listing_style == ListingStyle.LONG:
                bucket_listing_fields = ['name', 'updated', 'size']
                if self.all_versions:
                    bucket_listing_fields.extend(
                        ['generation', 'metageneration'])
                if self.include_etag:
                    bucket_listing_fields.append('etag')
                ls_helper = LsHelper(
                    self.WildcardIterator,
                    self.logger,
                    print_object_func=self._PrintLongListing,
                    print_dir_func=_PrintPrefixLong,
                    print_bucket_header_func=MaybePrintBucketHeader,
                    all_versions=self.all_versions,
                    should_recurse=self.recursion_requested,
                    fields=bucket_listing_fields)
            elif listing_style == ListingStyle.LONG_LONG:
                # List all fields
                bucket_listing_fields = None
                ls_helper = LsHelper(
                    self.WildcardIterator,
                    self.logger,
                    print_object_func=PrintFullInfoAboutObject,
                    print_dir_func=_PrintPrefixLong,
                    print_bucket_header_func=MaybePrintBucketHeader,
                    all_versions=self.all_versions,
                    should_recurse=self.recursion_requested,
                    fields=bucket_listing_fields)
            else:
                raise CommandException('Unknown listing style: %s' %
                                       listing_style)

            exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(
                storage_url)
            if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
                got_nomatch_errors = True
            total_bytes += exp_bytes
            total_objs += exp_objs

    if total_objs and listing_style != ListingStyle.SHORT:
        print('TOTAL: %d objects, %d bytes (%s)' %
              (total_objs, total_bytes, MakeHumanReadable(float(total_bytes))))
    if got_nomatch_errors:
        raise CommandException('One or more URLs matched no objects.')
    if got_bucket_nomatch_errors:
        raise NotFoundException(
            'One or more bucket URLs matched no buckets.')

    return 0
def RunCommand(self):
    """Command entry point for the rm command.

    Parses the sub-options (-a, -f, -I, -r/-R), removes every object matched
    by the URL arguments (read from stdin with -I), then, for recursive
    removals, removes ``_$folder$`` placeholder objects and finally the
    buckets themselves.

    Returns:
        0 on success.

    Raises:
        CommandException: on invalid argument combinations, when a
            non-existent bucket was encountered during listing, or when some
            objects could not be removed.
        ServiceException: re-raised from the removal step unless
            continue-on-error is in effect.
    """
    # self.recursion_requested is initialized in command.py (so it can be
    # checked in parent class for all commands).
    self.continue_on_error = self.parallel_operations
    self.read_args_from_stdin = False
    self.all_versions = False
    if self.sub_opts:
        for o, unused_a in self.sub_opts:
            if o == '-a':
                self.all_versions = True
            elif o == '-f':
                self.continue_on_error = True
            elif o == '-I':
                self.read_args_from_stdin = True
            elif o == '-r' or o == '-R':
                self.recursion_requested = True
                self.all_versions = True

    if self.read_args_from_stdin:
        if self.args:
            raise CommandException('No arguments allowed with the -I flag.')
        url_strs = StdinIterator()
    else:
        if not self.args:
            raise CommandException('The rm command (without -I) expects at '
                                   'least one URL.')
        url_strs = self.args

    # Tracks number of object deletes that failed.
    self.op_failure_count = 0

    # Tracks if any buckets were missing.
    self.bucket_not_found_count = 0

    # Tracks buckets that are slated for recursive deletion.
    bucket_urls_to_delete = []
    self.bucket_strings_to_delete = []

    if self.recursion_requested:
        bucket_fields = ['id']
        # NOTE(review): with -I, url_strs is a StdinIterator, which the
        # seek-ahead comment below describes as iterable only once; this
        # loop would presumably exhaust it before name expansion. Confirm
        # how -I is expected to combine with -r.
        for url_str in url_strs:
            url = StorageUrlFromString(url_str)
            if url.IsBucket() or url.IsProvider():
                for blr in self.WildcardIterator(url_str).IterBuckets(
                        bucket_fields=bucket_fields):
                    bucket_urls_to_delete.append(blr.storage_url)
                    self.bucket_strings_to_delete.append(url_str)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    try:
        # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
        name_expansion_iterator = NameExpansionIterator(
            self.command_name,
            self.debug,
            self.logger,
            self.gsutil_api,
            url_strs,
            self.recursion_requested,
            project_id=self.project_id,
            all_versions=self.all_versions,
            continue_on_error=(self.continue_on_error or
                               self.parallel_operations))

        seek_ahead_iterator = None
        # Cannot seek ahead with stdin args, since we can only iterate them
        # once without buffering in memory.
        if not self.read_args_from_stdin:
            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                url_strs,
                self.recursion_requested,
                all_versions=self.all_versions,
                project_id=self.project_id)

        # Perform remove requests in parallel (-m) mode, if requested, using
        # configured number of parallel processes and threads. Otherwise,
        # perform requests with sequential function calls in current process.
        self.Apply(
            _RemoveFuncWrapper,
            name_expansion_iterator,
            _RemoveExceptionHandler,
            fail_on_error=(not self.continue_on_error),
            shared_attrs=['op_failure_count', 'bucket_not_found_count'],
            seek_ahead_iterator=seek_ahead_iterator)

    # Assuming the bucket has versioning enabled, url's that don't map to
    # objects should throw an error even with all_versions, since the prior
    # round of deletes only sends objects to a history table.
    # This assumption that rm -a is only called for versioned buckets should
    # be corrected, but the fix is non-trivial.
    except CommandException as e:
        # Don't raise if there are buckets to delete -- it's valid to say:
        #   gsutil rm -r gs://some_bucket
        # if the bucket is empty.
        if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete, e):
            DecrementFailureCount()
        else:
            raise
    except ServiceException:
        if not self.continue_on_error:
            raise

    if self.bucket_not_found_count:
        raise CommandException(
            'Encountered non-existent bucket during listing')

    if self.op_failure_count and not self.continue_on_error:
        raise CommandException('Some files could not be removed.')

    # If this was a gsutil rm -r command covering any bucket subdirs,
    # remove any dir_$folder$ objects (which are created by various web UI
    # tools to simulate folders).
    if self.recursion_requested:
        folder_object_wildcards = []
        for url_str in url_strs:
            url = StorageUrlFromString(url_str)
            if url.IsObject():
                folder_object_wildcards.append('%s**_$folder$' % url_str)
        if folder_object_wildcards:
            self.continue_on_error = True
            try:
                name_expansion_iterator = NameExpansionIterator(
                    self.command_name,
                    self.debug,
                    self.logger,
                    self.gsutil_api,
                    folder_object_wildcards,
                    self.recursion_requested,
                    project_id=self.project_id,
                    all_versions=self.all_versions)
                # When we're removing folder objects, always continue on
                # error.
                self.Apply(_RemoveFuncWrapper,
                           name_expansion_iterator,
                           _RemoveFoldersExceptionHandler,
                           fail_on_error=False)
            except CommandException as e:
                # Ignore exception from name expansion due to an absent
                # folder file.
                if not e.reason.startswith(NO_URLS_MATCHED_GENERIC):
                    raise

    # Now that all data has been deleted, delete any bucket URLs.
    for url in bucket_urls_to_delete:
        self.logger.info('Removing %s...', url)

        # The closure reads the loop variable but is invoked immediately, so
        # it always sees the current bucket URL.
        @Retry(NotEmptyException, tries=3, timeout_secs=1)
        def BucketDeleteWithRetry():
            self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)

        BucketDeleteWithRetry()

    if self.op_failure_count:
        # Pluralize only for counts above one; the count is known to be
        # non-zero here, so testing its truthiness would always pluralize.
        plural_str = 's' if self.op_failure_count > 1 else ''
        raise CommandException(
            '%d file%s/object%s could not be removed.' %
            (self.op_failure_count, plural_str, plural_str))

    return 0
def RunCommand(self):
    """Command entry point for the du command.

    Parses the sub-options (-0, -a, -c, -e, -h, -s, -X), then expands each
    URL in ``self.args`` (defaulting to ``gs://``) through an ``LsHelper``
    that requests only object sizes, printing per-directory and optional
    summary/total lines.

    Returns:
        0 on success.

    Raises:
        CommandException: for file URLs, or when a wildcard object URL
            matched no objects and no exclude patterns are in effect.
    """
    self.line_ending = '\n'
    self.all_versions = False
    self.produce_total = False
    self.human_readable = False
    self.summary_only = False
    self.exclude_patterns = []

    # Flags that simply switch a boolean attribute on.
    switch_attrs = {
        '-a': 'all_versions',
        '-c': 'produce_total',
        '-h': 'human_readable',
        '-s': 'summary_only',
    }
    for flag, value in self.sub_opts or ():
        if flag in switch_attrs:
            setattr(self, switch_attrs[flag], True)
        elif flag == '-0':
            self.line_ending = '\0'
        elif flag == '-e':
            self.exclude_patterns.append(value)
        elif flag == '-X':
            # '-' reads the exclude patterns from stdin; anything else is a
            # file path. The stream is closed either way.
            pattern_source = sys.stdin if value == '-' else open(value, 'r')
            try:
                for raw_line in pattern_source:
                    pattern = raw_line.strip()
                    if pattern:
                        self.exclude_patterns.append(pattern)
            finally:
                pattern_source.close()

    if not self.args:
        # Default to listing all gs buckets.
        self.args = ['gs://']

    def _PrintObjectLong(blr):
        return self._PrintInfoAboutBucketListingRef(blr)

    def _PrintNothing(unused_blr=None):
        pass

    def _PrintDirectory(num_bytes, name):
        if self.summary_only:
            return
        self._PrintSummaryLine(num_bytes, name)

    grand_total = 0
    saw_unmatched_wildcard = False

    for url_arg in self.args:
        top_url = StorageUrlFromString(url_arg)
        if top_url.IsFileUrl():
            raise CommandException(
                'Only cloud URLs are supported for %s' % self.command_name)

        helper = LsHelper(
            self.WildcardIterator,
            self.logger,
            print_object_func=_PrintObjectLong,
            print_dir_func=_PrintNothing,
            print_dir_header_func=_PrintNothing,
            print_dir_summary_func=_PrintDirectory,
            print_newline_func=_PrintNothing,
            all_versions=self.all_versions,
            should_recurse=True,
            exclude_patterns=self.exclude_patterns,
            fields=['size'])

        # The helper expands to objects and prefixes, so do a top-level
        # expansion to buckets first.
        if top_url.IsProvider():
            # Provider URL: wildcard over all of the provider's buckets.
            bucket_pattern = '%s://*' % top_url.scheme
        elif top_url.IsBucket():
            bucket_pattern = '%s://%s' % (top_url.scheme, top_url.bucket_name)
        else:
            bucket_pattern = None

        if bucket_pattern is None:
            top_level_refs = [BucketListingObject(top_url)]
        else:
            top_level_refs = self.WildcardIterator(bucket_pattern).IterBuckets(
                bucket_fields=['id'])

        for ref in top_level_refs:
            expand_url = ref.storage_url
            if expand_url.IsBucket() and self.summary_only:
                expand_url = StorageUrlFromString(
                    expand_url.CreatePrefixUrl(wildcard_suffix='**'))
            _, obj_count, byte_count = helper.ExpandUrlAndPrint(expand_url)
            if (expand_url.IsObject() and obj_count == 0 and
                    ContainsWildcard(url_arg) and not self.exclude_patterns):
                saw_unmatched_wildcard = True
            grand_total += byte_count

            if self.summary_only:
                self._PrintSummaryLine(byte_count, ref.url_string.rstrip('/'))

    if self.produce_total:
        self._PrintSummaryLine(grand_total, 'total')

    if saw_unmatched_wildcard:
        raise CommandException('One or more URLs matched no objects.')

    return 0
def RunCommand(self):
    """Command entry point for the setmeta command.

    Collects the ``-h`` header arguments from ``self.sub_opts``, turns them
    into ``self.metadata_change`` (headers to remove are mapped to the empty
    string) and applies the change to every object named by ``self.args``.

    Returns:
        0 on success.

    Raises:
        CommandException: if a ``-h`` argument mentions a canned-ACL header,
            if no metadata change was requested, if a single non-recursive
            URL does not name a cloud object, or if
            ``self.everything_set_okay`` is false after the apply step.
        AccessDeniedException: re-raised from the apply step (after a
            service-account warning when the status is 403).
    """
    header_args = []
    for flag, value in self.sub_opts or ():
        if flag != '-h':
            continue
        if 'x-goog-acl' in value or 'x-amz-acl' in value:
            raise CommandException(
                'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
                'set ... to set canned ACLs.')
        header_args.append(value)

    removals, additions = self._ParseMetadataHeaders(header_args)

    # Start from the headers being set, then mark each removed header with
    # an empty value.
    self.metadata_change = additions
    for removed_header in removals:
        self.metadata_change[removed_header] = ''

    if not self.metadata_change:
        raise CommandException(
            'gsutil setmeta requires one or more headers to be provided with the'
            ' -h flag. See "gsutil help setmeta" for more information.')

    if len(self.args) == 1 and not self.recursion_requested:
        only_url = StorageUrlFromString(self.args[0])
        if not only_url.IsCloudUrl() or not only_url.IsObject():
            raise CommandException(
                'URL (%s) must name an object' % self.args[0])

    # Flag read back after Apply to detect per-object failures.
    self.everything_set_okay = True

    self.preconditions = PreconditionsFromHeaders(self.headers)

    expansion = NameExpansionIterator(
        self.command_name,
        self.debug,
        self.logger,
        self.gsutil_api,
        self.args,
        self.recursion_requested,
        all_versions=self.all_versions,
        continue_on_error=self.parallel_operations,
        bucket_listing_fields=['generation', 'metadata', 'metageneration'])

    seek_ahead = SeekAheadNameExpansionIterator(
        self.command_name,
        self.debug,
        self.GetSeekAheadGsutilApi(),
        self.args,
        self.recursion_requested,
        all_versions=self.all_versions,
        project_id=self.project_id)

    try:
        # Runs in parallel (-m) when requested, using the configured number
        # of processes and threads; otherwise runs sequentially in this
        # process.
        self.Apply(
            _SetMetadataFuncWrapper,
            expansion,
            _SetMetadataExceptionHandler,
            fail_on_error=True,
            seek_ahead_iterator=seek_ahead)
    except AccessDeniedException as denied:
        if denied.status == 403:
            self._WarnServiceAccounts()
        raise

    if self.everything_set_okay:
        return 0
    raise CommandException('Metadata for some objects could not be set.')