class CatCommand(Command):
  """Streams the contents of one or more cloud objects to stdout.

  Supports -h (print a header before each object) and -r (restrict output
  to a byte range of the form "start-end", "start-", or "-N").
  """

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'cat',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=NO_MAX,
      supported_sub_args='hr:',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])

  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='cat',
      help_name_aliases=[],
      help_type='command_help',
      help_one_line_summary='Concatenate object content to stdout',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the cat command."""
    # Defaults: no per-object header, stream from byte 0 through EOF.
    show_header = False
    start_byte = 0
    end_byte = None
    # Matches "start-", "start-end", or "-N" (the final N bytes).
    range_pattern = re.compile(
        '^(?P<start>[0-9]+)-(?P<end>[0-9]*)$|^(?P<endslice>-[0-9]+)$')
    for flag, flag_value in self.sub_opts or []:
      if flag == '-h':
        show_header = True
      elif flag == '-r':
        requested_range = flag_value.strip()
        range_match = range_pattern.match(requested_range)
        if not range_match:
          raise CommandException('Invalid range (%s)' % requested_range)
        if range_match.group('start'):
          start_byte = long(range_match.group('start'))
        if range_match.group('end'):
          end_byte = long(range_match.group('end'))
        if range_match.group('endslice'):
          # Negative start byte means "count back from the end".
          start_byte = long(range_match.group('endslice'))
      else:
        self.RaiseInvalidArgumentException()
    return CatHelper(self).CatUrlStrings(self.args,
                                         show_header=show_header,
                                         start_byte=start_byte,
                                         end_byte=end_byte)
class FakeCommandWithCompleters(Command):
  """Test-only command exercising each completer-backed argument type."""

  command_spec = Command.CreateCommandSpec(
      'fake2',
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument(),
          CommandArgument.MakeZeroOrMoreFileURLsArgument(),
          CommandArgument.MakeZeroOrMoreCloudOrFileURLsArgument(),
          CommandArgument.MakeFreeTextArgument(),
          CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument(),
          CommandArgument.MakeFileURLOrCannedACLArgument(),
      ])

  help_spec = Command.HelpSpec(help_name='fake2',
                               help_name_aliases=[],
                               help_type='command_help',
                               help_one_line_summary='fake command for tests',
                               help_text='fake command for tests',
                               subcommand_help_text={})

  def __init__(self):
    # Intentionally skips Command.__init__; completer tests never run this
    # command, they only inspect its argparse specification.
    pass
class StatCommand(Command): """Implementation of gsutil stat command.""" # Command specification. See base class for documentation. command_spec = Command.CreateCommandSpec( 'stat', command_name_aliases=[], usage_synopsis=_SYNOPSIS, min_args=1, max_args=NO_MAX, supported_sub_args='', file_url_ok=False, provider_url_ok=False, urls_start_arg=0, gs_api_support=[ApiSelector.XML, ApiSelector.JSON], gs_default_api=ApiSelector.JSON, argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()]) # Help specification. See help_provider.py for documentation. help_spec = Command.HelpSpec( help_name='stat', help_name_aliases=[], help_type='command_help', help_one_line_summary='Display object status', help_text=_DETAILED_HELP_TEXT, subcommand_help_text={}, ) def RunCommand(self): """Command entry point for stat command.""" # List of fields we'll print for stat objects. stat_fields = [ 'updated', 'cacheControl', 'contentDisposition', 'contentEncoding', 'contentLanguage', 'size', 'contentType', 'componentCount', 'metadata', 'crc32c', 'md5Hash', 'etag', 'generation', 'metageneration' ] found_nonmatching_arg = False for url_str in self.args: arg_matches = 0 url = StorageUrlFromString(url_str) if not url.IsObject(): raise CommandException( 'The stat command only works with object URLs') try: if ContainsWildcard(url_str): blr_iter = self.WildcardIterator(url_str).IterObjects( bucket_listing_fields=stat_fields) else: single_obj = self.gsutil_api.GetObjectMetadata( url.bucket_name, url.object_name, generation=url.generation, provider=url.scheme, fields=stat_fields) blr_iter = [ BucketListingObject(url, root_object=single_obj) ] for blr in blr_iter: if blr.IsObject(): arg_matches += 1 if logging.getLogger().isEnabledFor(logging.INFO): PrintFullInfoAboutObject(blr, incl_acl=False) except AccessDeniedException: print 'You aren\'t authorized to read %s - skipping' % url_str except InvalidUrlError: raise except NotFoundException: pass if not arg_matches: if 
logging.getLogger().isEnabledFor(logging.INFO): print 'No URLs matched %s' % url_str found_nonmatching_arg = True if found_nonmatching_arg: return 1 return 0
class NotificationCommand(Command):
  """Implementation of gsutil notification command."""

  # Notification names might look like one of these:
  #   canonical form: projects/_/buckets/bucket/notificationConfigs/3
  #   JSON API form:  b/bucket/notificationConfigs/5
  # Either of the above might start with a / if a user is copying & pasting.
  def _GetNotificationPathRegex(self):
    """Lazily compiles and caches the notification-path regex on the class."""
    if not NotificationCommand._notification_path_regex:
      NotificationCommand._notification_path_regex = re.compile(
          ('/?(projects/[^/]+/)?b(uckets)?/(?P<bucket>[^/]+)/'
           'notificationConfigs/(?P<notification>[0-9]+)'))
    return NotificationCommand._notification_path_regex

  # Class-level cache for the compiled regex above.
  _notification_path_regex = None

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'notification',
      command_name_aliases=['notify', 'notifyconfig', 'notifications',
                            'notif'],
      usage_synopsis=_SYNOPSIS,
      min_args=2,
      max_args=NO_MAX,
      # Fixed: 't:' was listed twice in the original option string.
      supported_sub_args='i:t:m:o:f:e:p:s',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments={
          'watchbucket': [
              CommandArgument.MakeFreeTextArgument(),
              CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument()
          ],
          'stopchannel': [],
          'list': [CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument()],
          'delete': [
              # Takes a list of one of the following:
              #   notification: projects/_/buckets/bla/notificationConfigs/5,
              #   bucket: gs://foobar
              CommandArgument.MakeZeroOrMoreCloudURLsArgument()
          ],
          'create': [
              CommandArgument.MakeFreeTextArgument(),  # Cloud Pub/Sub topic
              CommandArgument.MakeNCloudBucketURLsArgument(1)
          ]
      })

  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='notification',
      help_name_aliases=['watchbucket', 'stopchannel', 'notifyconfig'],
      help_type='command_help',
      help_one_line_summary='Configure object change notification',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={
          'create': _create_help_text,
          'list': _list_help_text,
          'delete': _delete_help_text,
          'watchbucket': _watchbucket_help_text,
          'stopchannel': _stopchannel_help_text
      },
  )

  def _WatchBucket(self):
    """Creates a watch on a bucket given in self.args.

    Returns:
      0 on success.

    Raises:
      CommandException: if the application URL is not https://, or the
        target URL is not a gs:// bucket URL.
    """
    self.CheckArguments()
    identifier = None
    client_token = None
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-i':
          identifier = a
        if o == '-t':
          client_token = a

    # Server requires a channel id; generate one if the user didn't supply it.
    identifier = identifier or str(uuid.uuid4())

    watch_url = self.args[0]
    bucket_arg = self.args[-1]

    if not watch_url.lower().startswith('https://'):
      raise CommandException('The application URL must be an https:// URL.')

    bucket_url = StorageUrlFromString(bucket_arg)
    # This combined check subsumes the plain IsBucket() check the original
    # code repeated right after it (that second branch was unreachable).
    if not (bucket_url.IsBucket() and bucket_url.scheme == 'gs'):
      raise CommandException(
          'The %s command can only be used with gs:// bucket URLs.' %
          self.command_name)

    self.logger.info('Watching bucket %s with application URL %s ...',
                     bucket_url, watch_url)

    try:
      channel = self.gsutil_api.WatchBucket(bucket_url.bucket_name,
                                            watch_url,
                                            identifier,
                                            token=client_token,
                                            provider=bucket_url.scheme)
    except AccessDeniedException as e:
      # Watch requests need additional authorization; surface the dedicated
      # troubleshooting message before re-raising.
      self.logger.warn(
          NOTIFICATION_AUTHORIZATION_FAILED_MESSAGE.format(
              watch_error=str(e), watch_url=watch_url))
      raise

    channel_id = channel.id
    resource_id = channel.resourceId
    client_token = channel.token
    self.logger.info('Successfully created watch notification channel.')
    self.logger.info('Watch channel identifier: %s', channel_id)
    self.logger.info('Canonicalized resource identifier: %s', resource_id)
    self.logger.info('Client state token: %s', client_token)

    return 0
class RetentionCommand(Command):
  """Implementation of gsutil retention command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'retention',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=2,
      max_args=NO_MAX,
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments={
          'set': [CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument()],
          'clear': [CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument()],
          'get': [CommandArgument.MakeNCloudBucketURLsArgument(1)],
          'lock': [CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument()],
          'event-default': {
              'set': [CommandArgument.MakeZeroOrMoreCloudURLsArgument()],
              'release': [CommandArgument.MakeZeroOrMoreCloudURLsArgument()]
          },
          'event': {
              'set': [CommandArgument.MakeZeroOrMoreCloudURLsArgument()],
              'release': [CommandArgument.MakeZeroOrMoreCloudURLsArgument()]
          },
          'temp': {
              'set': [CommandArgument.MakeZeroOrMoreCloudURLsArgument()],
              'release': [CommandArgument.MakeZeroOrMoreCloudURLsArgument()]
          },
      })

  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='retention',
      help_name_aliases=[],
      help_type='command_help',
      help_one_line_summary=(
          'Provides utilities to interact with Retention Policy feature.'),
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={
          'get': _get_help_text,
          'set': _set_help_text,
          'clear': _clear_help_text,
          'lock': _lock_help_text,
          'event-default': _event_default_help_text,
          'event': _event_help_text,
          'temp': _temp_help_text
      },
  )

  def RunCommand(self):
    """Command entry point for the retention command."""
    # If the only credential type the user supplies in their boto file is HMAC,
    # GetApiSelector logic will force us to use the XML API, which bucket lock
    # does not support at the moment.
    if self.gsutil_api.GetApiSelector('gs') != ApiSelector.JSON:
      raise CommandException(('The {} command can only be used with the GCS '
                              'JSON API. If you have only supplied hmac '
                              'credentials in your boto file, please instead '
                              'supply a credential type that can be used with '
                              'the JSON API.').format(self.command_name))

    self.preconditions = PreconditionsFromHeaders(self.headers)

    action_subcommand = self.args.pop(0)
    self.ParseSubOpts(check_args=True)
    # Dispatch to the handler for the requested subcommand.
    if action_subcommand == 'set':
      func = self._SetRetention
    elif action_subcommand == 'clear':
      func = self._ClearRetention
    elif action_subcommand == 'get':
      func = self._GetRetention
    elif action_subcommand == 'lock':
      func = self._LockRetention
    elif action_subcommand == 'event-default':
      func = self._DefaultEventHold
    elif action_subcommand == 'event':
      func = self._EventHold
    elif action_subcommand == 'temp':
      func = self._TempHold
    else:
      raise CommandException(
          ('Invalid subcommand "{}" for the {} command.\n'
           'See "gsutil help retention".').format(action_subcommand,
                                                  self.command_name))

    # Commands with both suboptions and subcommands need to reparse for
    # suboptions, so we log again.
    metrics.LogCommandParams(subcommands=[action_subcommand],
                             sub_opts=self.sub_opts)
    return func()

  def BucketUpdateFunc(self, url_args, bucket_metadata_update, fields,
                       log_msg_template):
    """Applies a bucket metadata patch to every bucket matching url_args.

    Args:
      url_args: Iterable of bucket URL strings (wildcards allowed).
      bucket_metadata_update: apitools Bucket message carrying the patch.
      fields: Metadata fields to request back from the PatchBucket call.
      log_msg_template: %-style template logged once per matched bucket.

    Raises:
      CommandException: if none of the provided URLs matched a bucket.
    """
    preconditions = Preconditions(
        meta_gen_match=self.preconditions.meta_gen_match)

    # Iterate over URLs, expanding wildcards and setting the new bucket
    # metadata on each bucket.
    some_matched = False
    for url_str in url_args:
      bucket_iter = self.GetBucketUrlIterFromArg(url_str, bucket_fields=['id'])
      for blr in bucket_iter:
        url = blr.storage_url
        some_matched = True
        self.logger.info(log_msg_template, blr)
        self.gsutil_api.PatchBucket(url.bucket_name,
                                    bucket_metadata_update,
                                    preconditions=preconditions,
                                    provider=url.scheme,
                                    fields=fields)
    if not some_matched:
      raise CommandException(NO_URLS_MATCHED_TARGET % list(url_args))

  def ObjectUpdateMetadataFunc(self,
                               patch_obj_metadata,
                               log_template,
                               name_expansion_result,
                               thread_state=None):
    """Updates metadata on an object using PatchObjectMetadata.

    Args:
      patch_obj_metadata: Metadata changes that should be applied to the
                          existing object.
      log_template: The log template that should be printed for each object.
      name_expansion_result: NameExpansionResult describing target object.
      thread_state: gsutil Cloud API instance to use for the operation.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    exp_src_url = name_expansion_result.expanded_storage_url
    self.logger.info(log_template, exp_src_url)

    cloud_obj_metadata = encoding.JsonToMessage(
        apitools_messages.Object, name_expansion_result.expanded_result)

    # Default any missing preconditions to the object's current generation /
    # metageneration so concurrent mutations fail rather than race.
    preconditions = Preconditions(
        gen_match=self.preconditions.gen_match,
        meta_gen_match=self.preconditions.meta_gen_match)
    if preconditions.gen_match is None:
      preconditions.gen_match = cloud_obj_metadata.generation
    if preconditions.meta_gen_match is None:
      preconditions.meta_gen_match = cloud_obj_metadata.metageneration

    gsutil_api.PatchObjectMetadata(exp_src_url.bucket_name,
                                   exp_src_url.object_name,
                                   patch_obj_metadata,
                                   generation=exp_src_url.generation,
                                   preconditions=preconditions,
                                   provider=exp_src_url.scheme,
                                   fields=['id'])
    PutToQueueWithTimeout(gsutil_api.status_queue,
                          MetadataMessage(message_time=time.time()))

  def _GetObjectNameExpansionIterator(self, url_args):
    """Builds the object name-expansion iterator for hold subcommands."""
    return NameExpansionIterator(
        self.command_name,
        self.debug,
        self.logger,
        self.gsutil_api,
        url_args,
        self.recursion_requested,
        all_versions=self.all_versions,
        continue_on_error=self.parallel_operations,
        bucket_listing_fields=['generation', 'metageneration'])

  def _GetSeekAheadNameExpansionIterator(self, url_args):
    """Builds the seek-ahead iterator used for progress estimation."""
    return SeekAheadNameExpansionIterator(self.command_name,
                                          self.debug,
                                          self.GetSeekAheadGsutilApi(),
                                          url_args,
                                          self.recursion_requested,
                                          all_versions=self.all_versions,
                                          project_id=self.project_id)

  def _SetRetention(self):
    """Set retention retention_period on one or more buckets."""

    seconds = RetentionInSeconds(self.args[0])
    retention_policy = (apitools_messages.Bucket.RetentionPolicyValue(
        retentionPeriod=seconds))

    log_msg_template = 'Setting Retention Policy on %s...'
    bucket_metadata_update = apitools_messages.Bucket(
        retentionPolicy=retention_policy)
    url_args = self.args[1:]
    self.BucketUpdateFunc(url_args,
                          bucket_metadata_update,
                          fields=['id', 'retentionPolicy'],
                          log_msg_template=log_msg_template)
    return 0

  def _ClearRetention(self):
    """Clear retention retention_period on one or more buckets."""
    retention_policy = (apitools_messages.Bucket.RetentionPolicyValue(
        retentionPeriod=None))
    log_msg_template = 'Clearing Retention Policy on %s...'
    bucket_metadata_update = apitools_messages.Bucket(
        retentionPolicy=retention_policy)
    url_args = self.args
    self.BucketUpdateFunc(url_args,
                          bucket_metadata_update,
                          fields=['id', 'retentionPolicy'],
                          log_msg_template=log_msg_template)
    return 0

  def _GetRetention(self):
    """Get Retention Policy for a single bucket."""
    bucket_url, bucket_metadata = self.GetSingleBucketUrlFromArg(
        self.args[0], bucket_fields=['retentionPolicy'])
    print(RetentionPolicyToString(bucket_metadata.retentionPolicy, bucket_url))
    return 0

  def _LockRetention(self):
    """Lock Retention Policy on one or more buckets."""
    url_args = self.args
    # Iterate over URLs, expanding wildcards and setting the Retention Policy
    # configuration on each.
    some_matched = False
    for url_str in url_args:
      bucket_iter = self.GetBucketUrlIterFromArg(url_str, bucket_fields=['id'])
      for blr in bucket_iter:
        url = blr.storage_url
        some_matched = True
        # Get bucket metadata to provide a precondition.
        bucket_metadata = self.gsutil_api.GetBucket(
            url.bucket_name,
            provider=url.scheme,
            fields=['id', 'metageneration', 'retentionPolicy'])
        if (not (bucket_metadata.retentionPolicy and
                 bucket_metadata.retentionPolicy.retentionPeriod)):
          # TODO: implement '-c' flag to continue_on_error
          raise CommandException(
              'Bucket "{}" does not have an Unlocked Retention Policy.'.format(
                  url.bucket_name))
        elif bucket_metadata.retentionPolicy.isLocked is True:
          self.logger.error('Retention Policy on "%s" is already locked.', blr)
        elif ConfirmLockRequest(url.bucket_name,
                                bucket_metadata.retentionPolicy):
          self.logger.info('Locking Retention Policy on %s...', blr)
          self.gsutil_api.LockRetentionPolicy(url.bucket_name,
                                              bucket_metadata.metageneration,
                                              provider=url.scheme)
        else:
          # User declined the confirmation prompt.
          self.logger.error(
              ' Abort Locking Retention Policy on {}'.format(blr))
    if not some_matched:
      raise CommandException(NO_URLS_MATCHED_TARGET % list(url_args))
    return 0

  def _DefaultEventHold(self):
    """Sets default value for Event-Based Hold on one or more buckets."""
    hold = None
    if self.args:
      if self.args[0].lower() == 'set':
        hold = True
      elif self.args[0].lower() == 'release':
        hold = False
      else:
        # Fixed: report the offending token (self.args[0]) instead of
        # self.sub_opts, matching _ProcessHoldArgs.
        raise CommandException(
            ('Invalid subcommand "{}" for the "retention event-default"'
             ' command.\nSee "gsutil help retention event".').format(
                 self.args[0]))

    verb = 'Setting' if hold else 'Releasing'
    log_msg_template = '{} default Event-Based Hold on %s...'.format(verb)
    bucket_metadata_update = apitools_messages.Bucket(
        defaultEventBasedHold=hold)
    url_args = self.args[1:]
    self.BucketUpdateFunc(url_args,
                          bucket_metadata_update,
                          fields=['id', 'defaultEventBasedHold'],
                          log_msg_template=log_msg_template)
    return 0

  def _EventHold(self):
    """Sets or unsets Event-Based Hold on one or more objects."""
    sub_command_name = 'event'
    sub_command_full_name = 'Event-Based'
    hold = self._ProcessHoldArgs(sub_command_name)
    url_args = self.args[1:]
    obj_metadata_update_wrapper = (SetEventHoldFuncWrapper
                                   if hold else ReleaseEventHoldFuncWrapper)
    self._SetHold(obj_metadata_update_wrapper, url_args, sub_command_full_name)
    return 0

  def _TempHold(self):
    """Sets or unsets Temporary Hold on one or more objects."""
    sub_command_name = 'temp'
    sub_command_full_name = 'Temporary'
    hold = self._ProcessHoldArgs(sub_command_name)
    url_args = self.args[1:]
    obj_metadata_update_wrapper = (SetTempHoldFuncWrapper
                                   if hold else ReleaseTempHoldFuncWrapper)
    self._SetHold(obj_metadata_update_wrapper, url_args, sub_command_full_name)
    return 0

  def _ProcessHoldArgs(self, sub_command_name):
    """Processes command args for Temporary and Event-Based Hold sub-command.

    Args:
      sub_command_name: The name of the subcommand: "temp" / "event"

    Returns:
      Returns a boolean value indicating whether to set (True) or
      release (False)the Hold.
    """
    hold = None
    if self.args[0].lower() == 'set':
      hold = True
    elif self.args[0].lower() == 'release':
      hold = False
    else:
      raise CommandException(
          ('Invalid subcommand "{}" for the "retention {}" command.\n'
           'See "gsutil help retention {}".').format(self.args[0],
                                                     sub_command_name,
                                                     sub_command_name))
    return hold

  def _SetHold(self, obj_metadata_update_wrapper, url_args,
               sub_command_full_name):
    """Common logic to set or unset Event-Based/Temporary Hold on objects.

    Args:
      obj_metadata_update_wrapper: The function for updating related fields in
                                   Object metadata.
      url_args: List of object URIs.
      sub_command_full_name: The full name for sub-command:
                             "Temporary" / "Event-Based"

    Raises:
      CommandException: if a single non-recursive URL is not an object, or
        any object's hold could not be set.
    """
    if len(url_args) == 1 and not self.recursion_requested:
      url = StorageUrlFromString(url_args[0])
      if not (url.IsCloudUrl() and url.IsObject()):
        raise CommandException('URL ({}) must name an object'.format(
            url_args[0]))

    name_expansion_iterator = self._GetObjectNameExpansionIterator(url_args)
    seek_ahead_iterator = self._GetSeekAheadNameExpansionIterator(url_args)

    # Used to track if any objects' metadata failed to be set.
    self.everything_set_okay = True

    try:
      # TODO: implement '-c' flag to continue_on_error
      # Perform requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(obj_metadata_update_wrapper,
                 name_expansion_iterator,
                 UpdateObjectMetadataExceptionHandler,
                 fail_on_error=True,
                 seek_ahead_iterator=seek_ahead_iterator)
    except AccessDeniedException as e:
      if e.status == 403:
        self._WarnServiceAccounts()
      raise

    if not self.everything_set_okay:
      raise CommandException(
          '{} Hold for some objects could not be set.'.format(
              sub_command_full_name))
class IamCommand(Command): """Implementation of gsutil iam command.""" command_spec = Command.CreateCommandSpec( 'iam', min_args=2, max_args=NO_MAX, supported_sub_args='afRrd:e:', file_url_ok=True, provider_url_ok=False, urls_start_arg=1, gs_api_support=[ApiSelector.JSON], gs_default_api=ApiSelector.JSON, argparse_arguments={ 'get': [CommandArgument.MakeNCloudURLsArgument(1)], 'set': [ CommandArgument.MakeNFileURLsArgument(1), CommandArgument.MakeZeroOrMoreCloudURLsArgument() ], 'ch': [ CommandArgument.MakeOneOrMoreBindingsArgument(), CommandArgument.MakeZeroOrMoreCloudURLsArgument() ], }, ) help_spec = Command.HelpSpec( help_name='iam', help_name_aliases=[], help_type='command_help', help_one_line_summary=('Get, set, or change' ' bucket and/or object IAM permissions.'), help_text=_DETAILED_HELP_TEXT, subcommand_help_text={ 'get': _get_help_text, 'set': _set_help_text, 'ch': _ch_help_text, }) def GetIamHelper(self, storage_url, thread_state=None): """Gets an IAM policy for a single, resolved bucket / object URL. Args: storage_url: A CloudUrl instance with no wildcards, pointing to a specific bucket or object. thread_state: CloudApiDelegator instance which is passed from command.WorkerThread.__init__() if the global -m flag is specified. Will use self.gsutil_api if thread_state is set to None. Returns: Policy instance. 
""" gsutil_api = GetCloudApiInstance(self, thread_state=thread_state) if storage_url.IsBucket(): policy = gsutil_api.GetBucketIamPolicy( storage_url.bucket_name, provider=storage_url.scheme, fields=['bindings', 'etag'], ) else: policy = gsutil_api.GetObjectIamPolicy( storage_url.bucket_name, storage_url.object_name, generation=storage_url.generation, provider=storage_url.scheme, fields=['bindings', 'etag'], ) return policy def _GetIam(self, thread_state=None): """Gets IAM policy for single bucket or object.""" pattern = self.args[0] matches = PluralityCheckableIterator( self.WildcardIterator(pattern).IterAll( bucket_listing_fields=['name'])) if matches.IsEmpty(): raise CommandException('%s matched no URLs' % pattern) if matches.HasPlurality(): raise CommandException( '%s matched more than one URL, which is not allowed by the %s ' 'command' % (pattern, self.command_name)) storage_url = StorageUrlFromString(list(matches)[0].url_string) policy = self.GetIamHelper(storage_url, thread_state=thread_state) print json.dumps(json.loads(protojson.encode_message(policy)), sort_keys=True, indent=2) def _SetIamHelperInternal(self, storage_url, policy, thread_state=None): """Sets IAM policy for a single, resolved bucket / object URL. Args: storage_url: A CloudUrl instance with no wildcards, pointing to a specific bucket or object. policy: A Policy object to set on the bucket / object. thread_state: CloudApiDelegator instance which is passed from command.WorkerThread.__init__() if the -m flag is specified. Will use self.gsutil_api if thread_state is set to None. Raises: ServiceException passed from the API call if an HTTP error was returned. """ # SetIamHelper may be called by a command.WorkerThread. In the # single-threaded case, WorkerThread will not pass the CloudApiDelegator # instance to thread_state. GetCloudInstance is called to resolve the # edge case. 
gsutil_api = GetCloudApiInstance(self, thread_state=thread_state) if storage_url.IsBucket(): gsutil_api.SetBucketIamPolicy(storage_url.bucket_name, policy, provider=storage_url.scheme) else: gsutil_api.SetObjectIamPolicy(storage_url.bucket_name, storage_url.object_name, policy, generation=storage_url.generation, provider=storage_url.scheme) def SetIamHelper(self, storage_url, policy, thread_state=None): """Handles the potential exception raised by the internal set function.""" try: self._SetIamHelperInternal(storage_url, policy, thread_state=thread_state) except ServiceException: if self.continue_on_error: self.everything_set_okay = False else: raise def PatchIamHelper(self, storage_url, bindings_tuples, thread_state=None): """Patches an IAM policy for a single, resolved bucket / object URL. The patch is applied by altering the policy from an IAM get request, and setting the new IAM with the specified etag. Because concurrent IAM set requests may alter the etag, we may need to retry this operation several times before success. Args: storage_url: A CloudUrl instance with no wildcards, pointing to a specific bucket or object. bindings_tuples: A list of BindingsTuple instances. thread_state: CloudApiDelegator instance which is passed from command.WorkerThread.__init__() if the -m flag is specified. Will use self.gsutil_api if thread_state is set to None. 
""" try: self._PatchIamHelperInternal(storage_url, bindings_tuples, thread_state=thread_state) except ServiceException: if self.continue_on_error: self.everything_set_okay = False else: raise except IamChOnResourceWithConditionsException as e: if self.continue_on_error: self.everything_set_okay = False self.tried_ch_on_resource_with_conditions = True self.logger.debug(e.message) else: raise CommandException(e.message) @Retry(PreconditionException, tries=3, timeout_secs=1.0) def _PatchIamHelperInternal(self, storage_url, bindings_tuples, thread_state=None): policy = self.GetIamHelper(storage_url, thread_state=thread_state) (etag, bindings) = (policy.etag, policy.bindings) # If any of the bindings have conditions present, raise an exception. # See the docstring for the IamChOnResourceWithConditionsException class # for more details on why we raise this exception. for binding in bindings: if binding.condition: message = 'Could not patch IAM policy for %s.' % storage_url message += '\n' message += '\n'.join( textwrap.wrap( 'The resource had conditions present in its IAM policy bindings, ' 'which is not supported by "iam ch". %s' % IAM_CH_CONDITIONS_WORKAROUND_MSG)) raise IamChOnResourceWithConditionsException(message) # Create a backup which is untainted by any references to the original # bindings. orig_bindings = list(bindings) for (is_grant, diff) in bindings_tuples: bindings = PatchBindings(bindings, BindingsTuple(is_grant, diff)) if IsEqualBindings(bindings, orig_bindings): self.logger.info('No changes made to %s', storage_url) return policy = apitools_messages.Policy(bindings=bindings, etag=etag) # We explicitly wish for etag mismatches to raise an error and allow this # function to error out, so we are bypassing the exception handling offered # by IamCommand.SetIamHelper in lieu of our own handling (@Retry). 
self._SetIamHelperInternal(storage_url, policy, thread_state=thread_state) def _PatchIam(self): self.continue_on_error = False self.recursion_requested = False patch_bindings_tuples = [] if self.sub_opts: for o, a in self.sub_opts: if o in ['-r', '-R']: self.recursion_requested = True elif o == '-f': self.continue_on_error = True elif o == '-d': patch_bindings_tuples.append(BindingStringToTuple( False, a)) patterns = [] # N.B.: self.sub_opts stops taking in options at the first non-flagged # token. The rest of the tokens are sent to self.args. Thus, in order to # handle input of the form "-d <binding> <binding> <url>", we will have to # parse self.args for a mix of both bindings and CloudUrls. We are not # expecting to come across the -r, -f flags here. it = iter(self.args) for token in it: if STORAGE_URI_REGEX.match(token): patterns.append(token) break if token == '-d': patch_bindings_tuples.append( BindingStringToTuple(False, it.next())) else: patch_bindings_tuples.append(BindingStringToTuple(True, token)) if not patch_bindings_tuples: raise CommandException('Must specify at least one binding.') # All following arguments are urls. for token in it: patterns.append(token) self.everything_set_okay = True self.tried_ch_on_resource_with_conditions = False threaded_wildcards = [] for pattern in patterns: surl = StorageUrlFromString(pattern) try: if surl.IsBucket(): if self.recursion_requested: surl.object = '*' threaded_wildcards.append(surl.url_string) else: self.PatchIamHelper(surl, patch_bindings_tuples) else: threaded_wildcards.append(surl.url_string) except AttributeError: error_msg = 'Invalid Cloud URL "%s".' % surl.object_name if set(surl.object_name).issubset(set('-Rrf')): error_msg += ( ' This resource handle looks like a flag, which must appear ' 'before all bindings. See "gsutil help iam ch" for more details.' 
) raise CommandException(error_msg) if threaded_wildcards: name_expansion_iterator = NameExpansionIterator( self.command_name, self.debug, self.logger, self.gsutil_api, threaded_wildcards, self.recursion_requested, all_versions=self.all_versions, continue_on_error=self.continue_on_error or self.parallel_operations, bucket_listing_fields=['name']) seek_ahead_iterator = SeekAheadNameExpansionIterator( self.command_name, self.debug, self.GetSeekAheadGsutilApi(), threaded_wildcards, self.recursion_requested, all_versions=self.all_versions) serialized_bindings_tuples_it = itertools.repeat( [SerializeBindingsTuple(t) for t in patch_bindings_tuples]) self.Apply(_PatchIamWrapper, itertools.izip(serialized_bindings_tuples_it, name_expansion_iterator), _PatchIamExceptionHandler, fail_on_error=not self.continue_on_error, seek_ahead_iterator=seek_ahead_iterator) self.everything_set_okay &= not GetFailureCount() > 0 # TODO: Add an error counter for files and objects. if not self.everything_set_okay: msg = 'Some IAM policies could not be patched.' if self.tried_ch_on_resource_with_conditions: msg += '\n' msg += '\n'.join( textwrap.wrap( 'Some resources had conditions present in their IAM policy ' 'bindings, which is not supported by "iam ch". %s' % (IAM_CH_CONDITIONS_WORKAROUND_MSG))) raise CommandException(msg) # TODO(iam-beta): Add an optional flag to specify etag and edit the policy # accordingly to be passed into the helper functions. 
def _SetIam(self): """Set IAM policy for given wildcards on the command line.""" self.continue_on_error = False self.recursion_requested = False self.all_versions = False force_etag = False etag = '' if self.sub_opts: for o, arg in self.sub_opts: if o in ['-r', '-R']: self.recursion_requested = True elif o == '-f': self.continue_on_error = True elif o == '-a': self.all_versions = True elif o == '-e': etag = str(arg) force_etag = True else: self.RaiseInvalidArgumentException() file_url = self.args[0] patterns = self.args[1:] # Load the IAM policy file and raise error if the file is invalid JSON or # does not exist. try: with open(file_url, 'r') as fp: policy = json.loads(fp.read()) except IOError: raise ArgumentException( 'Specified IAM policy file "%s" does not exist.' % file_url) except ValueError as e: self.logger.debug('Invalid IAM policy file, ValueError:\n', e) raise ArgumentException('Invalid IAM policy file "%s".' % file_url) bindings = policy.get('bindings', []) if not force_etag: etag = policy.get('etag', '') policy_json = json.dumps({'bindings': bindings, 'etag': etag}) try: policy = protojson.decode_message(apitools_messages.Policy, policy_json) except DecodeError: raise ArgumentException( 'Invalid IAM policy file "%s" or etag "%s".' % (file_url, etag)) self.everything_set_okay = True # This list of wildcard strings will be handled by NameExpansionIterator. threaded_wildcards = [] for pattern in patterns: surl = StorageUrlFromString(pattern) if surl.IsBucket(): if self.recursion_requested: surl.object_name = '*' threaded_wildcards.append(surl.url_string) else: self.SetIamHelper(surl, policy) else: threaded_wildcards.append(surl.url_string) # N.B.: If threaded_wildcards contains a non-existent bucket # (e.g. ["gs://non-existent", "gs://existent"]), NameExpansionIterator # will raise an exception in iter.next. This halts all iteration, even # when -f is set. This behavior is also evident in acl set. 
This behavior # also appears for any exception that will be raised when iterating over # wildcard expansions (access denied if bucket cannot be listed, etc.). if threaded_wildcards: name_expansion_iterator = NameExpansionIterator( self.command_name, self.debug, self.logger, self.gsutil_api, threaded_wildcards, self.recursion_requested, all_versions=self.all_versions, continue_on_error=self.continue_on_error or self.parallel_operations, bucket_listing_fields=['name']) seek_ahead_iterator = SeekAheadNameExpansionIterator( self.command_name, self.debug, self.GetSeekAheadGsutilApi(), threaded_wildcards, self.recursion_requested, all_versions=self.all_versions) policy_it = itertools.repeat(protojson.encode_message(policy)) self.Apply(_SetIamWrapper, itertools.izip(policy_it, name_expansion_iterator), _SetIamExceptionHandler, fail_on_error=not self.continue_on_error, seek_ahead_iterator=seek_ahead_iterator) self.everything_set_okay &= not GetFailureCount() > 0 # TODO: Add an error counter for files and objects. if not self.everything_set_okay: raise CommandException('Some IAM policies could not be set.') def RunCommand(self): """Command entry point for the acl command.""" action_subcommand = self.args.pop(0) self.ParseSubOpts(check_args=True) # Commands with both suboptions and subcommands need to reparse for # suboptions, so we log again. LogCommandParams(sub_opts=self.sub_opts) self.def_acl = False if action_subcommand == 'get': LogCommandParams(subcommands=[action_subcommand]) self._GetIam() elif action_subcommand == 'set': LogCommandParams(subcommands=[action_subcommand]) self._SetIam() elif action_subcommand == 'ch': LogCommandParams(subcommands=[action_subcommand]) self._PatchIam() else: raise CommandException( 'Invalid subcommand "%s" for the %s command.\n' 'See "gsutil help iam".' % (action_subcommand, self.command_name)) return 0
class StatCommand(Command):
  """Implementation of gsutil stat command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'stat',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=NO_MAX,
      supported_sub_args='',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='stat',
      help_name_aliases=[],
      help_type='command_help',
      help_one_line_summary='Display object status',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for stat command.

    Prints full metadata for each object URL argument (wildcards allowed).
    Output is suppressed when logging is below INFO level, but the matching
    logic (and the exit code) is unchanged.

    Returns:
      0 if every argument matched at least one object; 1 otherwise.

    Raises:
      CommandException: If an argument is not an object URL.
    """
    stat_fields = ENCRYPTED_FIELDS + UNENCRYPTED_FULL_LISTING_FIELDS
    found_nonmatching_arg = False
    for url_str in self.args:
      arg_matches = 0
      url = StorageUrlFromString(url_str)
      if not url.IsObject():
        raise CommandException('The stat command only works with object URLs')
      try:
        if ContainsWildcard(url_str):
          blr_iter = self.WildcardIterator(url_str).IterObjects(
              bucket_listing_fields=stat_fields)
        else:
          try:
            single_obj = self.gsutil_api.GetObjectMetadata(
                url.bucket_name, url.object_name, generation=url.generation,
                provider=url.scheme, fields=stat_fields)
          except EncryptionException:
            # Retry without requesting hashes.
            single_obj = self.gsutil_api.GetObjectMetadata(
                url.bucket_name, url.object_name, generation=url.generation,
                provider=url.scheme,
                fields=UNENCRYPTED_FULL_LISTING_FIELDS)
          blr_iter = [BucketListingObject(url, root_object=single_obj)]
        for blr in blr_iter:
          if blr.IsObject():
            arg_matches += 1
            # TODO: Request fewer fields if we're not printing the object.
            if logging.getLogger().isEnabledFor(logging.INFO):
              PrintFullInfoAboutObject(blr, incl_acl=False)
      except AccessDeniedException:
        if logging.getLogger().isEnabledFor(logging.INFO):
          # Fixed: message previously lacked a trailing newline, so it ran
          # together with subsequent stderr output.
          sys.stderr.write('You aren\'t authorized to read %s - skipping\n' %
                           url_str)
      except InvalidUrlError:
        raise
      except NotFoundException:
        pass
      if not arg_matches:
        if logging.getLogger().isEnabledFor(logging.INFO):
          sys.stderr.write(NO_URLS_MATCHED_TARGET % url_str)
        found_nonmatching_arg = True
    if found_nonmatching_arg:
      return 1
    return 0
class AclCommand(Command):
  """Implementation of gsutil acl command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'acl',
      command_name_aliases=['getacl', 'setacl', 'chacl'],
      usage_synopsis=_SYNOPSIS,
      min_args=2,
      max_args=NO_MAX,
      supported_sub_args='afRrg:u:d:p:',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments={
          'set': [
              CommandArgument.MakeFileURLOrCannedACLArgument(),
              CommandArgument.MakeZeroOrMoreCloudURLsArgument()
          ],
          'get': [
              CommandArgument.MakeNCloudURLsArgument(1)
          ],
          'ch': [
              CommandArgument.MakeZeroOrMoreCloudURLsArgument()
          ],
      }
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='acl',
      help_name_aliases=['getacl', 'setacl', 'chmod', 'chacl'],
      help_type='command_help',
      help_one_line_summary='Get, set, or change bucket and/or object ACLs',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={
          'get': _get_help_text, 'set': _set_help_text, 'ch': _ch_help_text},
  )

  def _CalculateUrlsStartArg(self):
    """Returns the index of the first URL arg (0 for get/ch, 1 for set)."""
    if not self.args:
      self.RaiseWrongNumberOfArgumentsException()
    if (self.args[0].lower() == 'set') or (self.command_alias_used == 'setacl'):
      return 1
    else:
      return 0

  def _SetAcl(self):
    """Parses options and sets ACLs on the specified buckets/objects.

    Raises:
      CommandException: If ACLs on some objects could not be set.
    """
    self.continue_on_error = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
        else:
          self.RaiseInvalidArgumentException()
    try:
      self.SetAclCommandHelper(SetAclFuncWrapper, SetAclExceptionHandler)
    # Fixed: replaced deprecated Py2-only "except X, unused_e" with the
    # modern form (the bound exception was unused anyway).
    except AccessDeniedException:
      self._WarnServiceAccounts()
      raise
    if not self.everything_set_okay:
      raise CommandException('ACLs for some objects could not be set.')
class RewriteCommand(Command):
  """Implementation of gsutil rewrite command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'rewrite',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='fkIrROs:',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])

  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='rewrite',
      help_name_aliases=['rekey', 'rotate'],
      help_type='command_help',
      help_one_line_summary='Rewrite objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def CheckProvider(self, url):
    """Raises CommandException if url is not a gs:// URL."""
    if url.scheme != 'gs':
      raise CommandException(
          '"rewrite" called on URL with unsupported provider: %s' % str(url))

  def RunCommand(self):
    """Command entry point for the rewrite command.

    Returns:
      0 on success.

    Raises:
      CommandException: On invalid arguments or if any object failed.
    """
    self.continue_on_error = self.parallel_operations
    self.dest_storage_class = None
    self.no_preserve_acl = False
    self.read_args_from_stdin = False
    self.supported_transformation_flags = ['-k', '-s']
    self.transform_types = set()

    self.op_failure_count = 0
    self.boto_file_encryption_tuple, self.boto_file_encryption_sha256 = (
        GetEncryptionTupleAndSha256Hash())

    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-f':
          self.continue_on_error = True
        elif o == '-k':
          self.transform_types.add(_TransformTypes.CRYPTO_KEY)
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o == '-O':
          self.no_preserve_acl = True
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
          self.all_versions = True
        elif o == '-s':
          self.transform_types.add(_TransformTypes.STORAGE_CLASS)
          self.dest_storage_class = NormalizeStorageClass(a)

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      url_strs = StdinIterator()
    else:
      if not self.args:
        raise CommandException('The rewrite command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args

    if not self.transform_types:
      raise CommandException(
          'rewrite command requires at least one transformation flag. '
          'Currently supported transformation flags: %s' %
          self.supported_transformation_flags)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    url_strs_generator = GenerationCheckGenerator(url_strs)

    # Convert recursive flag to flat wildcard to avoid performing multiple
    # listings.
    if self.recursion_requested:
      url_strs_generator = ConvertRecursiveToFlatWildcard(url_strs_generator)

    # Expand the source argument(s).
    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api,
        url_strs_generator, self.recursion_requested,
        project_id=self.project_id,
        continue_on_error=self.continue_on_error or self.parallel_operations,
        bucket_listing_fields=['name', 'size'])

    seek_ahead_iterator = None
    # Cannot seek ahead with stdin args, since we can only iterate them
    # once without buffering in memory.
    if not self.read_args_from_stdin:
      # Perform the same recursive-to-flat conversion on original url_strs so
      # that it is as true to the original iterator as possible.
      seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
      seek_ahead_iterator = SeekAheadNameExpansionIterator(
          self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
          seek_ahead_url_strs, self.recursion_requested,
          all_versions=self.all_versions, project_id=self.project_id)

    # Perform rewrite requests in parallel (-m) mode, if requested.
    self.Apply(_RewriteFuncWrapper, name_expansion_iterator,
               _RewriteExceptionHandler,
               fail_on_error=(not self.continue_on_error),
               shared_attrs=['op_failure_count'],
               seek_ahead_iterator=seek_ahead_iterator)

    if self.op_failure_count:
      # Fixed: previously 's' was appended whenever the count was nonzero,
      # yielding "1 files/objects"; only pluralize for counts > 1.
      plural_str = 's' if self.op_failure_count > 1 else ''
      raise CommandException('%d file%s/object%s could not be rewritten.' %
                             (self.op_failure_count, plural_str, plural_str))

    return 0

  def RewriteFunc(self, name_expansion_result, thread_state=None):
    """Rewrites a single object, applying any non-redundant transforms.

    Args:
      name_expansion_result: NameExpansionResult naming the object to rewrite.
      thread_state: gsutil CloudApi instance to use for this thread, if any.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
    transform_url = name_expansion_result.expanded_storage_url
    # Make a local copy of the requested transformations for each thread. As
    # a redundant transformation for one object might not be redundant for
    # another, we wouldn't want to remove it from the transform_types set that
    # all threads share.
    transforms_to_perform = set(self.transform_types)

    self.CheckProvider(transform_url)

    # Get all fields so that we can ensure that the target metadata is
    # specified correctly.
    src_metadata = gsutil_api.GetObjectMetadata(
        transform_url.bucket_name, transform_url.object_name,
        generation=transform_url.generation, provider=transform_url.scheme)

    if self.no_preserve_acl:
      # Leave ACL unchanged.
      src_metadata.acl = []
    elif not src_metadata.acl:
      raise CommandException(
          'No OWNER permission found for object %s. OWNER permission is '
          'required for rewriting objects, (otherwise their ACLs would be '
          'reset).' % transform_url)

    # Note: If other transform types are added, they must ensure that the
    # encryption key configuration matches the boto configuration, because
    # gsutil maintains an invariant that all objects it writes use the
    # encryption_key value (including decrypting if no key is present).
    src_encryption_sha256 = None
    if (src_metadata.customerEncryption and
        src_metadata.customerEncryption.keySha256):
      src_encryption_sha256 = src_metadata.customerEncryption.keySha256

    should_encrypt_target = self.boto_file_encryption_sha256 is not None
    source_was_encrypted = src_encryption_sha256 is not None
    using_same_encryption_key_value = (
        src_encryption_sha256 == self.boto_file_encryption_sha256)

    # Prevent accidental key rotation.
    if (_TransformTypes.CRYPTO_KEY not in transforms_to_perform and
        not using_same_encryption_key_value):
      raise EncryptionException(
          'The "-k" flag was not passed to the rewrite command, but the '
          'encryption_key value in your boto config file did not match the key '
          'used to encrypt the object "%s" (hash: %s). To encrypt the object '
          'using a different key, you must specify the "-k" flag.' %
          (transform_url, src_encryption_sha256))

    # Remove any redundant changes.

    # STORAGE_CLASS transform should be skipped if the target storage class
    # matches the existing storage class.
    if (_TransformTypes.STORAGE_CLASS in transforms_to_perform and
        self.dest_storage_class == NormalizeStorageClass(
            src_metadata.storageClass)):
      transforms_to_perform.remove(_TransformTypes.STORAGE_CLASS)
      self.logger.info('Redundant transform: %s already had storage class of '
                       '%s.' % (transform_url, src_metadata.storageClass))

    # CRYPTO_KEY transform should be skipped if we're using the same encryption
    # key (if any) that was used to encrypt the source.
    if (_TransformTypes.CRYPTO_KEY in transforms_to_perform and
        using_same_encryption_key_value):
      if self.boto_file_encryption_sha256 is None:
        log_msg = '%s is already decrypted.' % transform_url
      else:
        log_msg = '%s already has current encryption key.' % transform_url
      transforms_to_perform.remove(_TransformTypes.CRYPTO_KEY)
      self.logger.info('Redundant transform: %s' % log_msg)

    if not transforms_to_perform:
      self.logger.info(
          'Skipping %s, all transformations were redundant.' % transform_url)
      return

    # Make a deep copy of the source metadata.
    dst_metadata = encoding.PyValueToMessage(
        apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

    # Remove some unnecessary/invalid fields.
    dst_metadata.customerEncryption = None
    dst_metadata.generation = None
    # Service has problems if we supply an ID, but it is responsible for
    # generating one, so it is not necessary to include it here.
    dst_metadata.id = None
    decryption_tuple = None
    # Use a generic operation name by default - this can be altered below for
    # specific transformations (encryption changes, etc.).
    operation_name = 'Rewriting'
    if source_was_encrypted:
      decryption_key = FindMatchingCryptoKey(src_encryption_sha256)
      if not decryption_key:
        raise EncryptionException(
            'Missing decryption key with SHA256 hash %s. No decryption key '
            'matches object %s' % (src_encryption_sha256, transform_url))
      decryption_tuple = CryptoTupleFromKey(decryption_key)

    if _TransformTypes.CRYPTO_KEY in transforms_to_perform:
      if not source_was_encrypted:
        operation_name = 'Encrypting'
      elif not should_encrypt_target:
        operation_name = 'Decrypting'
      else:
        operation_name = 'Rotating'
    if _TransformTypes.STORAGE_CLASS in transforms_to_perform:
      dst_metadata.storageClass = self.dest_storage_class

    # TODO: Remove this call (used to verify tests) and make it processed by
    # the UIThread.
    sys.stderr.write(
        _ConstructAnnounceText(operation_name, transform_url.url_string))
    # Message indicating beginning of operation.
    gsutil_api.status_queue.put(
        FileMessage(transform_url, None, time.time(), finished=False,
                    size=src_metadata.size,
                    message_type=FileMessage.FILE_REWRITE))
    progress_callback = FileProgressCallbackHandler(
        gsutil_api.status_queue, src_url=transform_url,
        operation_name=operation_name).call

    gsutil_api.CopyObject(
        src_metadata, dst_metadata, src_generation=transform_url.generation,
        preconditions=self.preconditions, progress_callback=progress_callback,
        decryption_tuple=decryption_tuple,
        encryption_tuple=self.boto_file_encryption_tuple,
        provider=transform_url.scheme, fields=[])

    # Message indicating end of operation.
    gsutil_api.status_queue.put(
        FileMessage(transform_url, None, time.time(), finished=True,
                    size=src_metadata.size,
                    message_type=FileMessage.FILE_REWRITE))
class RewriteCommand(Command):
  """Implementation of gsutil rewrite command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'rewrite',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='fkIrRO',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])

  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='rewrite',
      help_name_aliases=['rekey', 'rotate'],
      help_type='command_help',
      help_one_line_summary='Rewrite objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def CheckProvider(self, url):
    """Raises CommandException if url is not a gs:// URL."""
    if url.scheme != 'gs':
      raise CommandException(
          '"rewrite" called on URL with unsupported provider (%s).' %
          str(url))

  def RunCommand(self):
    """Command entry point for the rewrite command.

    Returns:
      0 on success.

    Raises:
      CommandException: On invalid arguments or if any object failed.
    """
    self.continue_on_error = self.parallel_operations
    self.read_args_from_stdin = False
    self.no_preserve_acl = False
    self.supported_transformation_flags = ['-k']
    self.transform_types = []

    self.op_failure_count = 0
    self.current_encryption_tuple, self.current_encryption_sha256 = (
        GetEncryptionTupleAndSha256Hash())

    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-f':
          self.continue_on_error = True
        elif o == '-k':
          self.transform_types.append(_TransformTypes.CRYPTO_KEY)
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o == '-O':
          self.no_preserve_acl = True
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
          self.all_versions = True

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      url_strs = StdinIterator()
    else:
      if not self.args:
        raise CommandException('The rewrite command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args
    # Ensure generation-qualified URLs are rejected/checked up front.
    url_strs = GenerationCheckGenerator(url_strs)

    if not self.transform_types:
      raise CommandException(
          'rewrite command requires at least one transformation flag. '
          'Currently supported transformation flags: %s' %
          self.supported_transformation_flags)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    # Convert recursive flag to flat wildcard to avoid performing multiple
    # listings.
    if self.recursion_requested:
      url_strs = ConvertRecursiveToFlatWildcard(url_strs)

    # Expand the source argument(s).
    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api,
        url_strs, self.recursion_requested,
        project_id=self.project_id,
        continue_on_error=self.continue_on_error or self.parallel_operations)

    # Perform rewrite requests in parallel (-m) mode, if requested.
    self.Apply(_RewriteFuncWrapper, name_expansion_iterator,
               _RewriteExceptionHandler,
               fail_on_error=(not self.continue_on_error),
               shared_attrs=['op_failure_count'])

    if self.op_failure_count:
      # Fixed: previously 's' was appended whenever the count was nonzero,
      # yielding "1 files/objects"; only pluralize for counts > 1.
      plural_str = 's' if self.op_failure_count > 1 else ''
      raise CommandException('%d file%s/object%s could not be rewritten.' %
                             (self.op_failure_count, plural_str, plural_str))

    return 0

  def RewriteFunc(self, name_expansion_result, thread_state=None):
    """Applies the requested transforms to a single expanded object URL."""
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
    self.CheckProvider(name_expansion_result.expanded_storage_url)
    # If other transform types are added here, they must ensure that the
    # encryption key configuration matches the boto configuration, because
    # gsutil maintains an invariant that all objects it writes use the
    # encryption_key value (including decrypting if no key is present).
    if _TransformTypes.CRYPTO_KEY in self.transform_types:
      self.CryptoRewrite(name_expansion_result.expanded_storage_url,
                         gsutil_api)

  def CryptoRewrite(self, transform_url, gsutil_api):
    """Make the cloud object at transform_url match encryption configuration.

    Args:
      transform_url: CloudUrl to rewrite.
      gsutil_api: gsutil CloudApi instance for making API calls.
    """
    # Get all fields so that we can ensure that the target metadata is
    # specified correctly.
    src_metadata = gsutil_api.GetObjectMetadata(
        transform_url.bucket_name, transform_url.object_name,
        generation=transform_url.generation, provider=transform_url.scheme)

    if self.no_preserve_acl:
      # Leave ACL unchanged.
      src_metadata.acl = []
    elif not src_metadata.acl:
      raise CommandException(
          'No OWNER permission found for object %s. OWNER permission is '
          'required for rewriting objects, (otherwise their ACLs would be '
          'reset).' % transform_url)

    src_encryption_sha256 = None
    if (src_metadata.customerEncryption and
        src_metadata.customerEncryption.keySha256):
      src_encryption_sha256 = src_metadata.customerEncryption.keySha256

    if src_encryption_sha256 == self.current_encryption_sha256:
      # Already in the desired encryption state; nothing to do.
      if self.current_encryption_sha256 is not None:
        self.logger.info(
            'Skipping %s, already has current encryption key' % transform_url)
      else:
        self.logger.info('Skipping %s, already decrypted' % transform_url)
    else:
      # Make a deep copy of the source metadata
      dst_metadata = encoding.PyValueToMessage(
          apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

      # Remove some unnecessary/invalid fields.
      dst_metadata.customerEncryption = None
      dst_metadata.generation = None
      # Service has problems if we supply an ID, but it is responsible for
      # generating one, so it is not necessary to include it here.
      dst_metadata.id = None
      decryption_tuple = None

      if src_encryption_sha256 is None:
        announce_text = 'Encrypting'
      else:
        decryption_key = FindMatchingCryptoKey(src_encryption_sha256)
        if not decryption_key:
          raise EncryptionException(
              'Missing decryption key with SHA256 hash %s. No decryption key '
              'matches object %s' % (src_encryption_sha256, transform_url))
        decryption_tuple = CryptoTupleFromKey(decryption_key)

        if self.current_encryption_sha256 is None:
          announce_text = 'Decrypting'
        else:
          announce_text = 'Rotating'

      progress_callback = FileProgressCallbackHandler(
          ConstructAnnounceText(announce_text, transform_url.url_string),
          gsutil_api.status_queue).call

      gsutil_api.CopyObject(
          src_metadata, dst_metadata, src_generation=transform_url.generation,
          preconditions=self.preconditions,
          progress_callback=progress_callback,
          decryption_tuple=decryption_tuple,
          encryption_tuple=self.current_encryption_tuple,
          provider=transform_url.scheme, fields=[])
class RewriteCommand(Command):
  """Implementation of gsutil rewrite command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'rewrite',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='fkIrROs:',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])

  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='rewrite',
      help_name_aliases=['rekey', 'rotate'],
      help_type='command_help',
      help_one_line_summary='Rewrite objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def CheckProvider(self, url):
    """Raises CommandException if url is not a gs:// URL."""
    if url.scheme != 'gs':
      raise CommandException(
          '"rewrite" called on URL with unsupported provider: %s' % str(url))

  def RunCommand(self):
    """Command entry point for the rewrite command.

    Returns:
      0 on success.

    Raises:
      CommandException: On invalid arguments or if any object failed.
    """
    self.continue_on_error = self.parallel_operations
    self.csek_hash_to_keywrapper = {}
    self.dest_storage_class = None
    self.no_preserve_acl = False
    self.read_args_from_stdin = False
    self.supported_transformation_flags = ['-k', '-s']
    self.transform_types = set()

    self.op_failure_count = 0
    self.boto_file_encryption_keywrapper = GetEncryptionKeyWrapper(config)
    self.boto_file_encryption_sha256 = (
        self.boto_file_encryption_keywrapper.crypto_key_sha256
        if self.boto_file_encryption_keywrapper else None)

    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-f':
          self.continue_on_error = True
        elif o == '-k':
          self.transform_types.add(_TransformTypes.CRYPTO_KEY)
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o == '-O':
          self.no_preserve_acl = True
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
          self.all_versions = True
        elif o == '-s':
          self.transform_types.add(_TransformTypes.STORAGE_CLASS)
          self.dest_storage_class = NormalizeStorageClass(a)

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      url_strs = StdinIterator()
    else:
      if not self.args:
        raise CommandException('The rewrite command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args

    if not self.transform_types:
      raise CommandException(
          'rewrite command requires at least one transformation flag. '
          'Currently supported transformation flags: %s' %
          self.supported_transformation_flags)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    url_strs_generator = GenerationCheckGenerator(url_strs)

    # Convert recursive flag to flat wildcard to avoid performing multiple
    # listings.
    if self.recursion_requested:
      url_strs_generator = ConvertRecursiveToFlatWildcard(url_strs_generator)

    # Expand the source argument(s).
    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api,
        url_strs_generator, self.recursion_requested,
        project_id=self.project_id,
        continue_on_error=self.continue_on_error or self.parallel_operations,
        bucket_listing_fields=['name', 'size'])

    seek_ahead_iterator = None
    # Cannot seek ahead with stdin args, since we can only iterate them
    # once without buffering in memory.
    if not self.read_args_from_stdin:
      # Perform the same recursive-to-flat conversion on original url_strs so
      # that it is as true to the original iterator as possible.
      seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
      seek_ahead_iterator = SeekAheadNameExpansionIterator(
          self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
          seek_ahead_url_strs, self.recursion_requested,
          all_versions=self.all_versions, project_id=self.project_id)

    # Rather than have each worker repeatedly calculate the sha256 hash for
    # each decryption_key in the boto config, do this once now and cache the
    # results.
    for i in range(0, MAX_DECRYPTION_KEYS):
      key_number = i + 1
      keywrapper = CryptoKeyWrapperFromKey(
          config.get('GSUtil', 'decryption_key%s' % str(key_number), None))
      if keywrapper is None:
        # Stop at first attribute absence in lexicographical iteration.
        break
      if keywrapper.crypto_type == CryptoKeyType.CSEK:
        self.csek_hash_to_keywrapper[keywrapper.crypto_key_sha256] = keywrapper
    # Also include the encryption_key, since it should be used to decrypt and
    # then encrypt if the object's CSEK should remain the same.
    if self.boto_file_encryption_sha256 is not None:
      self.csek_hash_to_keywrapper[self.boto_file_encryption_sha256] = (
          self.boto_file_encryption_keywrapper)

    if self.boto_file_encryption_keywrapper is None:
      msg = '\n'.join(textwrap.wrap(
          'NOTE: No encryption_key was specified in the boto configuration '
          'file, so gsutil will not provide an encryption key in its rewrite '
          'API requests. This will decrypt the objects unless they are in '
          'buckets with a default KMS key set, in which case the service '
          'will automatically encrypt the rewritten objects with that key.'))
      print('%s\n' % msg, file=sys.stderr)

    # Perform rewrite requests in parallel (-m) mode, if requested.
    self.Apply(_RewriteFuncWrapper, name_expansion_iterator,
               _RewriteExceptionHandler,
               fail_on_error=(not self.continue_on_error),
               shared_attrs=['op_failure_count'],
               seek_ahead_iterator=seek_ahead_iterator)

    if self.op_failure_count:
      # Fixed: previously 's' was appended whenever the count was nonzero,
      # yielding "1 files/objects"; only pluralize for counts > 1.
      plural_str = 's' if self.op_failure_count > 1 else ''
      raise CommandException('%d file%s/object%s could not be rewritten.' %
                             (self.op_failure_count, plural_str, plural_str))

    return 0

  def RewriteFunc(self, name_expansion_result, thread_state=None):
    """Rewrites a single object, applying any non-redundant transforms.

    Args:
      name_expansion_result: NameExpansionResult naming the object to rewrite.
      thread_state: gsutil CloudApi instance to use for this thread, if any.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
    transform_url = name_expansion_result.expanded_storage_url

    self.CheckProvider(transform_url)

    # Get all fields so that we can ensure that the target metadata is
    # specified correctly.
    src_metadata = gsutil_api.GetObjectMetadata(
        transform_url.bucket_name, transform_url.object_name,
        generation=transform_url.generation, provider=transform_url.scheme)

    if self.no_preserve_acl:
      # Leave ACL unchanged.
      src_metadata.acl = []
    elif not src_metadata.acl:
      raise CommandException(
          'No OWNER permission found for object %s. OWNER permission is '
          'required for rewriting objects, (otherwise their ACLs would be '
          'reset).' % transform_url)

    # Note: If other transform types are added, they must ensure that the
    # encryption key configuration matches the boto configuration, because
    # gsutil maintains an invariant that all objects it writes use the
    # encryption_key value (including decrypting if no key is present).

    # Store metadata about src encryption to make logic below easier to read.
    src_encryption_kms_key = (src_metadata.kmsKeyName
                              if src_metadata.kmsKeyName else None)

    src_encryption_sha256 = None
    if (src_metadata.customerEncryption and
        src_metadata.customerEncryption.keySha256):
      src_encryption_sha256 = src_metadata.customerEncryption.keySha256
      # In python3, hashes are bytes, use ascii since it should be ascii
      src_encryption_sha256 = src_encryption_sha256.encode('ascii')

    src_was_encrypted = (src_encryption_sha256 is not None or
                         src_encryption_kms_key is not None)

    # Also store metadata about dest encryption.
    dest_encryption_kms_key = None
    if (self.boto_file_encryption_keywrapper is not None and
        self.boto_file_encryption_keywrapper.crypto_type ==
        CryptoKeyType.CMEK):
      dest_encryption_kms_key = self.boto_file_encryption_keywrapper.crypto_key

    dest_encryption_sha256 = None
    if (self.boto_file_encryption_keywrapper is not None and
        self.boto_file_encryption_keywrapper.crypto_type ==
        CryptoKeyType.CSEK):
      dest_encryption_sha256 = (
          self.boto_file_encryption_keywrapper.crypto_key_sha256)

    should_encrypt_dest = self.boto_file_encryption_keywrapper is not None

    encryption_unchanged = (src_encryption_sha256 == dest_encryption_sha256 and
                            src_encryption_kms_key == dest_encryption_kms_key)

    # Prevent accidental key rotation.
    if (_TransformTypes.CRYPTO_KEY not in self.transform_types and
        not encryption_unchanged):
      raise EncryptionException(
          'The "-k" flag was not passed to the rewrite command, but the '
          'encryption_key value in your boto config file did not match the key '
          'used to encrypt the object "%s" (hash: %s). To encrypt the object '
          'using a different key, you must specify the "-k" flag.' %
          (transform_url, src_encryption_sha256))

    # Determine if we can skip this rewrite operation (this should only be done
    # when ALL of the specified transformations are redundant).
    redundant_transforms = []

    # STORAGE_CLASS transform is redundant if the target storage class matches
    # the existing storage class.
    if (_TransformTypes.STORAGE_CLASS in self.transform_types and
        self.dest_storage_class == NormalizeStorageClass(
            src_metadata.storageClass)):
      redundant_transforms.append('storage class')

    # CRYPTO_KEY transform is redundant if we're using the same encryption
    # key that was used to encrypt the source. However, if no encryption key
    # was specified, we should still perform the rewrite. This results in the
    # rewritten object either being encrypted with its bucket's default KMS
    # key or having no CSEK/CMEK encryption applied. While we could attempt
    # fetching the bucket's metadata and checking its default KMS key before
    # performing the rewrite (in the case where we appear to be transitioning
    # from no key to no key), that is vulnerable to the race condition where
    # the default KMS key is changed between when we check it and when we
    # rewrite the object.
    if (_TransformTypes.CRYPTO_KEY in self.transform_types and
        should_encrypt_dest and encryption_unchanged):
      redundant_transforms.append('encryption key')

    if len(redundant_transforms) == len(self.transform_types):
      self.logger.info('Skipping %s, all transformations were redundant: %s' %
                       (transform_url, redundant_transforms))
      return

    # First make a deep copy of the source metadata, then overwrite any
    # requested attributes (e.g. if a storage class change was specified).
    dest_metadata = encoding.PyValueToMessage(
        apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

    # Remove some unnecessary/invalid fields.
    dest_metadata.generation = None
    # Service has problems if we supply an ID, but it is responsible for
    # generating one, so it is not necessary to include it here.
    dest_metadata.id = None
    # Ensure we don't copy over the KMS key name or CSEK key info from the
    # source object; those should only come from the boto config's
    # encryption_key value.
    dest_metadata.customerEncryption = None
    dest_metadata.kmsKeyName = None

    # Both a storage class change and CMEK encryption should be set as part of
    # the dest object's metadata. CSEK encryption, if specified, is added to
    # the request later via headers obtained from the keywrapper value passed
    # to encryption_tuple.
    if _TransformTypes.STORAGE_CLASS in self.transform_types:
      dest_metadata.storageClass = self.dest_storage_class
    if dest_encryption_kms_key is not None:
      dest_metadata.kmsKeyName = dest_encryption_kms_key

    # Make sure we have the CSEK key necessary to decrypt.
    decryption_keywrapper = None
    if src_encryption_sha256 is not None:
      if src_encryption_sha256 in self.csek_hash_to_keywrapper:
        decryption_keywrapper = (
            self.csek_hash_to_keywrapper[src_encryption_sha256])
      else:
        raise EncryptionException(
            'Missing decryption key with SHA256 hash %s. No decryption key '
            'matches object %s' % (src_encryption_sha256, transform_url))

    operation_name = 'Rewriting'
    if _TransformTypes.CRYPTO_KEY in self.transform_types:
      if src_was_encrypted and should_encrypt_dest:
        if not encryption_unchanged:
          operation_name = 'Rotating'
        # Else, keep "Rewriting". This might occur when -k was specified and
        # was redundant, but we're performing the operation anyway because
        # some other transformation was not redundant.
      elif src_was_encrypted and not should_encrypt_dest:
        operation_name = 'Decrypting'
      elif not src_was_encrypted and should_encrypt_dest:
        operation_name = 'Encrypting'

    # TODO: Remove this call (used to verify tests) and make it processed by
    # the UIThread.
    sys.stderr.write(
        _ConstructAnnounceText(operation_name, transform_url.url_string))
    sys.stderr.flush()

    # Message indicating beginning of operation.
    gsutil_api.status_queue.put(
        FileMessage(transform_url, None, time.time(), finished=False,
                    size=src_metadata.size,
                    message_type=FileMessage.FILE_REWRITE))

    progress_callback = FileProgressCallbackHandler(
        gsutil_api.status_queue, src_url=transform_url,
        operation_name=operation_name).call

    gsutil_api.CopyObject(
        src_metadata, dest_metadata, src_generation=transform_url.generation,
        preconditions=self.preconditions, progress_callback=progress_callback,
        decryption_tuple=decryption_keywrapper,
        encryption_tuple=self.boto_file_encryption_keywrapper,
        provider=transform_url.scheme, fields=[])

    # Message indicating end of operation.
    gsutil_api.status_queue.put(
        FileMessage(transform_url, None, time.time(), finished=True,
                    size=src_metadata.size,
                    message_type=FileMessage.FILE_REWRITE))
class UrlSignCommand(Command): """Implementation of gsutil url_sign command.""" # Command specification. See base class for documentation. command_spec = Command.CreateCommandSpec( 'signurl', command_name_aliases=['signedurl', 'queryauth'], usage_synopsis=_SYNOPSIS, min_args=2, max_args=NO_MAX, supported_sub_args='m:d:c:p:', file_url_ok=False, provider_url_ok=False, urls_start_arg=1, gs_api_support=[ApiSelector.XML, ApiSelector.JSON], gs_default_api=ApiSelector.JSON, argparse_arguments=[ CommandArgument.MakeNFileURLsArgument(1), CommandArgument.MakeZeroOrMoreCloudURLsArgument() ]) # Help specification. See help_provider.py for documentation. help_spec = Command.HelpSpec( help_name='signurl', help_name_aliases=['signedurl', 'queryauth'], help_type='command_help', help_one_line_summary='Create a signed url', help_text=_DETAILED_HELP_TEXT, subcommand_help_text={}, ) def _ParseAndCheckSubOpts(self): # Default argument values delta = None method = 'GET' content_type = '' passwd = None for o, v in self.sub_opts: if o == '-d': if delta is not None: delta += _DurationToTimeDelta(v) else: delta = _DurationToTimeDelta(v) elif o == '-m': method = v elif o == '-c': content_type = v elif o == '-p': passwd = v else: self.RaiseInvalidArgumentException() if delta is None: delta = timedelta(hours=1) expiration = calendar.timegm( (datetime.utcnow() + delta).utctimetuple()) if method not in ['GET', 'PUT', 'DELETE', 'HEAD', 'RESUMABLE']: raise CommandException('HTTP method must be one of' '[GET|HEAD|PUT|DELETE|RESUMABLE]') return method, expiration, content_type, passwd def _ProbeObjectAccessWithClient(self, key, client_email, gcs_path, logger): """Performs a head request against a signed url to check for read access.""" # Choose a reasonable time in the future; if the user's system clock is # 60 or more seconds behind the server's this will generate an error. 
signed_url = _GenSignedUrl(key, client_email, 'HEAD', '', '', int(time.time()) + 60, gcs_path, logger) try: h = GetNewHttp() req = Request(signed_url, 'HEAD') response = MakeRequest(h, req) if response.status_code not in [200, 403, 404]: raise HttpError.FromResponse(response) return response.status_code except HttpError: error_string = ( 'Unexpected HTTP response code %s while querying ' 'object readability. Is your system clock accurate?' % response.status_code) if response.content: error_string += ' Content: %s' % response.content raise CommandException(error_string) def _EnumerateStorageUrls(self, in_urls): ret = [] for url_str in in_urls: if ContainsWildcard(url_str): ret.extend([ blr.storage_url for blr in self.WildcardIterator(url_str) ]) else: ret.append(StorageUrlFromString(url_str)) return ret def RunCommand(self): """Command entry point for signurl command.""" if not HAVE_OPENSSL: raise CommandException( 'The signurl command requires the pyopenssl library (try pip ' 'install pyopenssl or easy_install pyopenssl)') method, expiration, content_type, passwd = self._ParseAndCheckSubOpts() storage_urls = self._EnumerateStorageUrls(self.args[1:]) key = None client_email = None try: key, client_email = _ReadJSONKeystore( open(self.args[0], 'rb').read(), passwd) except ValueError: # Ignore and try parsing as a pkcs12. if not passwd: passwd = getpass.getpass('Keystore password:'******'rb').read(), passwd) except ValueError: raise CommandException( 'Unable to parse private key from {0}'.format( self.args[0])) print 'URL\tHTTP Method\tExpiration\tSigned URL' for url in storage_urls: if url.scheme != 'gs': raise CommandException( 'Can only create signed urls from gs:// urls') if url.IsBucket(): gcs_path = url.bucket_name if method == 'RESUMABLE': raise CommandException( 'Resumable signed URLs require an object ' 'name.') else: # Need to url encode the object name as Google Cloud Storage does when # computing the string to sign when checking the signature. 
gcs_path = '{0}/{1}'.format( url.bucket_name, urllib.quote(url.object_name.encode(UTF8))) final_url = _GenSignedUrl(key, client_email, method, '', content_type, expiration, gcs_path, self.logger, string_to_sign_debug=True) expiration_dt = datetime.fromtimestamp(expiration) print '{0}\t{1}\t{2}\t{3}'.format( url.url_string.encode(UTF8), method, (expiration_dt.strftime('%Y-%m-%d %H:%M:%S')), final_url.encode(UTF8)) response_code = self._ProbeObjectAccessWithClient( key, client_email, gcs_path, self.logger) if response_code == 404: if url.IsBucket() and method != 'PUT': raise CommandException( 'Bucket {0} does not exist. Please create a bucket with ' 'that name before a creating signed URL to access it.'. format(url)) else: if method != 'PUT' and method != 'RESUMABLE': raise CommandException( 'Object {0} does not exist. Please create/upload an object ' 'with that name before a creating signed URL to access it.' .format(url)) elif response_code == 403: self.logger.warn( '%s does not have permissions on %s, using this link will likely ' 'result in a 403 error until at least READ permissions are granted', client_email, url) return 0
class AclCommand(Command):
  """Implementation of gsutil acl command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'acl',
      command_name_aliases=['getacl', 'setacl', 'chacl'],
      usage_synopsis=_SYNOPSIS,
      min_args=2,
      max_args=NO_MAX,
      supported_sub_args='afRrg:u:d:p:',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments={
          'set': [
              CommandArgument.MakeFileURLOrCannedACLArgument(),
              CommandArgument.MakeZeroOrMoreCloudURLsArgument()
          ],
          'get': [CommandArgument.MakeNCloudURLsArgument(1)],
          'ch': [CommandArgument.MakeZeroOrMoreCloudURLsArgument()],
      })

  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='acl',
      help_name_aliases=['getacl', 'setacl', 'chmod', 'chacl'],
      help_type='command_help',
      help_one_line_summary='Get, set, or change bucket and/or object ACLs',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={
          'get': _get_help_text,
          'set': _set_help_text,
          'ch': _ch_help_text
      },
  )

  def _CalculateUrlsStartArg(self):
    """Returns the index of the first URL argument for this invocation.

    The 'set' subcommand (and the legacy 'setacl' alias) takes an ACL
    file/canned-ACL argument before the URLs, so URLs start at index 1;
    other subcommands take URLs starting at index 0.
    """
    if not self.args:
      self.RaiseWrongNumberOfArgumentsException()
    if (self.args[0].lower() == 'set') or (self.command_alias_used == 'setacl'):
      return 1
    else:
      return 0

  def _SetAcl(self):
    """Parses options and sets ACLs on the specified buckets/objects."""
    self.continue_on_error = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
        else:
          self.RaiseInvalidArgumentException()
    try:
      self.SetAclCommandHelper(SetAclFuncWrapper, SetAclExceptionHandler)
    except AccessDeniedException as unused_e:
      # Access denied on 'acl set' commonly means a service account without
      # OWNER access; warn about that specifically before re-raising.
      self._WarnServiceAccounts()
      raise
    if not self.everything_set_okay:
      raise CommandException('ACLs for some objects could not be set.')

  def _ChAcl(self):
    """Parses options and changes ACLs on the specified buckets/objects."""
    self.parse_versions = True
    self.changes = []
    self.continue_on_error = False

    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-f':
          self.continue_on_error = True
        elif o == '-g':
          # Service accounts match a group-style email but must be granted as
          # users; reject them here with a pointer to the right flag.
          if 'gserviceaccount.com' in a:
            raise CommandException(
                'Service accounts are considered users, not groups; please use '
                '"gsutil acl ch -u" instead of "gsutil acl ch -g"')
          self.changes.append(
              acl_helper.AclChange(a, scope_type=acl_helper.ChangeType.GROUP))
        elif o == '-p':
          self.changes.append(
              acl_helper.AclChange(a, scope_type=acl_helper.ChangeType.PROJECT))
        elif o == '-u':
          self.changes.append(
              acl_helper.AclChange(a, scope_type=acl_helper.ChangeType.USER))
        elif o == '-d':
          self.changes.append(acl_helper.AclDel(a))
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
        else:
          self.RaiseInvalidArgumentException()

    if not self.changes:
      raise CommandException('Please specify at least one access change '
                             'with the -g, -u, or -d flags')

    # 'acl ch' relies on GCS-specific ACL semantics, so all URLs must be gs://.
    if (not UrlsAreForSingleProvider(self.args) or
        StorageUrlFromString(self.args[0]).scheme != 'gs'):
      raise CommandException(
          'The "{0}" command can only be used with gs:// URLs'.format(
              self.command_name))

    self.everything_set_okay = True
    self.ApplyAclFunc(_ApplyAclChangesWrapper, _ApplyExceptionHandler,
                      self.args,
                      object_fields=['acl', 'generation', 'metageneration'])
    if not self.everything_set_okay:
      raise CommandException('ACLs for some objects could not be set.')

  def _RaiseForAccessDenied(self, url):
    """Warns about service accounts, then raises for an ACL set failure."""
    self._WarnServiceAccounts()
    raise CommandException('Failed to set acl for %s. Please ensure you have '
                           'OWNER-role access to this resource.' % url)

  @Retry(ServiceException, tries=3, timeout_secs=1)
  def ApplyAclChanges(self, name_expansion_result, thread_state=None):
    """Applies the changes in self.changes to the provided URL.

    Performs a read-modify-write cycle: fetch the current ACL, apply the
    requested changes locally, then patch the resource with generation/
    metageneration preconditions so concurrent modifications are detected.

    Args:
      name_expansion_result: NameExpansionResult describing the target object.
      thread_state: If present, gsutil Cloud API instance to apply the changes.
    """
    if thread_state:
      gsutil_api = thread_state
    else:
      gsutil_api = self.gsutil_api

    url = name_expansion_result.expanded_storage_url
    if url.IsBucket():
      bucket = gsutil_api.GetBucket(url.bucket_name, provider=url.scheme,
                                    fields=['acl', 'metageneration'])
      current_acl = bucket.acl
    elif url.IsObject():
      # Object metadata (including the ACL) was already fetched during name
      # expansion; decode it rather than issuing another GET.
      gcs_object = encoding.JsonToMessage(apitools_messages.Object,
                                          name_expansion_result.expanded_result)
      current_acl = gcs_object.acl

    # An empty ACL in the response means we lack permission to read it.
    if not current_acl:
      self._RaiseForAccessDenied(url)
    if self._ApplyAclChangesAndReturnChangeCount(url, current_acl) == 0:
      self.logger.info('No changes to %s', url)
      return

    try:
      if url.IsBucket():
        preconditions = Preconditions(meta_gen_match=bucket.metageneration)
        bucket_metadata = apitools_messages.Bucket(acl=current_acl)
        gsutil_api.PatchBucket(url.bucket_name, bucket_metadata,
                               preconditions=preconditions,
                               provider=url.scheme, fields=['id'])
      else:  # Object
        preconditions = Preconditions(gen_match=gcs_object.generation,
                                      meta_gen_match=gcs_object.metageneration)

        object_metadata = apitools_messages.Object(acl=current_acl)
        try:
          gsutil_api.PatchObjectMetadata(url.bucket_name, url.object_name,
                                         object_metadata,
                                         preconditions=preconditions,
                                         provider=url.scheme,
                                         generation=url.generation,
                                         fields=['id'])
        except PreconditionException as e:
          # Special retry case where we want to do an additional step, the read
          # of the read-modify-write cycle, to fetch the correct object
          # metadata before reattempting ACL changes.
          self._RefetchObjectMetadataAndApplyAclChanges(url, gsutil_api)

      self.logger.info('Updated ACL on %s', url)
    except BadRequestException as e:
      # Don't retry on bad requests, e.g. invalid email address.
      raise CommandException('Received bad request from server: %s' % str(e))
    except AccessDeniedException:
      self._RaiseForAccessDenied(url)
    except PreconditionException as e:
      # For objects, retry attempts should have already been handled.
      if url.IsObject():
        raise CommandException(str(e))
      # For buckets, raise PreconditionException and continue to next retry.
      raise e
class DuCommand(Command):
  """Implementation of gsutil du command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'du',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='0ace:hsX:',
      file_url_ok=False,
      provider_url_ok=True,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument(),
      ],
  )

  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='du',
      help_name_aliases=[],
      help_type='command_help',
      help_one_line_summary='Display object size usage',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def _PrintSummaryLine(self, num_bytes, name):
    """Prints a single summary line: left-aligned size, then name.

    Args:
      num_bytes: Byte count to display (humanized when -h was given).
      name: Text to display after the size column.
    """
    size_string = (MakeHumanReadable(num_bytes)
                   if self.human_readable else six.text_type(num_bytes))
    text_util.print_to_fd('{size:<11} {name}'.format(
        size=size_string, name=six.ensure_text(name)), end=self.line_ending)

  def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref):
    """Print listing info for given bucket_listing_ref.

    Args:
      bucket_listing_ref: BucketListing being listed.

    Returns:
      Tuple (number of objects, object size).

    Raises:
      Exception: if calling bug encountered.
    """
    obj = bucket_listing_ref.root_object
    url_str = bucket_listing_ref.url_string
    if (obj.metadata and
        S3_DELETE_MARKER_GUID in obj.metadata.additionalProperties):
      # S3 delete markers have no content; annotate them and count zero bytes.
      size_string = '0'
      num_bytes = 0
      num_objs = 0
      url_str += '<DeleteMarker>'
    else:
      size_string = (MakeHumanReadable(obj.size)
                     if self.human_readable else str(obj.size))
      num_bytes = obj.size
      num_objs = 1

    if not self.summary_only:
      url_detail = '{size:<11} {url}{ending}'.format(
          size=size_string,
          url=six.ensure_text(url_str),
          ending=six.ensure_text(self.line_ending))
      print_to_fd(url_detail, file=sys.stdout, end='')

    return (num_objs, num_bytes)

  def RunCommand(self):
    """Command entry point for the du command."""
    self.line_ending = '\n'
    self.all_versions = False
    self.produce_total = False
    self.human_readable = False
    self.summary_only = False
    self.exclude_patterns = []
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-0':
          # NUL-terminated output, for consumption by e.g. xargs -0.
          self.line_ending = '\0'
        elif o == '-a':
          self.all_versions = True
        elif o == '-c':
          self.produce_total = True
        elif o == '-e':
          self.exclude_patterns.append(a)
        elif o == '-h':
          self.human_readable = True
        elif o == '-s':
          self.summary_only = True
        elif o == '-X':
          # Read exclude patterns from a file ('-' means stdin). Note: this
          # replaces any patterns previously accumulated via -e.
          if a == '-':
            f = sys.stdin
            f_close = False
          else:
            f = open(a, 'r') if six.PY2 else open(a, 'r', encoding=UTF8)
            f_close = True
          self.exclude_patterns = [six.ensure_text(line.strip()) for line in f]
          if f_close:
            f.close()

    if not self.args:
      # Default to listing all gs buckets.
      self.args = ['gs://']

    total_bytes = 0
    got_nomatch_errors = False

    # Callbacks wired into LsHelper below; they close over the option flags.
    def _PrintObjectLong(blr):
      return self._PrintInfoAboutBucketListingRef(blr)

    def _PrintNothing(unused_blr=None):
      pass

    def _PrintDirectory(num_bytes, blr):
      if not self.summary_only:
        self._PrintSummaryLine(num_bytes, blr.url_string.encode(UTF8))

    for url_arg in self.args:
      top_level_storage_url = StorageUrlFromString(url_arg)
      if top_level_storage_url.IsFileUrl():
        raise CommandException('Only cloud URLs are supported for %s' %
                               self.command_name)
      bucket_listing_fields = ['size']

      listing_helper = ls_helper.LsHelper(
          self.WildcardIterator, self.logger,
          print_object_func=_PrintObjectLong,
          print_dir_func=_PrintNothing,
          print_dir_header_func=_PrintNothing,
          print_dir_summary_func=_PrintDirectory,
          print_newline_func=_PrintNothing,
          all_versions=self.all_versions,
          should_recurse=True,
          exclude_patterns=self.exclude_patterns,
          fields=bucket_listing_fields)

      # LsHelper expands to objects and prefixes, so perform a top-level
      # expansion first.
      if top_level_storage_url.IsProvider():
        # Provider URL: use bucket wildcard to iterate over all buckets.
        top_level_iter = self.WildcardIterator(
            '%s://*' % top_level_storage_url.scheme).IterBuckets(
                bucket_fields=['id'])
      elif top_level_storage_url.IsBucket():
        top_level_iter = self.WildcardIterator(
            '%s://%s' % (top_level_storage_url.scheme,
                         top_level_storage_url.bucket_name)).IterBuckets(
                             bucket_fields=['id'])
      else:
        top_level_iter = [BucketListingObject(top_level_storage_url)]

      for blr in top_level_iter:
        storage_url = blr.storage_url
        if storage_url.IsBucket() and self.summary_only:
          # Summarize the whole bucket by recursively expanding a ** wildcard.
          storage_url = StorageUrlFromString(
              storage_url.CreatePrefixUrl(wildcard_suffix='**'))
        _, exp_objs, exp_bytes = listing_helper.ExpandUrlAndPrint(storage_url)
        if (storage_url.IsObject() and exp_objs == 0 and
            ContainsWildcard(url_arg) and not self.exclude_patterns):
          got_nomatch_errors = True
        total_bytes += exp_bytes

        if self.summary_only:
          self._PrintSummaryLine(exp_bytes,
                                 blr.url_string.rstrip('/').encode(UTF8))

    if self.produce_total:
      self._PrintSummaryLine(total_bytes, 'total')

    if got_nomatch_errors:
      raise CommandException('One or more URLs matched no objects.')

    return 0
class SetMetaCommand(Command):
  """Implementation of gsutil setmeta command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'setmeta',
      command_name_aliases=['setheader'],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=constants.NO_MAX,
      supported_sub_args='h:rR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])

  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='setmeta',
      help_name_aliases=['setheader'],
      help_type='command_help',
      help_one_line_summary='Set metadata on already uploaded objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the setmeta command."""
    headers = []
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-h':
          # Canned ACL headers were once accepted here; direct users to the
          # acl command instead.
          if 'x-goog-acl' in a or 'x-amz-acl' in a:
            raise CommandException(
                'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
                'set ... to set canned ACLs.')
          headers.append(a)

    (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)

    # Represent removals as empty-string values in the single change dict.
    self.metadata_change = metadata_plus
    for header in metadata_minus:
      self.metadata_change[header] = ''

    if len(self.args) == 1 and not self.recursion_requested:
      url = StorageUrlFromString(self.args[0])
      if not (url.IsCloudUrl() and url.IsObject()):
        raise CommandException('URL (%s) must name an object' % self.args[0])

    # Used to track if any objects' metadata failed to be set.
    self.everything_set_okay = True

    self.preconditions = PreconditionsFromHeaders(self.headers)

    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api, self.args,
        self.recursion_requested, all_versions=self.all_versions,
        continue_on_error=self.parallel_operations,
        bucket_listing_fields=['generation', 'metadata', 'metageneration'])

    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name, self.debug, self.GetSeekAheadGsutilApi(), self.args,
        self.recursion_requested, all_versions=self.all_versions,
        project_id=self.project_id)

    try:
      # Perform requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_SetMetadataFuncWrapper, name_expansion_iterator,
                 _SetMetadataExceptionHandler, fail_on_error=True,
                 seek_ahead_iterator=seek_ahead_iterator)
    except AccessDeniedException as e:
      if e.status == 403:
        self._WarnServiceAccounts()
      raise

    if not self.everything_set_okay:
      raise CommandException('Metadata for some objects could not be set.')

    return 0

  @Retry(PreconditionException, tries=3, timeout_secs=1)
  def SetMetadataFunc(self, name_expansion_result, thread_state=None):
    """Sets metadata on an object.

    Applies self.metadata_change to the target object via a metadata PATCH,
    guarded by generation/metageneration preconditions (defaulting to the
    object's current values when the user supplied none).

    Args:
      name_expansion_result: NameExpansionResult describing target object.
      thread_state: gsutil Cloud API instance to use for the operation.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    exp_src_url = name_expansion_result.expanded_storage_url
    self.logger.info('Setting metadata on %s...', exp_src_url)

    cloud_obj_metadata = encoding.JsonToMessage(
        apitools_messages.Object, name_expansion_result.expanded_result)

    preconditions = Preconditions(
        gen_match=self.preconditions.gen_match,
        meta_gen_match=self.preconditions.meta_gen_match)
    if preconditions.gen_match is None:
      preconditions.gen_match = cloud_obj_metadata.generation
    if preconditions.meta_gen_match is None:
      preconditions.meta_gen_match = cloud_obj_metadata.metageneration

    # Patch handles the patch semantics for most metadata, but we need to
    # merge the custom metadata field manually.
    patch_obj_metadata = ObjectMetadataFromHeaders(self.metadata_change)

    api = gsutil_api.GetApiSelector(provider=exp_src_url.scheme)
    # For XML we only want to patch through custom metadata that has
    # changed. For JSON we need to build the complete set.
    if api == ApiSelector.XML:
      pass
    elif api == ApiSelector.JSON:
      CopyObjectMetadata(patch_obj_metadata, cloud_obj_metadata, override=True)
      patch_obj_metadata = cloud_obj_metadata
      # Patch body does not need the object generation and metageneration.
      patch_obj_metadata.generation = None
      patch_obj_metadata.metageneration = None

    gsutil_api.PatchObjectMetadata(
        exp_src_url.bucket_name, exp_src_url.object_name, patch_obj_metadata,
        generation=exp_src_url.generation, preconditions=preconditions,
        provider=exp_src_url.scheme, fields=['id'])
    _PutToQueueWithTimeout(gsutil_api.status_queue,
                           MetadataMessage(message_time=time.time()))

  def _ParseMetadataHeaders(self, headers):
    """Validates and parses metadata changes from the headers argument.

    Args:
      headers: List of 'header:value' strings to validate and parse
          (an empty value means the header should be removed).

    Returns:
      (metadata_minus, metadata_plus): Tuple of (set of headers to remove,
      dict of headers to add), each including custom-metadata entries.

    Raises:
      CommandException: If a header is invalid/disallowed or appears more
          than once.
    """
    metadata_minus = set()
    cust_metadata_minus = set()
    metadata_plus = {}
    cust_metadata_plus = {}
    # Build a count of the keys encountered from each plus and minus arg so we
    # can check for dupe field specs.
    num_metadata_plus_elems = 0
    num_cust_metadata_plus_elems = 0
    num_metadata_minus_elems = 0
    num_cust_metadata_minus_elems = 0

    for md_arg in headers:
      # Use partition rather than split, as we should treat all characters past
      # the initial : as part of the header's value.
      parts = md_arg.partition(':')
      (header, _, value) = parts
      InsistAsciiHeader(header)

      # Translate headers to lowercase to match the casing assumed by our
      # sanity-checking operations.
      lowercase_header = header.lower()

      # This check is overly simple; it would be stronger to check, for each
      # URL argument, whether the header starts with the provider
      # metadata_prefix, but here we just parse the spec once, before
      # processing any of the URLs. This means we will not detect if the user
      # tries to set an x-goog-meta- field on an another provider's object,
      # for example.
      is_custom_meta = IsCustomMetadataHeader(lowercase_header)
      if not is_custom_meta and lowercase_header not in SETTABLE_FIELDS:
        raise CommandException(
            'Invalid or disallowed header (%s).\nOnly these fields (plus '
            'x-goog-meta-* fields) can be set or unset:\n%s' %
            (header, sorted(list(SETTABLE_FIELDS))))

      if value:
        if is_custom_meta:
          # Allow non-ASCII data for custom metadata fields.
          cust_metadata_plus[header] = value
          num_cust_metadata_plus_elems += 1
        else:
          # Don't unicode encode other fields because that would perturb their
          # content (e.g., adding %2F's into the middle of a Cache-Control
          # value).
          InsistAsciiHeaderValue(header, value)
          value = str(value)
          metadata_plus[lowercase_header] = value
          num_metadata_plus_elems += 1
      else:
        if is_custom_meta:
          cust_metadata_minus.add(header)
          num_cust_metadata_minus_elems += 1
        else:
          metadata_minus.add(lowercase_header)
          num_metadata_minus_elems += 1

    # If any counter exceeds the size of its collection, a key was specified
    # more than once; also reject setting and removing the same header.
    if (num_metadata_plus_elems != len(metadata_plus) or
        num_cust_metadata_plus_elems != len(cust_metadata_plus) or
        num_metadata_minus_elems != len(metadata_minus) or
        num_cust_metadata_minus_elems != len(cust_metadata_minus) or
        metadata_minus.intersection(set(metadata_plus.keys()))):
      raise CommandException('Each header must appear at most once.')

    metadata_plus.update(cust_metadata_plus)
    metadata_minus.update(cust_metadata_minus)
    return (metadata_minus, metadata_plus)
class ComposeCommand(Command):
  """Implementation of gsutil compose command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'compose',
      command_name_aliases=['concat'],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=MAX_COMPOSE_ARITY + 1,
      supported_sub_args='',
      # Not files, just object names without gs:// prefix.
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])

  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='compose',
      help_name_aliases=['concat'],
      help_type='command_help',
      help_one_line_summary=(
          'Concatenate a sequence of objects into a new composite object.'),
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def CheckProvider(self, url):
    """Raises CommandException unless url is a gs:// URL.

    Object composition is a GCS-only feature, so every source and the
    destination must use the gs scheme.
    """
    if url.scheme == 'gs':
      return
    raise CommandException(
        '"compose" called on URL with unsupported provider (%s).' % str(url))

  # Command entry point.
  def RunCommand(self):
    """Command entry point for the compose command."""
    # The final positional argument names the composite; everything before it
    # is a source component.
    destination_str = self.args[-1]
    self.args = self.args[:-1]

    destination_url = StorageUrlFromString(destination_str)
    self.CheckProvider(destination_url)
    if destination_url.HasGeneration():
      raise CommandException('A version-specific URL (%s) cannot be '
                             'the destination for gsutil compose - abort.'
                             % destination_url)

    composite_metadata = apitools_messages.Object(
        name=destination_url.object_name, bucket=destination_url.bucket_name)

    components = []
    # Track the first component so its content type can seed the composite's.
    first_component_url = None
    for source_arg in self.args:
      listing_refs = (self.WildcardIterator(source_arg).IterObjects()
                      if ContainsWildcard(source_arg) else
                      [BucketListingObject(StorageUrlFromString(source_arg))])
      for listing_ref in listing_refs:
        component_url = listing_ref.storage_url
        self.CheckProvider(component_url)

        # Composition cannot cross buckets.
        if component_url.bucket_name != destination_url.bucket_name:
          raise CommandException(
              'GCS does not support inter-bucket composing.')

        if not first_component_url:
          first_component_url = component_url

        entry = apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
            name=component_url.object_name)
        if component_url.HasGeneration():
          entry.generation = int(component_url.generation)
        components.append(entry)
        # Avoid expanding too many components, and sanity check each name
        # expansion result.
        if len(components) > MAX_COMPOSE_ARITY:
          raise CommandException('"compose" called with too many component '
                                 'objects. Limit is %d.' % MAX_COMPOSE_ARITY)

    if not components:
      raise CommandException('"compose" requires at least 1 component object.')

    # The composite object inherits the first component's content type.
    composite_metadata.contentType = self.gsutil_api.GetObjectMetadata(
        first_component_url.bucket_name,
        first_component_url.object_name,
        provider=first_component_url.scheme,
        fields=['contentType']).contentType

    preconditions = PreconditionsFromHeaders(self.headers or {})

    self.logger.info('Composing %s from %d component object(s).',
                     destination_url, len(components))
    self.gsutil_api.ComposeObject(
        components,
        composite_metadata,
        preconditions=preconditions,
        provider=destination_url.scheme,
        encryption_tuple=GetEncryptionKeyWrapper(config))
class NotificationCommand(Command):
  """Implementation of gsutil notification command."""

  # Notification names might look like one of these:
  #   canonical form: projects/_/buckets/bucket/notificationConfigs/3
  #   JSON API form:  b/bucket/notificationConfigs/5
  # Either of the above might start with a / if a user is copying & pasting.
  def _GetNotificationPathRegex(self):
    """Returns the (lazily compiled, class-cached) notification name regex."""
    if not NotificationCommand._notification_path_regex:
      NotificationCommand._notification_path_regex = re.compile(
          ('/?(projects/[^/]+/)?b(uckets)?/(?P<bucket>[^/]+)/'
           'notificationConfigs/(?P<notification>[0-9]+)'))
    return NotificationCommand._notification_path_regex

  # Class-level cache for the compiled regex above.
  _notification_path_regex = None

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'notification',
      command_name_aliases=[
          'notify',
          'notifyconfig',
          'notifications',
          'notif',
      ],
      usage_synopsis=_SYNOPSIS,
      min_args=2,
      max_args=NO_MAX,
      # Fixed: original value 'i:t:m:t:of:e:p:s' listed 't:' twice.
      supported_sub_args='i:t:m:of:e:p:s',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments={
          'watchbucket': [
              CommandArgument.MakeFreeTextArgument(),
              CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument(),
          ],
          'stopchannel': [],
          'list': [
              CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument(),
          ],
          'delete': [
              # Takes a list of one of the following:
              #   notification: projects/_/buckets/bla/notificationConfigs/5,
              #   bucket: gs://foobar
              CommandArgument.MakeZeroOrMoreCloudURLsArgument(),
          ],
          'create': [
              CommandArgument.MakeFreeTextArgument(),  # Cloud Pub/Sub topic
              CommandArgument.MakeNCloudBucketURLsArgument(1),
          ]
      },
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='notification',
      help_name_aliases=[
          'watchbucket',
          'stopchannel',
          'notifyconfig',
      ],
      help_type='command_help',
      help_one_line_summary='Configure object change notification',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={
          'create': _create_help_text,
          'list': _list_help_text,
          'delete': _delete_help_text,
          'watchbucket': _watchbucket_help_text,
          'stopchannel': _stopchannel_help_text,
      },
  )

  def _WatchBucket(self):
    """Creates a watch on a bucket given in self.args.

    Returns:
      0 on success.

    Raises:
      CommandException: if the application URL is not https:// or the target
          is not a gs:// bucket URL.
    """
    self.CheckArguments()
    identifier = None
    client_token = None
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-i':
          identifier = a
        if o == '-t':
          client_token = a

    # Generate a random channel id if the user didn't supply one with -i.
    identifier = identifier or str(uuid.uuid4())

    watch_url = self.args[0]
    bucket_arg = self.args[-1]

    if not watch_url.lower().startswith('https://'):
      raise CommandException('The application URL must be an https:// URL.')

    bucket_url = StorageUrlFromString(bucket_arg)
    # (A second, unreachable IsBucket() check in the original was removed:
    # this condition already guarantees bucket_url.IsBucket().)
    if not (bucket_url.IsBucket() and bucket_url.scheme == 'gs'):
      raise CommandException(
          'The %s command can only be used with gs:// bucket URLs.' %
          self.command_name)

    self.logger.info('Watching bucket %s with application URL %s ...',
                     bucket_url, watch_url)

    try:
      channel = self.gsutil_api.WatchBucket(bucket_url.bucket_name,
                                            watch_url,
                                            identifier,
                                            token=client_token,
                                            provider=bucket_url.scheme)
    except AccessDeniedException as e:
      # Surface a more actionable message (domain authorization is a common
      # failure for watchbucket) before re-raising.
      self.logger.warn(
          NOTIFICATION_AUTHORIZATION_FAILED_MESSAGE.format(watch_error=str(e),
                                                           watch_url=watch_url))
      raise

    channel_id = channel.id
    resource_id = channel.resourceId
    client_token = channel.token
    self.logger.info('Successfully created watch notification channel.')
    self.logger.info('Watch channel identifier: %s', channel_id)
    self.logger.info('Canonicalized resource identifier: %s', resource_id)
    self.logger.info('Client state token: %s', client_token)

    return 0

  def _StopChannel(self):
    """Stops the channel named by self.args (channel id, resource id)."""
    channel_id = self.args[0]
    resource_id = self.args[1]

    self.logger.info('Removing channel %s with resource identifier %s ...',
                     channel_id, resource_id)
    self.gsutil_api.StopChannel(channel_id, resource_id, provider='gs')
    # Fixed typo in the original message ('Succesfully').
    self.logger.info('Successfully removed channel.')

    return 0

  def _ListChannels(self, bucket_arg):
    """Lists active channel watches on a bucket given in self.args."""
    bucket_url = StorageUrlFromString(bucket_arg)
    # (A second, unreachable IsBucket() check in the original was removed.)
    if not (bucket_url.IsBucket() and bucket_url.scheme == 'gs'):
      raise CommandException(
          'The %s command can only be used with gs:// bucket URLs.' %
          self.command_name)

    channels = self.gsutil_api.ListChannels(bucket_url.bucket_name,
                                            provider='gs').items
    self.logger.info(
        'Bucket %s has the following active Object Change Notifications:',
        bucket_url.bucket_name)
    for idx, channel in enumerate(channels):
      self.logger.info('\tNotification channel %d:', idx + 1)
      self.logger.info('\t\tChannel identifier: %s', channel.channel_id)
      self.logger.info('\t\tResource identifier: %s', channel.resource_id)
      self.logger.info('\t\tApplication URL: %s', channel.push_url)
      self.logger.info('\t\tCreated by: %s', channel.subscriber_email)
      self.logger.info(
          '\t\tCreation time: %s',
          str(datetime.fromtimestamp(channel.creation_time_ms / 1000)))

    return 0

  def _Create(self):
    """Creates a Cloud Pub/Sub notification config on a bucket.

    Returns:
      0 on success.

    Raises:
      CommandException: on bad -m/-f values or a non-gs:// bucket target.
    """
    self.CheckArguments()

    # User-specified options
    pubsub_topic = None
    payload_format = None
    custom_attributes = {}
    event_types = []
    object_name_prefix = None
    should_setup_topic = True

    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-e':
          event_types.append(a)
        elif o == '-f':
          payload_format = a
        elif o == '-m':
          if ':' not in a:
            raise CommandException(
                'Custom attributes specified with -m should be of the form '
                'key:value')
          # Fixed: split only on the first ':' so attribute values may
          # themselves contain colons (bare split raised ValueError).
          key, value = a.split(':', 1)
          custom_attributes[key] = value
        elif o == '-p':
          object_name_prefix = a
        elif o == '-s':
          should_setup_topic = False
        elif o == '-t':
          pubsub_topic = a

    if payload_format not in PAYLOAD_FORMAT_MAP:
      raise CommandException(
          "Must provide a payload format with -f of either 'json' or 'none'")
    payload_format = PAYLOAD_FORMAT_MAP[payload_format]

    bucket_arg = self.args[-1]

    bucket_url = StorageUrlFromString(bucket_arg)
    if not bucket_url.IsCloudUrl() or not bucket_url.IsBucket():
      raise CommandException(
          "%s %s requires a GCS bucket name, but got '%s'" %
          (self.command_name, self.subcommand_name, bucket_arg))
    if bucket_url.scheme != 'gs':
      raise CommandException(
          'The %s command can only be used with gs:// bucket URLs.' %
          self.command_name)
    bucket_name = bucket_url.bucket_name
    self.logger.debug('Creating notification for bucket %s', bucket_url)

    # Find the project this bucket belongs to
    bucket_metadata = self.gsutil_api.GetBucket(bucket_name,
                                                fields=['projectNumber'],
                                                provider=bucket_url.scheme)
    bucket_project_number = bucket_metadata.projectNumber

    # If not specified, choose a sensible default for the Cloud Pub/Sub topic
    # name.
    if not pubsub_topic:
      pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None),
                                                bucket_name)
    if not pubsub_topic.startswith('projects/'):
      # If a user picks a topic ID (mytopic) but doesn't pass the whole name (
      # projects/my-project/topics/mytopic ), pick a default project.
      pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None),
                                                pubsub_topic)
    self.logger.debug('Using Cloud Pub/Sub topic %s', pubsub_topic)

    just_modified_topic_permissions = False
    if should_setup_topic:
      # Ask GCS for the email address that represents GCS's permission to
      # publish to a Cloud Pub/Sub topic from this project.
      service_account = self.gsutil_api.GetProjectServiceAccount(
          bucket_project_number, provider=bucket_url.scheme).email_address
      self.logger.debug('Service account for project %d: %s',
                        bucket_project_number, service_account)
      just_modified_topic_permissions = self._CreateTopic(
          pubsub_topic, service_account)

    for attempt_number in range(0, 2):
      try:
        create_response = self.gsutil_api.CreateNotificationConfig(
            bucket_name,
            pubsub_topic=pubsub_topic,
            payload_format=payload_format,
            custom_attributes=custom_attributes,
            event_types=event_types if event_types else None,
            object_name_prefix=object_name_prefix,
            provider=bucket_url.scheme)
        break
      except PublishPermissionDeniedException:
        if attempt_number == 0 and just_modified_topic_permissions:
          # If we have just set the IAM policy, it may take up to 10 seconds
          # to take effect.
          self.logger.info(
              'Retrying create notification in 10 seconds '
              '(new permissions may take up to 10 seconds to take effect.)')
          time.sleep(10)
        else:
          raise

    notification_name = 'projects/_/buckets/%s/notificationConfigs/%s' % (
        bucket_name, create_response.id)
    self.logger.info('Created notification config %s', notification_name)

    return 0

  def _CreateTopic(self, pubsub_topic, service_account):
    """Assures that a topic exists, creating it if necessary.

    Also adds GCS as a publisher on that bucket, if necessary.

    Args:
      pubsub_topic: name of the Cloud Pub/Sub topic to use/create.
      service_account: the GCS service account that needs publish permission.

    Returns:
      true if we modified IAM permissions, otherwise false.
    """
    pubsub_api = PubsubApi(logger=self.logger)

    # Verify that the Pub/Sub topic exists. If it does not, create it.
    try:
      pubsub_api.GetTopic(topic_name=pubsub_topic)
      self.logger.debug('Topic %s already exists', pubsub_topic)
    except NotFoundException:
      self.logger.debug('Creating topic %s', pubsub_topic)
      pubsub_api.CreateTopic(topic_name=pubsub_topic)
      self.logger.info('Created Cloud Pub/Sub topic %s', pubsub_topic)

    # Verify that the service account is in the IAM policy.
    policy = pubsub_api.GetTopicIamPolicy(topic_name=pubsub_topic)
    binding = Binding(role='roles/pubsub.publisher',
                      members=['serviceAccount:%s' % service_account])

    # This could be more extensive. We could, for instance, check for roles
    # that are stronger that pubsub.publisher, like owner. We could also
    # recurse up the hierarchy looking to see if there are project-level
    # permissions. This can get very complex very quickly, as the caller
    # may not necessarily have access to the project-level IAM policy.
    # There's no danger in double-granting permission just to make sure it's
    # there, though.
    if binding not in policy.bindings:
      policy.bindings.append(binding)
      # transactional safety via etag field.
      pubsub_api.SetTopicIamPolicy(topic_name=pubsub_topic, policy=policy)
      return True
    else:
      self.logger.debug('GCS already has publish permission to topic %s.',
                        pubsub_topic)
      return False

  def _EnumerateNotificationsFromArgs(self, accept_notification_configs=True):
    """Yields bucket/notification tuples from command-line args.

    Given a list of strings that are bucket names (gs://foo) or notification
    config IDs, yield tuples of bucket names and their associated
    notifications.

    Args:
      accept_notification_configs: whether notification configs are valid args.

    Yields:
      Tuples of the form (bucket_name, Notification)

    Raises:
      CommandException: on invalid arguments.
      NotFoundException: if a named notification config does not exist.
    """
    path_regex = self._GetNotificationPathRegex()

    for list_entry in self.args:
      match = path_regex.match(list_entry)
      if match:
        if not accept_notification_configs:
          raise CommandException(
              '%s %s accepts only bucket names, but you provided %s' %
              (self.command_name, self.subcommand_name, list_entry))
        bucket_name = match.group('bucket')
        notification_id = match.group('notification')
        found = False
        for notification in self.gsutil_api.ListNotificationConfigs(
            bucket_name, provider='gs'):
          if notification.id == notification_id:
            yield (bucket_name, notification)
            found = True
            break
        if not found:
          raise NotFoundException('Could not find notification %s' %
                                  list_entry)
      else:
        storage_url = StorageUrlFromString(list_entry)
        if not storage_url.IsCloudUrl():
          raise CommandException(
              'The %s command must be used on cloud buckets or notification '
              'config names.' % self.command_name)
        if storage_url.scheme != 'gs':
          # Fixed: the original omitted '% self.command_name', leaving a
          # literal '%s' in the user-facing error message.
          raise CommandException(
              'The %s command only works on gs:// buckets.' %
              self.command_name)
        path = None
        if storage_url.IsProvider():
          path = 'gs://*'
        elif storage_url.IsBucket():
          path = list_entry
        if not path:
          raise CommandException(
              'The %s command cannot be used on cloud objects, only buckets' %
              self.command_name)
        for blr in self.WildcardIterator(path).IterBuckets(
            bucket_fields=['id']):
          for notification in self.gsutil_api.ListNotificationConfigs(
              blr.storage_url.bucket_name, provider='gs'):
            yield (blr.storage_url.bucket_name, notification)

  def _List(self):
    """Lists notification configs (or, with -o, channel watches)."""
    self.CheckArguments()
    if self.sub_opts:
      if '-o' in dict(self.sub_opts):
        # -o lists legacy Object Change Notification channels instead of
        # Cloud Pub/Sub notification configs.
        for bucket_name in self.args:
          self._ListChannels(bucket_name)
    else:
      for bucket_name, notification in self._EnumerateNotificationsFromArgs(
          accept_notification_configs=False):
        self._PrintNotificationDetails(bucket_name, notification)
    return 0

  def _PrintNotificationDetails(self, bucket, notification):
    """Prints a human-readable description of one notification config."""
    print('projects/_/buckets/{bucket}/notificationConfigs/{notification}\n'
          '\tCloud Pub/Sub topic: {topic}'.format(
              bucket=bucket,
              notification=notification.id,
              topic=notification.topic[len('//pubsub.googleapis.com/'):]))
    if notification.custom_attributes:
      print('\tCustom attributes:')
      for attr in notification.custom_attributes.additionalProperties:
        print('\t\t%s: %s' % (attr.key, attr.value))
    filters = []
    if notification.event_types:
      filters.append('\t\tEvent Types: %s' %
                     ', '.join(notification.event_types))
    if notification.object_name_prefix:
      filters.append("\t\tObject name prefix: '%s'" %
                     notification.object_name_prefix)
    if filters:
      print('\tFilters:')
      for line in filters:
        print(line)
    self.logger.info('')

  def _Delete(self):
    """Deletes every notification config named (directly or via bucket)."""
    for bucket_name, notification in self._EnumerateNotificationsFromArgs():
      self._DeleteNotification(bucket_name, notification.id)
    return 0

  def _DeleteNotification(self, bucket_name, notification_id):
    """Deletes a single notification config from a bucket."""
    self.gsutil_api.DeleteNotificationConfig(bucket_name,
                                             notification=notification_id,
                                             provider='gs')
    return 0

  def _RunSubCommand(self, func):
    """Reparses sub-options for the subcommand, then dispatches to func."""
    try:
      (self.sub_opts,
       self.args) = getopt.getopt(self.args,
                                  self.command_spec.supported_sub_args)
      # Commands with both suboptions and subcommands need to reparse for
      # suboptions, so we log again.
      metrics.LogCommandParams(sub_opts=self.sub_opts)
      return func(self)
    except getopt.GetoptError:
      self.RaiseInvalidArgumentException()

  # Dispatch table mapping subcommand names to their implementations.
  SUBCOMMANDS = {
      'create': _Create,
      'list': _List,
      'delete': _Delete,
      'watchbucket': _WatchBucket,
      'stopchannel': _StopChannel
  }

  def RunCommand(self):
    """Command entry point for the notification command."""
    self.subcommand_name = self.args.pop(0)
    if self.subcommand_name in NotificationCommand.SUBCOMMANDS:
      metrics.LogCommandParams(subcommands=[self.subcommand_name])
      return self._RunSubCommand(
          NotificationCommand.SUBCOMMANDS[self.subcommand_name])
    else:
      raise CommandException('Invalid subcommand "%s" for the %s command.' %
                             (self.subcommand_name, self.command_name))
class UrlSignCommand(Command):
  """Implementation of gsutil url_sign command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'signurl',
      command_name_aliases=['signedurl', 'queryauth'],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=constants.NO_MAX,
      supported_sub_args='m:d:c:p:r:u',
      supported_private_args=['use-service-account'],
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreFileURLsArgument(),
          CommandArgument.MakeZeroOrMoreCloudURLsArgument(),
      ],
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='signurl',
      help_name_aliases=[
          'signedurl',
          'queryauth',
      ],
      help_type='command_help',
      help_one_line_summary='Create a signed url',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def _ParseAndCheckSubOpts(self):
    """Parses and validates signurl sub-options.

    Returns:
      Tuple of (method, delta, content_type, passwd, region,
      use_service_account).

    Raises:
      CommandException: on invalid durations, HTTP methods, or a missing key
          file argument.
    """
    # Default argument values
    delta = None
    method = 'GET'
    content_type = ''
    passwd = None
    region = _AUTO_DETECT_REGION
    use_service_account = False

    for o, v in self.sub_opts:
      # TODO(PY3-ONLY): Delete this if block.
      if six.PY2:
        v = v.decode(sys.stdin.encoding or constants.UTF8)
      if o == '-d':
        # Multiple -d flags accumulate into one total duration.
        if delta is not None:
          delta += _DurationToTimeDelta(v)
        else:
          delta = _DurationToTimeDelta(v)
      elif o == '-m':
        method = v
      elif o == '-c':
        content_type = v
      elif o == '-p':
        passwd = v
      elif o == '-r':
        region = v
      elif o == '-u' or o == '--use-service-account':
        use_service_account = True
      else:
        self.RaiseInvalidArgumentException()

    if delta is None:
      delta = timedelta(hours=1)
    else:
      if use_service_account and delta > _MAX_EXPIRATION_TIME_WITH_MINUS_U:
        # This restriction comes from the IAM SignBlob API. The SignBlob
        # API uses a system-managed key which can guarantee validation only
        # up to 12 hours. b/156160482#comment4
        raise CommandException(
            'Max valid duration allowed is %s when -u flag is used. For longer'
            ' duration, consider using the private-key-file instead of the -u'
            ' option.' % _MAX_EXPIRATION_TIME_WITH_MINUS_U)
      elif delta > _MAX_EXPIRATION_TIME:
        raise CommandException('Max valid duration allowed is '
                               '%s' % _MAX_EXPIRATION_TIME)

    if method not in ['GET', 'PUT', 'DELETE', 'HEAD', 'RESUMABLE']:
      raise CommandException('HTTP method must be one of'
                             '[GET|HEAD|PUT|DELETE|RESUMABLE]')

    if not use_service_account and len(self.args) < 2:
      raise CommandException(
          'The command requires a key file argument and one or more '
          'url arguments if the --use-service-account flag is missing. '
          'Run `gsutil help signurl` for more info')

    return method, delta, content_type, passwd, region, use_service_account

  def _ProbeObjectAccessWithClient(self, key, use_service_account, provider,
                                   client_email, gcs_path, logger, region):
    """Performs a head request against a signed url to check for read access."""

    # Choose a reasonable time in the future; if the user's system clock is
    # 60 or more seconds behind the server's this will generate an error.
    signed_url = _GenSignedUrl(key=key,
                               api=self.gsutil_api,
                               use_service_account=use_service_account,
                               provider=provider,
                               client_id=client_email,
                               method='HEAD',
                               duration=timedelta(seconds=60),
                               gcs_path=gcs_path,
                               logger=logger,
                               region=region,
                               string_to_sign_debug=True)

    try:
      h = GetNewHttp()
      req = Request(signed_url, 'HEAD')
      response = MakeRequest(h, req)

      # 403/404 are expected probe outcomes handled by the caller; anything
      # else unexpected is raised for the except clause below.
      if response.status_code not in [200, 403, 404]:
        raise HttpError.FromResponse(response)

      return response.status_code
    except HttpError as http_error:
      # Fixed: the original called http_error.has_attr('response'), which is
      # not a method on exception objects and raised AttributeError; use the
      # builtin hasattr() instead.
      if hasattr(http_error, 'response'):
        error_response = http_error.response
        error_string = ('Unexpected HTTP response code %s while querying '
                        'object readability. Is your system clock accurate?' %
                        error_response.status_code)
        if error_response.content:
          error_string += ' Content: %s' % error_response.content
      else:
        error_string = ('Expected an HTTP response code of '
                        '200 while querying object readability, but received '
                        'an error: %s' % http_error)
      raise CommandException(error_string)

  def _EnumerateStorageUrls(self, in_urls):
    """Expands wildcards in the given URL strings into StorageUrls."""
    ret = []

    for url_str in in_urls:
      if ContainsWildcard(url_str):
        ret.extend([blr.storage_url for blr in self.WildcardIterator(url_str)])
      else:
        ret.append(StorageUrlFromString(url_str))

    return ret

  def RunCommand(self):
    """Command entry point for signurl command."""
    if not HAVE_OPENSSL:
      raise CommandException(
          'The signurl command requires the pyopenssl library (try pip '
          'install pyopenssl or easy_install pyopenssl)')

    method, delta, content_type, passwd, region, use_service_account = (
        self._ParseAndCheckSubOpts())
    # Without -u the first positional argument is the key file, not a URL.
    arg_start_index = 0 if use_service_account else 1
    storage_urls = self._EnumerateStorageUrls(self.args[arg_start_index:])
    # Caches auto-detected bucket locations so each bucket is looked up once.
    region_cache = {}

    key = None
    if not use_service_account:
      try:
        key, client_email = _ReadJSONKeystore(
            open(self.args[0], 'rb').read(), passwd)
      except ValueError:
        # Ignore and try parsing as a pkcs12.
        if not passwd:
          passwd = getpass.getpass('Keystore password:')
        try:
          # NOTE(review): this fallback call was garbled/redacted in the code
          # under review; reconstructed as a PKCS12 keystore read — confirm
          # against upstream before relying on it.
          key, client_email = _ReadKeystore(
              open(self.args[0], 'rb').read(), passwd)
        except ValueError:
          raise CommandException('Unable to parse private key from {0}'.format(
              self.args[0]))
    else:
      client_email = self.gsutil_api.GetServiceAccountId(provider='gs')

    print('URL\tHTTP Method\tExpiration\tSigned URL')
    for url in storage_urls:
      if url.scheme != 'gs':
        raise CommandException('Can only create signed urls from gs:// urls')
      if url.IsBucket():
        if region == _AUTO_DETECT_REGION:
          raise CommandException('Generating signed URLs for creating buckets'
                                 ' requires a region be specified via the -r '
                                 'option. Run `gsutil help signurl` for more '
                                 'information about the \'-r\' option.')
        gcs_path = url.bucket_name
        if method == 'RESUMABLE':
          raise CommandException('Resumable signed URLs require an object '
                                 'name.')
      else:
        # Need to url encode the object name as Google Cloud Storage does when
        # computing the string to sign when checking the signature.
        gcs_path = '{0}/{1}'.format(
            url.bucket_name,
            urllib.parse.quote(url.object_name.encode(constants.UTF8),
                               safe=b'/~'))

      if region == _AUTO_DETECT_REGION:
        if url.bucket_name in region_cache:
          bucket_region = region_cache[url.bucket_name]
        else:
          try:
            _, bucket = self.GetSingleBucketUrlFromArg(
                'gs://{}'.format(url.bucket_name), bucket_fields=['location'])
          except Exception as e:
            raise CommandException(
                '{}: Failed to auto-detect location for bucket \'{}\'. Please '
                'ensure you have storage.buckets.get permission on the bucket '
                'or specify the bucket\'s location using the \'-r\' option.'.
                format(e.__class__.__name__, url.bucket_name))
          bucket_region = bucket.location.lower()
          region_cache[url.bucket_name] = bucket_region
      else:
        bucket_region = region
      final_url = _GenSignedUrl(key=key,
                                api=self.gsutil_api,
                                use_service_account=use_service_account,
                                provider=url.scheme,
                                client_id=client_email,
                                method=method,
                                duration=delta,
                                gcs_path=gcs_path,
                                logger=self.logger,
                                region=bucket_region,
                                content_type=content_type,
                                string_to_sign_debug=True)

      expiration = calendar.timegm((datetime.utcnow() + delta).utctimetuple())
      expiration_dt = datetime.fromtimestamp(expiration)

      time_str = expiration_dt.strftime('%Y-%m-%d %H:%M:%S')
      # TODO(PY3-ONLY): Delete this if block.
      if six.PY2:
        time_str = time_str.decode(constants.UTF8)

      url_info_str = '{0}\t{1}\t{2}\t{3}'.format(url.url_string, method,
                                                 time_str, final_url)

      # TODO(PY3-ONLY): Delete this if block.
      if six.PY2:
        url_info_str = url_info_str.encode(constants.UTF8)

      print(url_info_str)

      response_code = self._ProbeObjectAccessWithClient(
          key, use_service_account, url.scheme, client_email, gcs_path,
          self.logger, bucket_region)

      if response_code == 404:
        # A 404 is fine when the signed URL is meant to create the resource.
        if url.IsBucket() and method != 'PUT':
          raise CommandException(
              'Bucket {0} does not exist. Please create a bucket with '
              'that name before a creating signed URL to access it.'.format(
                  url))
        else:
          if method != 'PUT' and method != 'RESUMABLE':
            raise CommandException(
                'Object {0} does not exist. Please create/upload an object '
                'with that name before a creating signed URL to access it.'.
                format(url))
      elif response_code == 403:
        self.logger.warn(
            '%s does not have permissions on %s, using this link will likely '
            'result in a 403 error until at least READ permissions are granted',
            client_email or 'The account', url)

    return 0
class RmCommand(Command):
  """Implementation of gsutil rm command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'rm',
      command_name_aliases=['del', 'delete', 'remove'],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='afIrR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='rm',
      help_name_aliases=['del', 'delete', 'remove'],
      help_type='command_help',
      help_one_line_summary='Remove objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the rm command.

    Returns:
      0 on success.

    Raises:
      CommandException: on argument errors, missing buckets, or (when not
          continuing on error) failed removals.
    """
    # self.recursion_requested is initialized in command.py (so it can be
    # checked in parent class for all commands).
    self.continue_on_error = self.parallel_operations
    self.read_args_from_stdin = False
    self.all_versions = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
          # Recursive removal implies removing all versions.
          self.all_versions = True

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      url_strs = StdinIterator()
    else:
      if not self.args:
        raise CommandException('The rm command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args

    # Tracks number of object deletes that failed.
    self.op_failure_count = 0

    # Tracks if any buckets were missing.
    self.bucket_not_found_count = 0

    # Tracks buckets that are slated for recursive deletion.
    bucket_urls_to_delete = []
    self.bucket_strings_to_delete = []
    if self.recursion_requested:
      bucket_fields = ['id']
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsBucket() or url.IsProvider():
          for blr in self.WildcardIterator(url_str).IterBuckets(
              bucket_fields=bucket_fields):
            bucket_urls_to_delete.append(blr.storage_url)
            self.bucket_strings_to_delete.append(url_str)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    try:
      # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
      name_expansion_iterator = NameExpansionIterator(
          self.command_name,
          self.debug,
          self.logger,
          self.gsutil_api,
          url_strs,
          self.recursion_requested,
          project_id=self.project_id,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or
          self.parallel_operations)

      seek_ahead_iterator = None
      # Cannot seek ahead with stdin args, since we can only iterate them
      # once without buffering in memory.
      if not self.read_args_from_stdin:
        seek_ahead_iterator = SeekAheadNameExpansionIterator(
            self.command_name,
            self.debug,
            self.GetSeekAheadGsutilApi(),
            url_strs,
            self.recursion_requested,
            all_versions=self.all_versions,
            project_id=self.project_id)

      # Perform remove requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_RemoveFuncWrapper,
                 name_expansion_iterator,
                 _RemoveExceptionHandler,
                 fail_on_error=(not self.continue_on_error),
                 shared_attrs=['op_failure_count', 'bucket_not_found_count'],
                 seek_ahead_iterator=seek_ahead_iterator)

    # Assuming the bucket has versioning enabled, url's that don't map to
    # objects should throw an error even with all_versions, since the prior
    # round of deletes only sends objects to a history table.
    # This assumption that rm -a is only called for versioned buckets should
    # be corrected, but the fix is non-trivial.
    except CommandException as e:
      # Don't raise if there are buckets to delete -- it's valid to say:
      #    gsutil rm -r gs://some_bucket
      # if the bucket is empty.
      if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete, e):
        DecrementFailureCount()
      else:
        raise
    # Fixed: the original used the Python-2-only 'except ServiceException, e:'
    # syntax, a SyntaxError under Python 3 (the binding was also unused).
    except ServiceException:
      if not self.continue_on_error:
        raise

    if self.bucket_not_found_count:
      raise CommandException('Encountered non-existent bucket during listing')

    if self.op_failure_count and not self.continue_on_error:
      raise CommandException('Some files could not be removed.')

    # If this was a gsutil rm -r command covering any bucket subdirs,
    # remove any dir_$folder$ objects (which are created by various web UI
    # tools to simulate folders).
    if self.recursion_requested:
      folder_object_wildcards = []
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsObject():
          folder_object_wildcards.append('%s**_$folder$' % url_str)
      if folder_object_wildcards:
        self.continue_on_error = True
        try:
          name_expansion_iterator = NameExpansionIterator(
              self.command_name,
              self.debug,
              self.logger,
              self.gsutil_api,
              folder_object_wildcards,
              self.recursion_requested,
              project_id=self.project_id,
              all_versions=self.all_versions)
          # When we're removing folder objects, always continue on error
          self.Apply(_RemoveFuncWrapper,
                     name_expansion_iterator,
                     _RemoveFoldersExceptionHandler,
                     fail_on_error=False)
        except CommandException as e:
          # Ignore exception from name expansion due to an absent folder file.
          if not e.reason.startswith(NO_URLS_MATCHED_GENERIC):
            raise

    # Now that all data has been deleted, delete any bucket URLs.
    for url in bucket_urls_to_delete:
      self.logger.info('Removing %s...', url)

      # Bucket deletion can transiently report NotEmpty while object deletes
      # propagate, so retry a few times.
      @Retry(NotEmptyException, tries=3, timeout_secs=1)
      def BucketDeleteWithRetry():
        self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)

      BucketDeleteWithRetry()

    if self.op_failure_count:
      # Fixed: the original computed plural_str inside this branch with
      # 'if self.op_failure_count', which is always true here, yielding
      # '1 files/objects'; pluralize only for counts > 1.
      plural_str = 's' if self.op_failure_count > 1 else ''
      raise CommandException('%d file%s/object%s could not be removed.' %
                             (self.op_failure_count, plural_str, plural_str))

    return 0
class LsCommand(Command):
  """Implementation of gsutil ls command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'ls',
      command_name_aliases=[
          'dir',
          'list',
      ],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='aebdlLhp:rR',
      file_url_ok=False,
      provider_url_ok=True,
      urls_start_arg=0,
      gs_api_support=[
          ApiSelector.XML,
          ApiSelector.JSON,
      ],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument(),
      ],
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='ls',
      help_name_aliases=[
          'dir',
          'list',
      ],
      help_type='command_help',
      help_one_line_summary='List providers, buckets, or objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def _PrintBucketInfo(self, bucket_blr, listing_style):
    """Prints listing info for the given bucket.

    For SHORT/LONG styles only the bucket reference itself is printed; for
    LONG_LONG a detailed, tab-aligned block of bucket metadata is emitted.

    Args:
      bucket_blr: BucketListingReference for the bucket being listed.
      listing_style: ListingStyle enum describing type of output desired.

    Returns:
      None. (Output goes to the process's stdout via text_util.print_to_fd.)
    """
    if (listing_style == ListingStyle.SHORT or
        listing_style == ListingStyle.LONG):
      text_util.print_to_fd(bucket_blr)
      return
    # listing_style == ListingStyle.LONG_LONG:
    # We're guaranteed by the caller that the root object is populated.
    bucket = bucket_blr.root_object
    location_constraint = bucket.location
    storage_class = bucket.storageClass
    # Base set of fields available from both the XML and JSON APIs.
    fields = {
        'bucket': bucket_blr.url_string,
        'storage_class': storage_class,
        'location_constraint': location_constraint,
        'acl': AclTranslation.JsonFromMessage(bucket.acl),
        'default_acl': AclTranslation.JsonFromMessage(bucket.defaultObjectAcl),
        'versioning': bucket.versioning and bucket.versioning.enabled,
        'website_config': 'Present' if bucket.website else 'None',
        'logging_config': 'Present' if bucket.logging else 'None',
        'cors_config': 'Present' if bucket.cors else 'None',
        'lifecycle_config': 'Present' if bucket.lifecycle else 'None',
        'requester_pays': bucket.billing and bucket.billing.requesterPays
    }
    if bucket.retentionPolicy:
      fields['retention_policy'] = 'Present'
    if bucket.labels:
      fields['labels'] = LabelTranslation.JsonFromMessage(bucket.labels,
                                                          pretty_print=True)
    else:
      fields['labels'] = 'None'
    if bucket.encryption and bucket.encryption.defaultKmsKeyName:
      fields['default_kms_key'] = bucket.encryption.defaultKmsKeyName
    else:
      fields['default_kms_key'] = 'None'
    # NOTE(review): 'encryption_config' is computed here but does not appear
    # in the format template printed below — possibly vestigial; confirm.
    fields['encryption_config'] = 'Present' if bucket.encryption else 'None'
    # Fields not available in all APIs (e.g. the XML API)
    if bucket.locationType:
      fields['location_type'] = bucket.locationType
    if bucket.metageneration:
      fields['metageneration'] = bucket.metageneration
    if bucket.timeCreated:
      fields['time_created'] = bucket.timeCreated.strftime(
          '%a, %d %b %Y %H:%M:%S GMT')
    if bucket.updated:
      fields['updated'] = bucket.updated.strftime('%a, %d %b %Y %H:%M:%S GMT')
    if bucket.defaultEventBasedHold:
      fields['default_eventbased_hold'] = bucket.defaultEventBasedHold
    if bucket.iamConfiguration and bucket.iamConfiguration.bucketPolicyOnly:
      enabled = bucket.iamConfiguration.bucketPolicyOnly.enabled
      fields['bucket_policy_only_enabled'] = enabled

    # For field values that are multiline, add indenting to make it look
    # prettier.
    for key in fields:
      previous_value = fields[key]
      if (not isinstance(previous_value, six.string_types) or
          '\n' not in previous_value):
        continue
      new_value = previous_value.replace('\n', '\n\t ')
      # Start multiline values on a new line if they aren't already.
      if not new_value.startswith('\n'):
        new_value = '\n\t ' + new_value
      fields[key] = new_value

    # Only display certain properties if the given API returned them (JSON API
    # returns many fields that the XML API does not).
    location_type_line = ''
    metageneration_line = ''
    time_created_line = ''
    time_updated_line = ''
    default_eventbased_hold_line = ''
    retention_policy_line = ''
    bucket_policy_only_enabled_line = ''
    if 'location_type' in fields:
      location_type_line = '\tLocation type:\t\t\t{location_type}\n'
    if 'metageneration' in fields:
      metageneration_line = '\tMetageneration:\t\t\t{metageneration}\n'
    if 'time_created' in fields:
      time_created_line = '\tTime created:\t\t\t{time_created}\n'
    if 'updated' in fields:
      time_updated_line = '\tTime updated:\t\t\t{updated}\n'
    if 'default_eventbased_hold' in fields:
      default_eventbased_hold_line = (
          '\tDefault Event-Based Hold:\t{default_eventbased_hold}\n')
    if 'retention_policy' in fields:
      retention_policy_line = '\tRetention Policy:\t\t{retention_policy}\n'
    if 'bucket_policy_only_enabled' in fields:
      bucket_policy_only_enabled_line = ('\tBucket Policy Only enabled:\t'
                                         '{bucket_policy_only_enabled}\n')

    text_util.print_to_fd(
        ('{bucket} :\n'
         '\tStorage class:\t\t\t{storage_class}\n' + location_type_line +
         '\tLocation constraint:\t\t{location_constraint}\n'
         '\tVersioning enabled:\t\t{versioning}\n'
         '\tLogging configuration:\t\t{logging_config}\n'
         '\tWebsite configuration:\t\t{website_config}\n'
         '\tCORS configuration: \t\t{cors_config}\n'
         '\tLifecycle configuration:\t{lifecycle_config}\n'
         '\tRequester Pays enabled:\t\t{requester_pays}\n' +
         retention_policy_line + default_eventbased_hold_line +
         '\tLabels:\t\t\t\t{labels}\n' +
         '\tDefault KMS key:\t\t{default_kms_key}\n' + time_created_line +
         time_updated_line + metageneration_line +
         bucket_policy_only_enabled_line + '\tACL:\t\t\t\t{acl}\n'
         '\tDefault ACL:\t\t\t{default_acl}').format(**fields))
    if bucket_blr.storage_url.scheme == 's3':
      text_util.print_to_fd(
          'Note: this is an S3 bucket so configuration values may be '
          'blank. To retrieve bucket configuration values, use '
          'individual configuration commands such as gsutil acl get '
          '<bucket>.')

  def _PrintLongListing(self, bucket_listing_ref):
    """Prints an object with ListingStyle.LONG.

    Args:
      bucket_listing_ref: BucketListingReference whose root_object is the
          object metadata to print.

    Returns:
      Tuple (num objects, num bytes) accounted for by this listing entry;
      S3 delete markers count as zero of each.
    """
    obj = bucket_listing_ref.root_object
    url_str = bucket_listing_ref.url_string
    if (obj.metadata and
        S3_DELETE_MARKER_GUID in obj.metadata.additionalProperties):
      # S3 delete markers have no content; annotate and count nothing.
      size_string = '0'
      num_bytes = 0
      num_objs = 0
      url_str += '<DeleteMarker>'
    else:
      size_string = (MakeHumanReadable(obj.size)
                     if self.human_readable else str(obj.size))
      num_bytes = obj.size
      num_objs = 1

    # Rewrite the JSON timestamp into compact ISO-8601-style 'T...Z' form.
    timestamp = JSON_TIMESTAMP_RE.sub(r'\1T\2Z', str(obj.timeCreated))
    printstr = '%(size)10s %(timestamp)s %(url)s'
    encoded_etag = None
    encoded_metagen = None
    if self.all_versions:
      printstr += ' metageneration=%(metageneration)s'
      encoded_metagen = str(obj.metageneration)
    if self.include_etag:
      printstr += ' etag=%(etag)s'
      encoded_etag = obj.etag
    format_args = {
        'size': size_string,
        'timestamp': timestamp,
        'url': url_str,
        'metageneration': encoded_metagen,
        'etag': encoded_etag
    }
    text_util.print_to_fd(printstr % format_args)
    return (num_objs, num_bytes)

  def RunCommand(self):
    """Command entry point for the ls command."""
    got_nomatch_errors = False
    got_bucket_nomatch_errors = False
    listing_style = ListingStyle.SHORT
    get_bucket_info = False
    self.recursion_requested = False
    self.all_versions = False
    self.include_etag = False
    self.human_readable = False
    self.list_subdir_contents = True
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-e':
          self.include_etag = True
        elif o == '-b':
          get_bucket_info = True
        elif o == '-h':
          self.human_readable = True
        elif o == '-l':
          listing_style = ListingStyle.LONG
        elif o == '-L':
          listing_style = ListingStyle.LONG_LONG
        elif o == '-p':
          # Project IDs are sent as header values when using gs and s3 XML APIs.
          InsistAscii(a, 'Invalid non-ASCII character found in project ID')
          self.project_id = a
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
        elif o == '-d':
          self.list_subdir_contents = False

    if not self.args:
      # default to listing all gs buckets
      self.args = ['gs://']

    total_objs = 0
    total_bytes = 0

    def MaybePrintBucketHeader(blr):
      # Only prefix listings with the bucket URL when listing multiple URLs.
      if len(self.args) > 1:
        text_util.print_to_fd('%s:' % six.ensure_text(blr.url_string))

    print_bucket_header = MaybePrintBucketHeader

    for url_str in self.args:
      storage_url = StorageUrlFromString(url_str)
      if storage_url.IsFileUrl():
        raise CommandException('Only cloud URLs are supported for %s' %
                               self.command_name)
      bucket_fields = None
      if (listing_style == ListingStyle.SHORT or
          listing_style == ListingStyle.LONG):
        bucket_fields = ['id']
      elif listing_style == ListingStyle.LONG_LONG:
        bucket_fields = [
            'acl',
            'billing',
            'cors',
            'defaultObjectAcl',
            'encryption',
            'iamConfiguration',
            'labels',
            'location',
            'locationType',
            'logging',
            'lifecycle',
            'metageneration',
            'retentionPolicy',
            'defaultEventBasedHold',
            'storageClass',
            'timeCreated',
            'updated',
            'versioning',
            'website',
        ]
      if storage_url.IsProvider():
        # Provider URL: use bucket wildcard to list buckets.
        for blr in self.WildcardIterator(
            '%s://*' % storage_url.scheme).IterBuckets(
                bucket_fields=bucket_fields):
          self._PrintBucketInfo(blr, listing_style)
      elif storage_url.IsBucket() and get_bucket_info:
        # ls -b bucket listing request: List info about bucket(s).
        total_buckets = 0
        for blr in self.WildcardIterator(url_str).IterBuckets(
            bucket_fields=bucket_fields):
          if not ContainsWildcard(url_str) and not blr.root_object:
            # Iterator does not make an HTTP call for non-wildcarded
            # listings with fields=='id'. Ensure the bucket exists by calling
            # GetBucket.
            self.gsutil_api.GetBucket(blr.storage_url.bucket_name,
                                      fields=['id'],
                                      provider=storage_url.scheme)
          self._PrintBucketInfo(blr, listing_style)
          total_buckets += 1
        if not ContainsWildcard(url_str) and not total_buckets:
          got_bucket_nomatch_errors = True
      else:
        # URL names a bucket, object, or object subdir ->
        # list matching object(s) / subdirs.
        def _PrintPrefixLong(blr):
          # Pads the (empty) size column so prefixes align with objects.
          text_util.print_to_fd('%-33s%s' %
                                ('', six.ensure_text(blr.url_string)))

        if listing_style == ListingStyle.SHORT:
          # ls helper by default readies us for a short listing.
          listing_helper = LsHelper(
              self.WildcardIterator,
              self.logger,
              all_versions=self.all_versions,
              print_bucket_header_func=print_bucket_header,
              should_recurse=self.recursion_requested,
              list_subdir_contents=self.list_subdir_contents)
        elif listing_style == ListingStyle.LONG:
          bucket_listing_fields = [
              'name',
              'size',
              'timeCreated',
              'updated',
          ]
          if self.all_versions:
            bucket_listing_fields.extend([
                'generation',
                'metageneration',
            ])
          if self.include_etag:
            bucket_listing_fields.append('etag')

          listing_helper = LsHelper(
              self.WildcardIterator,
              self.logger,
              print_object_func=self._PrintLongListing,
              print_dir_func=_PrintPrefixLong,
              print_bucket_header_func=print_bucket_header,
              all_versions=self.all_versions,
              should_recurse=self.recursion_requested,
              fields=bucket_listing_fields,
              list_subdir_contents=self.list_subdir_contents)
        elif listing_style == ListingStyle.LONG_LONG:
          # List all fields
          bucket_listing_fields = (UNENCRYPTED_FULL_LISTING_FIELDS +
                                   ENCRYPTED_FIELDS)
          listing_helper = LsHelper(
              self.WildcardIterator,
              self.logger,
              print_object_func=PrintFullInfoAboutObject,
              print_dir_func=_PrintPrefixLong,
              print_bucket_header_func=print_bucket_header,
              all_versions=self.all_versions,
              should_recurse=self.recursion_requested,
              fields=bucket_listing_fields,
              list_subdir_contents=self.list_subdir_contents)
        else:
          raise CommandException('Unknown listing style: %s' % listing_style)

        exp_dirs, exp_objs, exp_bytes = (
            listing_helper.ExpandUrlAndPrint(storage_url))
        # Only a URL naming a specific object is an error when nothing
        # matched; bucket/subdir URLs may legitimately expand to nothing.
        if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
          got_nomatch_errors = True
        total_bytes += exp_bytes
        total_objs += exp_objs

    if total_objs and listing_style != ListingStyle.SHORT:
      text_util.print_to_fd('TOTAL: %d objects, %d bytes (%s)' %
                            (total_objs, total_bytes,
                             MakeHumanReadable(float(total_bytes))))
    if got_nomatch_errors:
      raise CommandException('One or more URLs matched no objects.')
    if got_bucket_nomatch_errors:
      raise NotFoundException('One or more bucket URLs matched no buckets.')

    return 0
class LsCommand(Command):
  """Implementation of gsutil ls command."""

  # NOTE(review): this is a legacy, Python-2-only variant (print statements,
  # basestring, byte-encoding of output); it duplicates the class name used
  # by the newer variant elsewhere in this file — confirm which is intended
  # to be active.

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'ls',
      command_name_aliases=['dir', 'list'],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='aeblLhp:rR',
      file_url_ok=False,
      provider_url_ok=True,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='ls',
      help_name_aliases=['dir', 'list'],
      help_type='command_help',
      help_one_line_summary='List providers, buckets, or objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def _PrintBucketInfo(self, bucket_blr, listing_style):
    """Prints listing info for the given bucket.

    For SHORT/LONG styles only the bucket reference itself is printed; for
    LONG_LONG a detailed, tab-aligned block of bucket metadata is emitted.

    Args:
      bucket_blr: BucketListingReference for the bucket being listed.
      listing_style: ListingStyle enum describing type of output desired.

    Returns:
      None. (Output goes to stdout.)
    """
    if (listing_style == ListingStyle.SHORT or
        listing_style == ListingStyle.LONG):
      print bucket_blr
      return
    # listing_style == ListingStyle.LONG_LONG:
    # We're guaranteed by the caller that the root object is populated.
    bucket = bucket_blr.root_object
    location_constraint = bucket.location
    storage_class = bucket.storageClass
    fields = {'bucket': bucket_blr.url_string,
              'storage_class': storage_class,
              'location_constraint': location_constraint,
              'acl': AclTranslation.JsonFromMessage(bucket.acl),
              'default_acl': AclTranslation.JsonFromMessage(
                  bucket.defaultObjectAcl)}
    fields['versioning'] = bucket.versioning and bucket.versioning.enabled
    fields['website_config'] = 'Present' if bucket.website else 'None'
    fields['logging_config'] = 'Present' if bucket.logging else 'None'
    fields['cors_config'] = 'Present' if bucket.cors else 'None'
    fields['lifecycle_config'] = 'Present' if bucket.lifecycle else 'None'

    # For field values that are multiline, add indenting to make it look
    # prettier.
    for key in fields:
      previous_value = fields[key]
      if (not isinstance(previous_value, basestring) or
          '\n' not in previous_value):
        continue
      new_value = previous_value.replace('\n', '\n\t ')
      # Start multiline values on a new line if they aren't already.
      if not new_value.startswith('\n'):
        new_value = '\n\t ' + new_value
      fields[key] = new_value

    print('{bucket} :\n'
          '\tStorage class:\t\t\t{storage_class}\n'
          '\tLocation constraint:\t\t{location_constraint}\n'
          '\tVersioning enabled:\t\t{versioning}\n'
          '\tLogging configuration:\t\t{logging_config}\n'
          '\tWebsite configuration:\t\t{website_config}\n'
          '\tCORS configuration: \t\t{cors_config}\n'
          '\tLifecycle configuration:\t{lifecycle_config}\n'
          '\tACL:\t\t\t\t{acl}\n'
          '\tDefault ACL:\t\t\t{default_acl}'.format(**fields))
    if bucket_blr.storage_url.scheme == 's3':
      print('Note: this is an S3 bucket so configuration values may be '
            'blank. To retrieve bucket configuration values, use '
            'individual configuration commands such as gsutil acl get '
            '<bucket>.')

  def _PrintLongListing(self, bucket_listing_ref):
    """Prints an object with ListingStyle.LONG.

    Args:
      bucket_listing_ref: BucketListingReference whose root_object is the
          object metadata to print.

    Returns:
      Tuple (num objects, num bytes) accounted for by this listing entry;
      S3 delete markers count as zero of each.
    """
    obj = bucket_listing_ref.root_object
    url_str = bucket_listing_ref.url_string
    if (obj.metadata and S3_DELETE_MARKER_GUID in
        obj.metadata.additionalProperties):
      # S3 delete markers have no content; annotate and count nothing.
      size_string = '0'
      num_bytes = 0
      num_objs = 0
      url_str += '<DeleteMarker>'
    else:
      size_string = (MakeHumanReadable(obj.size)
                     if self.human_readable else str(obj.size))
      num_bytes = obj.size
      num_objs = 1

    # Round-trip through UTF8 then ASCII to force a plain-ASCII py2 str
    # before the regex rewrite into compact 'T...Z' timestamp form.
    timestamp = JSON_TIMESTAMP_RE.sub(
        r'\1T\2Z', str(obj.updated).decode(UTF8).encode('ascii'))
    printstr = '%(size)10s %(timestamp)s %(url)s'
    encoded_etag = None
    encoded_metagen = None
    if self.all_versions:
      printstr += ' metageneration=%(metageneration)s'
      encoded_metagen = str(obj.metageneration).encode(UTF8)
    if self.include_etag:
      printstr += ' etag=%(etag)s'
      encoded_etag = obj.etag.encode(UTF8)
    format_args = {
        'size': size_string,
        'timestamp': timestamp,
        'url': url_str.encode(UTF8),
        'metageneration': encoded_metagen,
        'etag': encoded_etag
    }
    print printstr % format_args
    return (num_objs, num_bytes)

  def RunCommand(self):
    """Command entry point for the ls command."""
    got_nomatch_errors = False
    got_bucket_nomatch_errors = False
    listing_style = ListingStyle.SHORT
    get_bucket_info = False
    self.recursion_requested = False
    self.all_versions = False
    self.include_etag = False
    self.human_readable = False
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-e':
          self.include_etag = True
        elif o == '-b':
          get_bucket_info = True
        elif o == '-h':
          self.human_readable = True
        elif o == '-l':
          listing_style = ListingStyle.LONG
        elif o == '-L':
          listing_style = ListingStyle.LONG_LONG
        elif o == '-p':
          self.project_id = a
        elif o == '-r' or o == '-R':
          self.recursion_requested = True

    if not self.args:
      # default to listing all gs buckets
      self.args = ['gs://']

    total_objs = 0
    total_bytes = 0

    def MaybePrintBucketHeader(blr):
      # Only prefix listings with the bucket URL when listing multiple URLs.
      if len(self.args) > 1:
        print '%s:' % blr.url_string.encode(UTF8)
    print_bucket_header = MaybePrintBucketHeader

    for url_str in self.args:
      storage_url = StorageUrlFromString(url_str)
      if storage_url.IsFileUrl():
        raise CommandException('Only cloud URLs are supported for %s'
                               % self.command_name)
      bucket_fields = None
      if (listing_style == ListingStyle.SHORT or
          listing_style == ListingStyle.LONG):
        bucket_fields = ['id']
      elif listing_style == ListingStyle.LONG_LONG:
        bucket_fields = ['location', 'storageClass', 'versioning', 'acl',
                         'defaultObjectAcl', 'website', 'logging', 'cors',
                         'lifecycle']
      if storage_url.IsProvider():
        # Provider URL: use bucket wildcard to list buckets.
        for blr in self.WildcardIterator(
            '%s://*' % storage_url.scheme).IterBuckets(
                bucket_fields=bucket_fields):
          self._PrintBucketInfo(blr, listing_style)
      elif storage_url.IsBucket() and get_bucket_info:
        # ls -b bucket listing request: List info about bucket(s).
        total_buckets = 0
        for blr in self.WildcardIterator(url_str).IterBuckets(
            bucket_fields=bucket_fields):
          if not ContainsWildcard(url_str) and not blr.root_object:
            # Iterator does not make an HTTP call for non-wildcarded
            # listings with fields=='id'. Ensure the bucket exists by calling
            # GetBucket.
            self.gsutil_api.GetBucket(
                blr.storage_url.bucket_name,
                fields=['id'], provider=storage_url.scheme)
          self._PrintBucketInfo(blr, listing_style)
          total_buckets += 1
        if not ContainsWildcard(url_str) and not total_buckets:
          got_bucket_nomatch_errors = True
      else:
        # URL names a bucket, object, or object subdir ->
        # list matching object(s) / subdirs.
        def _PrintPrefixLong(blr):
          # Pads the (empty) size column so prefixes align with objects.
          print '%-33s%s' % ('', blr.url_string.encode(UTF8))

        if listing_style == ListingStyle.SHORT:
          # ls helper by default readies us for a short listing.
          ls_helper = LsHelper(self.WildcardIterator, self.logger,
                               all_versions=self.all_versions,
                               print_bucket_header_func=print_bucket_header,
                               should_recurse=self.recursion_requested)
        elif listing_style == ListingStyle.LONG:
          bucket_listing_fields = ['name', 'updated', 'size']
          if self.all_versions:
            bucket_listing_fields.extend(['generation', 'metageneration'])
          if self.include_etag:
            bucket_listing_fields.append('etag')

          ls_helper = LsHelper(self.WildcardIterator, self.logger,
                               print_object_func=self._PrintLongListing,
                               print_dir_func=_PrintPrefixLong,
                               print_bucket_header_func=print_bucket_header,
                               all_versions=self.all_versions,
                               should_recurse=self.recursion_requested,
                               fields=bucket_listing_fields)
        elif listing_style == ListingStyle.LONG_LONG:
          # List all fields
          bucket_listing_fields = None
          ls_helper = LsHelper(self.WildcardIterator, self.logger,
                               print_object_func=PrintFullInfoAboutObject,
                               print_dir_func=_PrintPrefixLong,
                               print_bucket_header_func=print_bucket_header,
                               all_versions=self.all_versions,
                               should_recurse=self.recursion_requested,
                               fields=bucket_listing_fields)
        else:
          raise CommandException('Unknown listing style: %s' % listing_style)

        exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(
            storage_url)
        # Only a URL naming a specific object is an error when nothing
        # matched; bucket/subdir URLs may legitimately expand to nothing.
        if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
          got_nomatch_errors = True
        total_bytes += exp_bytes
        total_objs += exp_objs

    if total_objs and listing_style != ListingStyle.SHORT:
      print('TOTAL: %d objects, %d bytes (%s)' %
            (total_objs, total_bytes, MakeHumanReadable(float(total_bytes))))

    if got_nomatch_errors:
      raise CommandException('One or more URLs matched no objects.')
    if got_bucket_nomatch_errors:
      raise NotFoundException(
          'One or more bucket URLs matched no buckets.')

    return 0
class RmCommand(Command):
  """Implementation of gsutil rm command."""

  # NOTE(review): Python-2-only syntax below (`except ServiceException, e`);
  # this appears to be a legacy variant of the command.

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'rm',
      command_name_aliases=['del', 'delete', 'remove'],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=NO_MAX,
      supported_sub_args='afrR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='rm',
      help_name_aliases=['del', 'delete', 'remove'],
      help_type='command_help',
      help_one_line_summary='Remove objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the rm command.

    Returns:
      0 on success.

    Raises:
      CommandException: if some files could not be removed, or on name
          expansion failures not covered by the empty-bucket/-f cases.
    """
    # self.recursion_requested is initialized in command.py (so it can be
    # checked in parent class for all commands).
    self.continue_on_error = False
    self.all_versions = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-r' or o == '-R':
          # Recursive removal implies removing all object versions.
          self.recursion_requested = True
          self.all_versions = True

    # With -r, any bucket (or provider) URL argument means the bucket itself
    # is to be deleted once its contents are gone; remember both the resolved
    # bucket URLs and the original argument strings (the latter are matched
    # against error text below).
    bucket_urls_to_delete = []
    bucket_strings_to_delete = []
    if self.recursion_requested:
      bucket_fields = ['id']
      for url_str in self.args:
        url = StorageUrlFromString(url_str)
        if url.IsBucket() or url.IsProvider():
          for blr in self.WildcardIterator(url_str).IterBuckets(
              bucket_fields=bucket_fields):
            bucket_urls_to_delete.append(blr.storage_url)
            bucket_strings_to_delete.append(url_str)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    # Used to track if any files failed to be removed.
    self.everything_removed_okay = True

    try:
      # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
      name_expansion_iterator = NameExpansionIterator(
          self.command_name, self.debug, self.logger, self.gsutil_api,
          self.args, self.recursion_requested, project_id=self.project_id,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or self.parallel_operations)

      # Perform remove requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                 _RemoveExceptionHandler,
                 fail_on_error=(not self.continue_on_error))

    # Assuming the bucket has versioning enabled, url's that don't map to
    # objects should throw an error even with all_versions, since the prior
    # round of deletes only sends objects to a history table.
    # This assumption that rm -a is only called for versioned buckets should be
    # corrected, but the fix is non-trivial.
    except CommandException as e:
      # Don't raise if there are buckets to delete -- it's valid to say:
      #   gsutil rm -r gs://some_bucket
      # if the bucket is empty.
      if not bucket_urls_to_delete and not self.continue_on_error:
        raise
      # Reset the failure count if we failed due to an empty bucket that we're
      # going to delete.
      msg = 'No URLs matched: '
      if msg in str(e):
        parts = str(e).split(msg)
        if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
          ResetFailureCount()
    except ServiceException, e:
      if not self.continue_on_error:
        raise

    if not self.everything_removed_okay and not self.continue_on_error:
      raise CommandException('Some files could not be removed.')

    # If this was a gsutil rm -r command covering any bucket subdirs,
    # remove any dir_$folder$ objects (which are created by various web UI
    # tools to simulate folders).
    if self.recursion_requested:
      # Snapshot the failure count so we can restore a clean slate below if
      # the folder-object pass was the only source of new failures.
      had_previous_failures = GetFailureCount() > 0
      folder_object_wildcards = []
      for url_str in self.args:
        url = StorageUrlFromString(url_str)
        if url.IsObject():
          folder_object_wildcards.append('%s**_$folder$' % url_str)
      if folder_object_wildcards:
        self.continue_on_error = True
        try:
          name_expansion_iterator = NameExpansionIterator(
              self.command_name, self.debug, self.logger, self.gsutil_api,
              folder_object_wildcards, self.recursion_requested,
              project_id=self.project_id,
              all_versions=self.all_versions)
          # When we're removing folder objects, always continue on error
          self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                     _RemoveFoldersExceptionHandler,
                     fail_on_error=False)
        except CommandException as e:
          # Ignore exception from name expansion due to an absent folder file.
          if not e.reason.startswith('No URLs matched:'):
            raise
        if not had_previous_failures:
          ResetFailureCount()

    # Now that all data has been deleted, delete any bucket URLs.
    for url in bucket_urls_to_delete:
      self.logger.info('Removing %s...', url)

      # Retry on NotEmptyException: object deletions above may not yet be
      # reflected in the bucket's listing.
      @Retry(NotEmptyException, tries=3, timeout_secs=1)
      def BucketDeleteWithRetry():
        self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)

      BucketDeleteWithRetry()

    return 0