def RunCommand(self):
  """Command entry point for the setmeta command.

  Parses the -n/-h options, converts the requested header additions and
  removals into a metadata-change dict, then applies that change to every
  object matching the URL args (optionally in parallel).

  Returns:
    0 on success; raises CommandException otherwise.
  """
  headers = []
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-n':
        # -n is now a no-op kept only for backward compatibility.
        self.logger.warning(
            'Warning: gsutil setmeta -n is now on by default, and will be '
            'removed in the future.\nPlease use gsutil acl set ... to set '
            'canned ACLs.')
      elif o == '-h':
        # Canned ACL headers are no longer supported via setmeta.
        if 'x-goog-acl' in a or 'x-amz-acl' in a:
          raise CommandException(
              'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
              'set ... to set canned ACLs.')
        headers.append(a)

  (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)

  self.metadata_change = metadata_plus
  # Headers to remove are represented as empty-string values.
  for header in metadata_minus:
    self.metadata_change[header] = ''

  # A single non-recursive arg must name a cloud object, not a bucket/provider.
  if len(self.args) == 1 and not self.recursion_requested:
    url = StorageUrlFromString(self.args[0])
    if not (url.IsCloudUrl() and url.IsObject()):
      raise CommandException('URL (%s) must name an object' % self.args[0])

  # Used to track if any objects' metadata failed to be set.
  self.everything_set_okay = True

  name_expansion_iterator = NameExpansionIterator(
      self.command_name,
      self.debug,
      self.logger,
      self.gsutil_api,
      self.args,
      self.recursion_requested,
      all_versions=self.all_versions,
      continue_on_error=self.parallel_operations)

  try:
    # Perform requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(_SetMetadataFuncWrapper,
               name_expansion_iterator,
               _SetMetadataExceptionHandler,
               fail_on_error=True)
  except AccessDeniedException as e:
    if e.status == 403:
      self._WarnServiceAccounts()
    raise

  if not self.everything_set_okay:
    raise CommandException(
        'Metadata for some objects could not be set.')

  return 0
def RunCommand(self):
  """Command entry point for the mv command."""
  # Refuse to operate when any source URL names a bucket or provider:
  # deleting a source bucket must be done explicitly as a separate operation.
  for src_arg in self.args[:-1]:
    src_url = StorageUrlFromString(src_arg)
    if src_url.IsCloudUrl() and (src_url.IsBucket() or src_url.IsProvider()):
      raise CommandException(
          'You cannot move a source bucket using the mv '
          'command. If you meant to move\nall objects in '
          'the bucket, you can use a command like:\n'
          '\tgsutil mv %s/* %s' % (src_arg, self.args[-1]))

  # Delegate to cp, prepending command-line opts so they'll be picked up by
  # the cp and rm machinery (e.g., for -p option). The undocumented
  # (internal-use-only) cp -M option makes each original object get deleted
  # after a successful copy, and also gives Unix-mv-style naming behavior
  # (see comments in ConstructDstUrl).
  delegated_args = ['-M']
  if self.recursion_requested:
    delegated_args.append('-R')
  delegated_args += self.unparsed_args
  self.command_runner.RunNamedCommand('cp', delegated_args, self.headers,
                                      self.debug, self.parallel_operations)
  return 0
def GetUserAgent(args, metrics_off=True):
  """Using the command arguments return a suffix for the UserAgent string.

  Args:
    args: str[], parsed set of arguments entered in the CLI.
    metrics_off: boolean, whether the MetricsCollector is disabled.

  Returns:
    str, A string value that can be appended to an existing UserAgent.
  """
  # Accumulate fragments and join once rather than repeatedly concatenating.
  fragments = [
      ' gsutil/%s' % gslib.VERSION,
      ' (%s)' % sys.platform,
      ' analytics/%s' % ('disabled' if metrics_off else 'enabled'),
      ' interactive/%s' % system_util.IsRunningInteractively(),
  ]

  if args:
    fragments.append(' command/%s' % args[0])

    if len(args) > 2:
      if args[0] in ('cp', 'mv', 'rsync'):
        # Any cp, mv or rsync commands that use daisy chain mode should be
        # noted as that represents a unique use case that may be better served
        # by the storage transfer service.
        try:
          src = StorageUrlFromString(six.ensure_text(args[-2]))
          dst = StorageUrlFromString(six.ensure_text(args[-1]))
          if src.IsCloudUrl() and dst.IsCloudUrl() and src.scheme != dst.scheme:
            fragments.append('-DaisyChain')
        except InvalidUrlError:
          pass
      elif args[0] == 'rewrite':
        if '-k' in args:  # Rewrite encryption key.
          fragments.append('-k')
        if '-s' in args:  # Rewrite storage class.
          fragments.append('-s')

  if system_util.InvokedViaCloudSdk():
    fragments.append(' google-cloud-sdk')
    if system_util.CloudSdkVersion():
      fragments.append('/%s' % system_util.CloudSdkVersion())

  return ''.join(fragments)
def _EnumerateNotificationsFromArgs(self, accept_notification_configs=True):
  """Yields bucket/notification tuples from command-line args.

  Given a list of strings that are bucket names (gs://foo) or notification
  config IDs, yield tuples of bucket names and their associated notifications.

  Args:
    accept_notification_configs: whether notification configs are valid args.

  Yields:
    Tuples of the form (bucket_name, Notification)

  Raises:
    CommandException: if an arg is not a gs:// bucket or notification config
        name, or notification config names are not accepted here.
    NotFoundException: if a named notification config does not exist.
  """
  path_regex = self._GetNotificationPathRegex()

  for list_entry in self.args:
    match = path_regex.match(list_entry)
    if match:
      if not accept_notification_configs:
        raise CommandException(
            '%s %s accepts only bucket names, but you provided %s' %
            (self.command_name, self.subcommand_name, list_entry))
      bucket_name = match.group('bucket')
      notification_id = match.group('notification')
      # Scan the bucket's configs for the one with the requested ID.
      found = False
      for notification in self.gsutil_api.ListNotificationConfigs(
          bucket_name, provider='gs'):
        if notification.id == notification_id:
          yield (bucket_name, notification)
          found = True
          break
      if not found:
        raise NotFoundException('Could not find notification %s' % list_entry)
    else:
      storage_url = StorageUrlFromString(list_entry)
      if not storage_url.IsCloudUrl():
        raise CommandException(
            'The %s command must be used on cloud buckets or notification '
            'config names.' % self.command_name)
      if storage_url.scheme != 'gs':
        # Bug fix: the original raise omitted the % argument, so users would
        # see a literal '%s' in the error message.
        raise CommandException(
            'The %s command only works on gs:// buckets.' % self.command_name)
      path = None
      if storage_url.IsProvider():
        # A bare provider URL (gs://) means "all buckets".
        path = 'gs://*'
      elif storage_url.IsBucket():
        path = list_entry
      if not path:
        raise CommandException(
            'The %s command cannot be used on cloud objects, only buckets' %
            self.command_name)
      # Enumerate every notification config on each matching bucket.
      for blr in self.WildcardIterator(path).IterBuckets(bucket_fields=['id']):
        for notification in self.gsutil_api.ListNotificationConfigs(
            blr.storage_url.bucket_name, provider='gs'):
          yield (blr.storage_url.bucket_name, notification)
def test_storage_url_from_string(self):
  """Tests StorageUrlFromString parsing of file, gs://, and s3:// URLs."""
  # Fix: assertEquals is a deprecated alias removed in Python 3.12; use
  # assertEqual instead.
  storage_url = StorageUrlFromString('abc')
  self.assertTrue(storage_url.IsFileUrl())
  self.assertEqual('abc', storage_url.object_name)

  storage_url = StorageUrlFromString('file://abc/123')
  self.assertTrue(storage_url.IsFileUrl())
  self.assertEqual('abc/123', storage_url.object_name)

  storage_url = StorageUrlFromString('gs://abc/123')
  self.assertTrue(storage_url.IsCloudUrl())
  self.assertEqual('abc', storage_url.bucket_name)
  self.assertEqual('123', storage_url.object_name)

  storage_url = StorageUrlFromString('s3://abc/123')
  self.assertTrue(storage_url.IsCloudUrl())
  self.assertEqual('abc', storage_url.bucket_name)
  self.assertEqual('123', storage_url.object_name)
def HaveProviderUrls(args_to_check):
  """Checks whether args_to_check contains any provider URLs (like 'gs://').

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains any provider URLs.
  """
  return any(
      url.IsCloudUrl() and url.IsProvider()
      for url in map(StorageUrlFromString, args_to_check))
def _SetHold(self, obj_metadata_update_wrapper, url_args,
             sub_command_full_name):
  """Common logic to set or unset Event-Based/Temporary Hold on objects.

  Args:
    obj_metadata_update_wrapper: The function for updating related fields in
        Object metadata.
    url_args: List of object URIs.
    sub_command_full_name: The full name for sub-command:
        "Temporary" / "Event-Based"

  Raises:
    CommandException: if a single non-recursive URL does not name an object,
        or if the hold could not be set on some objects.
  """
  # A single non-recursive arg must name a cloud object, not a bucket.
  if len(url_args) == 1 and not self.recursion_requested:
    url = StorageUrlFromString(url_args[0])
    if not (url.IsCloudUrl() and url.IsObject()):
      raise CommandException('URL ({}) must name an object'.format(
          url_args[0]))

  name_expansion_iterator = self._GetObjectNameExpansionIterator(url_args)
  seek_ahead_iterator = self._GetSeekAheadNameExpansionIterator(url_args)

  # Used to track if any objects' metadata failed to be set.
  self.everything_set_okay = True

  try:
    # TODO: implement '-c' flag to continue_on_error

    # Perform requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(obj_metadata_update_wrapper,
               name_expansion_iterator,
               UpdateObjectMetadataExceptionHandler,
               fail_on_error=True,
               seek_ahead_iterator=seek_ahead_iterator)
  except AccessDeniedException as e:
    # Permission errors get an extra hint about service-account usage.
    if e.status == 403:
      self._WarnServiceAccounts()
    raise

  if not self.everything_set_okay:
    raise CommandException(
        '{} Hold for some objects could not be set.'.format(
            sub_command_full_name))
def LookUpGsutilVersion(gsutil_api, url_str):
  """Looks up the gsutil version of the specified gsutil tarball URL.

  Version is specified in the metadata field set on that object.

  Args:
    gsutil_api: gsutil Cloud API to use when retrieving gsutil tarball.
    url_str: tarball URL to retrieve (such as 'gs://pub/gsutil.tar.gz').

  Returns:
    Version string if URL is a cloud URL containing x-goog-meta-gsutil-version
    metadata, else None.
  """
  tarball_url = StorageUrlFromString(url_str)
  if not tarball_url.IsCloudUrl():
    return None

  obj = gsutil_api.GetObjectMetadata(tarball_url.bucket_name,
                                     tarball_url.object_name,
                                     provider=tarball_url.scheme,
                                     fields=['metadata'])
  if not (obj.metadata and obj.metadata.additionalProperties):
    return None

  # Return the value of the first 'gsutil_version' metadata entry, if any.
  return next((prop.value
               for prop in obj.metadata.additionalProperties
               if prop.key == 'gsutil_version'), None)
def _Create(self):
  """Creates a notification config on the bucket named by the last CLI arg.

  Parses the sub-command options (-e/-f/-m/-p/-s/-t), resolves the target
  Cloud Pub/Sub topic (creating it and granting GCS publish permission unless
  -s was given), then creates the notification config, retrying once if the
  freshly-granted IAM permission has not yet propagated.

  Returns:
    0 on success; raises CommandException otherwise.
  """
  self.CheckArguments()

  # User-specified options
  pubsub_topic = None
  payload_format = None
  custom_attributes = {}
  event_types = []
  object_name_prefix = None
  should_setup_topic = True

  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-e':
        event_types.append(a)
      elif o == '-f':
        payload_format = a
      elif o == '-m':
        if ':' not in a:
          raise CommandException(
              'Custom attributes specified with -m should be of the form '
              'key:value')
        # Bug fix: split on the first colon only, so attribute values that
        # themselves contain colons (e.g. URLs) don't raise a ValueError.
        key, value = a.split(':', 1)
        custom_attributes[key] = value
      elif o == '-p':
        object_name_prefix = a
      elif o == '-s':
        should_setup_topic = False
      elif o == '-t':
        pubsub_topic = a

  if payload_format not in PAYLOAD_FORMAT_MAP:
    raise CommandException(
        "Must provide a payload format with -f of either 'json' or 'none'")
  payload_format = PAYLOAD_FORMAT_MAP[payload_format]

  bucket_arg = self.args[-1]

  bucket_url = StorageUrlFromString(bucket_arg)
  if not bucket_url.IsCloudUrl() or not bucket_url.IsBucket():
    raise CommandException(
        "%s %s requires a GCS bucket name, but got '%s'" %
        (self.command_name, self.subcommand_name, bucket_arg))
  if bucket_url.scheme != 'gs':
    raise CommandException(
        'The %s command can only be used with gs:// bucket URLs.' %
        self.command_name)
  bucket_name = bucket_url.bucket_name
  self.logger.debug('Creating notification for bucket %s', bucket_url)

  # Find the project this bucket belongs to
  bucket_metadata = self.gsutil_api.GetBucket(bucket_name,
                                              fields=['projectNumber'],
                                              provider=bucket_url.scheme)
  bucket_project_number = bucket_metadata.projectNumber

  # If not specified, choose a sensible default for the Cloud Pub/Sub topic
  # name.
  if not pubsub_topic:
    pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None),
                                              bucket_name)
  if not pubsub_topic.startswith('projects/'):
    # If a user picks a topic ID (mytopic) but doesn't pass the whole name (
    # projects/my-project/topics/mytopic ), pick a default project.
    pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None),
                                              pubsub_topic)
  self.logger.debug('Using Cloud Pub/Sub topic %s', pubsub_topic)

  just_modified_topic_permissions = False
  if should_setup_topic:
    # Ask GCS for the email address that represents GCS's permission to
    # publish to a Cloud Pub/Sub topic from this project.
    service_account = self.gsutil_api.GetProjectServiceAccount(
        bucket_project_number, provider=bucket_url.scheme).email_address
    self.logger.debug('Service account for project %d: %s',
                      bucket_project_number, service_account)
    just_modified_topic_permissions = self._CreateTopic(
        pubsub_topic, service_account)

  for attempt_number in range(0, 2):
    try:
      create_response = self.gsutil_api.CreateNotificationConfig(
          bucket_name,
          pubsub_topic=pubsub_topic,
          payload_format=payload_format,
          custom_attributes=custom_attributes,
          event_types=event_types if event_types else None,
          object_name_prefix=object_name_prefix,
          provider=bucket_url.scheme)
      break
    except PublishPermissionDeniedException:
      if attempt_number == 0 and just_modified_topic_permissions:
        # If we have just set the IAM policy, it may take up to 10 seconds to
        # take effect.
        self.logger.info('Retrying create notification in 10 seconds '
                         '(new permissions may take up to 10 seconds to take '
                         'effect.)')
        time.sleep(10)
      else:
        raise

  notification_name = 'projects/_/buckets/%s/notificationConfigs/%s' % (
      bucket_name, create_response.id)
  self.logger.info('Created notification config %s', notification_name)

  return 0
def CatUrlStrings(self,
                  url_strings,
                  show_header=False,
                  start_byte=0,
                  end_byte=None,
                  cat_out_fd=None):
  """Prints each of the url strings to stdout.

  Args:
    url_strings: String iterable.
    show_header: If true, print a header per file.
    start_byte: Starting byte of the file to print, used for constructing
                range requests.
    end_byte: Ending byte of the file to print; used for constructing range
              requests. If this is negative, the start_byte is ignored and
              and end range is sent over HTTP (such as range: bytes -9)
    cat_out_fd: File descriptor to which output should be written. Defaults to
                stdout if no file descriptor is supplied.

  Returns:
    0 on success.

  Raises:
    CommandException if no URLs can be found.
  """
  printed_one = False
  # This should refer to whatever sys.stdin refers to when this method is
  # run, not when this method is defined, so we do the initialization here
  # rather than define sys.stdin as the cat_out_fd parameter's default value.
  if cat_out_fd is None:
    cat_out_fd = sys.stdout
  # We manipulate the stdout so that all other data other than the Object
  # contents go to stderr.
  old_stdout = sys.stdout
  sys.stdout = sys.stderr
  try:
    if url_strings and url_strings[0] in ('-', 'file://-'):
      # '-' means "copy stdin through to the output fd".
      self._WriteBytesBufferedFileToFile(sys.stdin, cat_out_fd)
    else:
      for url_str in url_strings:
        did_some_work = False
        # TODO: Get only the needed fields here.
        for blr in self.command_obj.WildcardIterator(url_str).IterObjects(
            bucket_listing_fields=_CAT_BUCKET_LISTING_FIELDS):
          decryption_keywrapper = None
          if (blr.root_object and blr.root_object.customerEncryption and
              blr.root_object.customerEncryption.keySha256):
            # CSEK-encrypted object: locate the matching key in the boto
            # config so the download can be decrypted.
            decryption_key = FindMatchingCSEKInBotoConfig(
                blr.root_object.customerEncryption.keySha256, config)
            if not decryption_key:
              raise EncryptionException(
                  'Missing decryption key with SHA256 hash %s. No decryption '
                  'key matches object %s' %
                  (blr.root_object.customerEncryption.keySha256,
                   blr.url_string))
            decryption_keywrapper = CryptoKeyWrapperFromKey(decryption_key)

          did_some_work = True
          if show_header:
            if printed_one:
              print()
            print('==> %s <==' % blr)
            printed_one = True
          cat_object = blr.root_object
          storage_url = StorageUrlFromString(blr.url_string)
          if storage_url.IsCloudUrl():
            # Download the object's media directly into cat_out_fd.
            compressed_encoding = ObjectIsGzipEncoded(cat_object)
            self.command_obj.gsutil_api.GetObjectMedia(
                cat_object.bucket,
                cat_object.name,
                cat_out_fd,
                compressed_encoding=compressed_encoding,
                start_byte=start_byte,
                end_byte=end_byte,
                object_size=cat_object.size,
                generation=storage_url.generation,
                decryption_tuple=decryption_keywrapper,
                provider=storage_url.scheme)
          else:
            # Local file: stream it to the output fd.
            with open(storage_url.object_name, 'rb') as f:
              self._WriteBytesBufferedFileToFile(f, cat_out_fd)
        if not did_some_work:
          raise CommandException(NO_URLS_MATCHED_TARGET % url_str)
  finally:
    # Restore the real stdout even if an exception interrupted printing.
    sys.stdout = old_stdout
  return 0
def RunCommand(self):
  """Command entry point for the setmeta command.

  Parses -h options into a metadata-change dict (empty-string values mark
  headers to remove), validates the args, then applies the change to every
  matching object (optionally in parallel).

  Returns:
    0 on success; raises CommandException otherwise.
  """
  headers = []
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-h':
        # Canned ACL headers are no longer supported via setmeta.
        if 'x-goog-acl' in a or 'x-amz-acl' in a:
          raise CommandException(
              'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
              'set ... to set canned ACLs.')
        headers.append(a)

  (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)

  self.metadata_change = metadata_plus
  # Headers to remove are represented as empty-string values.
  for header in metadata_minus:
    self.metadata_change[header] = ''

  if not self.metadata_change:
    raise CommandException(
        'gsutil setmeta requires one or more headers to be provided with the'
        ' -h flag. See "gsutil help setmeta" for more information.')

  # A single non-recursive arg must name a cloud object, not a bucket.
  if len(self.args) == 1 and not self.recursion_requested:
    url = StorageUrlFromString(self.args[0])
    if not (url.IsCloudUrl() and url.IsObject()):
      raise CommandException('URL (%s) must name an object' % self.args[0])

  # Used to track if any objects' metadata failed to be set.
  self.everything_set_okay = True

  self.preconditions = PreconditionsFromHeaders(self.headers)

  name_expansion_iterator = NameExpansionIterator(
      self.command_name,
      self.debug,
      self.logger,
      self.gsutil_api,
      self.args,
      self.recursion_requested,
      all_versions=self.all_versions,
      continue_on_error=self.parallel_operations,
      bucket_listing_fields=['generation', 'metadata', 'metageneration'])
  seek_ahead_iterator = SeekAheadNameExpansionIterator(
      self.command_name,
      self.debug,
      self.GetSeekAheadGsutilApi(),
      self.args,
      self.recursion_requested,
      all_versions=self.all_versions,
      project_id=self.project_id)

  try:
    # Perform requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(_SetMetadataFuncWrapper,
               name_expansion_iterator,
               _SetMetadataExceptionHandler,
               fail_on_error=True,
               seek_ahead_iterator=seek_ahead_iterator)
  except AccessDeniedException as e:
    # Permission errors get an extra hint about service-account usage.
    if e.status == 403:
      self._WarnServiceAccounts()
    raise

  if not self.everything_set_okay:
    raise CommandException('Metadata for some objects could not be set.')

  return 0