def _SetIamHelperInternal(self, storage_url, policy, thread_state=None):
  """Sets IAM policy for a single, resolved bucket / object URL.

  Args:
    storage_url: A CloudUrl instance with no wildcards, pointing to a
                 specific bucket or object.
    policy: A Policy object to set on the bucket / object.
    thread_state: CloudApiDelegator instance which is passed from
                  command.WorkerThread.__init__() if the -m flag is
                  specified. Will use self.gsutil_api if thread_state is set
                  to None.

  Raises:
    ServiceException passed from the API call if an HTTP error was returned.
  """

  # SetIamHelper may be called by a command.WorkerThread. In the
  # single-threaded case, WorkerThread will not pass the CloudApiDelegator
  # instance to thread_state. GetCloudApiInstance is called to resolve the
  # edge case.
  gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

  if storage_url.IsBucket():
    gsutil_api.SetBucketIamPolicy(storage_url.bucket_name,
                                  policy,
                                  provider=storage_url.scheme)
  else:
    gsutil_api.SetObjectIamPolicy(storage_url.bucket_name,
                                  storage_url.object_name,
                                  policy,
                                  generation=storage_url.generation,
                                  provider=storage_url.scheme)
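# Illustrative sketch (not part of gsutil): the worker functions in this
# collection all resolve their Cloud API handle the same way - use the
# per-thread CloudApiDelegator passed via thread_state when running under
# the -m flag, and fall back to the command's own gsutil_api otherwise. A
# minimal, hypothetical version of that resolution helper might look like
# the following (the name is invented for illustration):
def _GetCloudApiInstanceSketch(cls, thread_state=None):
  # Prefer the per-thread API instance if one was handed to us; otherwise
  # use the instance stored on the command object.
  return thread_state if thread_state is not None else cls.gsutil_api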
def _RsyncFunc(cls, diff_to_apply, thread_state=None):
  """Worker function for performing the actual copy and remove operations."""
  gsutil_api = GetCloudApiInstance(cls, thread_state=thread_state)
  dst_url_str = diff_to_apply.dst_url_str
  dst_url = StorageUrlFromString(dst_url_str)
  if diff_to_apply.diff_action == _DiffAction.REMOVE:
    if cls.dryrun:
      cls.logger.info('Would remove %s', dst_url)
    else:
      cls.logger.info('Removing %s', dst_url)
      if dst_url.IsFileUrl():
        os.unlink(dst_url.object_name)
      else:
        try:
          gsutil_api.DeleteObject(dst_url.bucket_name,
                                  dst_url.object_name,
                                  generation=dst_url.generation,
                                  provider=dst_url.scheme)
        except NotFoundException:
          # If the object happened to be deleted by an external process, this
          # is fine because it moves us closer to the desired state.
          pass
  elif diff_to_apply.diff_action == _DiffAction.COPY:
    src_url_str = diff_to_apply.src_url_str
    src_url = StorageUrlFromString(src_url_str)
    if cls.dryrun:
      cls.logger.info('Would copy %s to %s', src_url, dst_url)
    else:
      copy_helper.PerformCopy(cls.logger, src_url, dst_url, gsutil_api, cls,
                              _RsyncExceptionHandler, headers=cls.headers)
  else:
    raise CommandException('Got unexpected DiffAction (%d)' %
                           diff_to_apply.diff_action)
def GetIamHelper(self, storage_url, thread_state=None):
  """Gets an IAM policy for a single, resolved bucket / object URL.

  Args:
    storage_url: A CloudUrl instance with no wildcards, pointing to a
                 specific bucket or object.
    thread_state: CloudApiDelegator instance which is passed from
                  command.WorkerThread.__init__() if the global -m flag is
                  specified. Will use self.gsutil_api if thread_state is set
                  to None.

  Returns:
    Serialized Policy instance.
  """
  gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

  if storage_url.IsBucket():
    policy = gsutil_api.GetBucketIamPolicy(
        storage_url.bucket_name,
        provider=storage_url.scheme,
    )
  else:
    policy = gsutil_api.GetObjectIamPolicy(
        storage_url.bucket_name,
        storage_url.object_name,
        generation=storage_url.generation,
        provider=storage_url.scheme,
    )

  return policy
def _ListUrlRootFunc(cls, args_tuple, thread_state=None):
  """Worker function for listing files/objects under a to-be-sync'd root URL.

  Outputs sorted list to out_filename, formatted per _BuildTmpOutputLine. We
  sort the listed URLs because we don't want to depend on consistent sort
  order across file systems and cloud providers.

  Args:
    cls: Command instance.
    args_tuple: (base_url_str, out_filename, desc), where base_url_str is the
                top-level URL string to list; out_filename is the name of the
                file to which sorted output should be written; desc is
                'source' or 'destination'.
    thread_state: gsutil Cloud API instance to use.
  """
  gsutil_api = GetCloudApiInstance(cls, thread_state=thread_state)
  (base_url_str, out_filename, desc) = args_tuple
  # We sort while iterating over base_url_str, allowing parallelism of
  # batched sorting with collecting the listing.
  out_file = io.open(out_filename, mode='w', encoding=UTF8)
  try:
    _BatchSort(_FieldedListingIterator(cls, gsutil_api, base_url_str, desc),
               out_file)
  except Exception as e:  # pylint: disable=broad-except
    # Abandon rsync if an exception percolates up to this layer - retryable
    # exceptions are handled in the lower layers, so we got a non-retryable
    # exception (like 404 bucket not found) and proceeding would either be
    # futile or could result in data loss - for example:
    #   gsutil rsync -d gs://non-existent-bucket ./localdir
    # would delete files from localdir.
    cls.logger.error(
        'Caught non-retryable exception while listing %s: %s' %
        (base_url_str, e))
    cls.non_retryable_listing_failures = 1
  out_file.close()
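# Illustrative sketch (not part of gsutil): _BatchSort above produces a fully
# sorted listing without holding the entire listing in memory. Assuming each
# listed entry is a newline-terminated string, a minimal, hypothetical
# external sort with the same shape - sort fixed-size batches to temp files,
# then lazily merge them - could look like this:
import heapq
import tempfile


def _batch_sort_sketch(iterable, output_file, batch_size=10000):
  """Writes the sorted contents of iterable (lines) to output_file."""
  chunk_files = []
  chunk = []
  for line in iterable:
    chunk.append(line)
    if len(chunk) >= batch_size:
      chunk.sort()
      tmp = tempfile.TemporaryFile(mode='w+')
      tmp.writelines(chunk)
      tmp.seek(0)
      chunk_files.append(tmp)
      chunk = []
  chunk.sort()
  # heapq.merge lazily merges the already-sorted temp files and the final
  # in-memory chunk into one sorted stream.
  output_file.writelines(heapq.merge(*(chunk_files + [iter(chunk)])))
  for tmp in chunk_files:
    tmp.close()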
def RemoveFunc(self, name_expansion_result, thread_state=None):
  """Deletes the object named by name_expansion_result."""
  gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

  exp_src_url = name_expansion_result.expanded_storage_url
  self.logger.info('Removing %s...', exp_src_url)
  gsutil_api.DeleteObject(exp_src_url.bucket_name,
                          exp_src_url.object_name,
                          preconditions=self.preconditions,
                          generation=exp_src_url.generation,
                          provider=exp_src_url.scheme)
def RewriteFunc(self, name_expansion_result, thread_state=None):
  gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

  self.CheckProvider(name_expansion_result.expanded_storage_url)

  # If other transform types are added here, they must ensure that the
  # encryption key configuration matches the boto configuration, because
  # gsutil maintains an invariant that all objects it writes use the
  # encryption_key value (including decrypting if no key is present).
  if _TransformTypes.CRYPTO_KEY in self.transform_types:
    self.CryptoRewrite(name_expansion_result.expanded_storage_url, gsutil_api)
def SetMetadataFunc(self, name_expansion_result, thread_state=None):
  """Sets metadata on an object.

  Args:
    name_expansion_result: NameExpansionResult describing target object.
    thread_state: gsutil Cloud API instance to use for the operation.
  """
  gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

  exp_src_url = name_expansion_result.expanded_storage_url
  self.logger.info('Setting metadata on %s...', exp_src_url)

  cloud_obj_metadata = encoding.JsonToMessage(
      apitools_messages.Object, name_expansion_result.expanded_result)

  preconditions = Preconditions(
      gen_match=self.preconditions.gen_match,
      meta_gen_match=self.preconditions.meta_gen_match)
  if preconditions.gen_match is None:
    preconditions.gen_match = cloud_obj_metadata.generation
  if preconditions.meta_gen_match is None:
    preconditions.meta_gen_match = cloud_obj_metadata.metageneration

  # Patch handles the patch semantics for most metadata, but we need to
  # merge the custom metadata field manually.
  patch_obj_metadata = ObjectMetadataFromHeaders(self.metadata_change)

  api = gsutil_api.GetApiSelector(provider=exp_src_url.scheme)
  # For XML we only want to patch through custom metadata that has
  # changed. For JSON we need to build the complete set.
  if api == ApiSelector.XML:
    pass
  elif api == ApiSelector.JSON:
    CopyObjectMetadata(patch_obj_metadata, cloud_obj_metadata, override=True)
    patch_obj_metadata = cloud_obj_metadata
    # Patch body does not need the object generation and metageneration.
    patch_obj_metadata.generation = None
    patch_obj_metadata.metageneration = None

  gsutil_api.PatchObjectMetadata(exp_src_url.bucket_name,
                                 exp_src_url.object_name,
                                 patch_obj_metadata,
                                 generation=exp_src_url.generation,
                                 preconditions=preconditions,
                                 provider=exp_src_url.scheme,
                                 fields=['id'])
  PutToQueueWithTimeout(gsutil_api.status_queue,
                        MetadataMessage(message_time=time.time()))
def SetMetadataFunc(self, name_expansion_result, thread_state=None):
  """Sets metadata on an object.

  Args:
    name_expansion_result: NameExpansionResult describing target object.
    thread_state: gsutil Cloud API instance to use for the operation.
  """
  gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

  exp_src_url = name_expansion_result.expanded_storage_url
  self.logger.info('Setting metadata on %s...', exp_src_url)

  fields = ['generation', 'metadata', 'metageneration']
  cloud_obj_metadata = gsutil_api.GetObjectMetadata(
      exp_src_url.bucket_name,
      exp_src_url.object_name,
      generation=exp_src_url.generation,
      provider=exp_src_url.scheme,
      fields=fields)

  preconditions = Preconditions(
      gen_match=cloud_obj_metadata.generation,
      meta_gen_match=cloud_obj_metadata.metageneration)

  # Patch handles the patch semantics for most metadata, but we need to
  # merge the custom metadata field manually.
  patch_obj_metadata = ObjectMetadataFromHeaders(self.metadata_change)

  api = gsutil_api.GetApiSelector(provider=exp_src_url.scheme)
  # For XML we only want to patch through custom metadata that has
  # changed. For JSON we need to build the complete set.
  if api == ApiSelector.XML:
    pass
  elif api == ApiSelector.JSON:
    CopyObjectMetadata(patch_obj_metadata, cloud_obj_metadata, override=True)
    patch_obj_metadata = cloud_obj_metadata

  gsutil_api.PatchObjectMetadata(exp_src_url.bucket_name,
                                 exp_src_url.object_name,
                                 patch_obj_metadata,
                                 generation=exp_src_url.generation,
                                 preconditions=preconditions,
                                 provider=exp_src_url.scheme)
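# Illustrative sketch (not part of gsutil): both SetMetadataFunc variants
# above guard the read-modify-write of object metadata with generation /
# metageneration preconditions, so a concurrent writer causes the PATCH to
# fail rather than silently clobbering the newer object. A hypothetical
# helper showing the "fill in whatever the caller didn't pin" policy used by
# the first variant (all names here are invented for illustration):
def _fill_preconditions_sketch(user_gen_match, user_meta_gen_match,
                               obj_generation, obj_metageneration):
  # Caller-supplied preconditions win; anything left unspecified is pinned
  # to the generation / metageneration observed when the object was listed.
  gen_match = (user_gen_match
               if user_gen_match is not None else obj_generation)
  meta_gen_match = (user_meta_gen_match
                    if user_meta_gen_match is not None
                    else obj_metageneration)
  return (gen_match, meta_gen_match)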
def _ListUrlRootFunc(cls, args_tuple, thread_state=None):
  """Worker function for listing files/objects under a to-be-sync'd root URL.

  Outputs sorted list to out_file_name, formatted per _BuildTmpOutputLine. We
  sort the listed URLs because we don't want to depend on consistent sort
  order across file systems and cloud providers.

  Args:
    cls: Command instance.
    args_tuple: (url_str, out_file_name, desc), where url_str is the URL
                string to list; out_file_name is the name of the file to
                which sorted output should be written; desc is 'source' or
                'destination'.
    thread_state: gsutil Cloud API instance to use.
  """
  gsutil_api = GetCloudApiInstance(cls, thread_state=thread_state)
  (url_str, out_file_name, desc) = args_tuple
  # We sort while iterating over url_str, allowing parallelism of batched
  # sorting with collecting the listing.
  out_file = io.open(out_file_name, mode='w', encoding=UTF8)
  _BatchSort(_FieldedListingIterator(cls, gsutil_api, url_str, desc),
             out_file)
  out_file.close()
def RewriteFunc(self, name_expansion_result, thread_state=None):
  gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
  transform_url = name_expansion_result.expanded_storage_url
  # Make a local copy of the requested transformations for each thread. As
  # a redundant transformation for one object might not be redundant for
  # another, we wouldn't want to remove it from the transform_types set that
  # all threads share.
  transforms_to_perform = set(self.transform_types)

  self.CheckProvider(transform_url)

  # Get all fields so that we can ensure that the target metadata is
  # specified correctly.
  src_metadata = gsutil_api.GetObjectMetadata(
      transform_url.bucket_name,
      transform_url.object_name,
      generation=transform_url.generation,
      provider=transform_url.scheme)

  if self.no_preserve_acl:
    # Leave ACL unchanged.
    src_metadata.acl = []
  elif not src_metadata.acl:
    raise CommandException(
        'No OWNER permission found for object %s. OWNER permission is '
        'required for rewriting objects (otherwise their ACLs would be '
        'reset).' % transform_url)

  # Note: If other transform types are added, they must ensure that the
  # encryption key configuration matches the boto configuration, because
  # gsutil maintains an invariant that all objects it writes use the
  # encryption_key value (including decrypting if no key is present).
  src_encryption_sha256 = None
  if (src_metadata.customerEncryption and
      src_metadata.customerEncryption.keySha256):
    src_encryption_sha256 = src_metadata.customerEncryption.keySha256

  should_encrypt_target = self.boto_file_encryption_sha256 is not None
  source_was_encrypted = src_encryption_sha256 is not None
  using_same_encryption_key_value = (
      src_encryption_sha256 == self.boto_file_encryption_sha256)

  # Prevent accidental key rotation.
  if (_TransformTypes.CRYPTO_KEY not in transforms_to_perform and
      not using_same_encryption_key_value):
    raise EncryptionException(
        'The "-k" flag was not passed to the rewrite command, but the '
        'encryption_key value in your boto config file did not match the key '
        'used to encrypt the object "%s" (hash: %s). To encrypt the object '
        'using a different key, you must specify the "-k" flag.' %
        (transform_url, src_encryption_sha256))

  # Remove any redundant changes.

  # STORAGE_CLASS transform should be skipped if the target storage class
  # matches the existing storage class.
  if (_TransformTypes.STORAGE_CLASS in transforms_to_perform and
      self.dest_storage_class == NormalizeStorageClass(
          src_metadata.storageClass)):
    transforms_to_perform.remove(_TransformTypes.STORAGE_CLASS)
    self.logger.info('Redundant transform: %s already had storage class of '
                     '%s.' % (transform_url, src_metadata.storageClass))

  # CRYPTO_KEY transform should be skipped if we're using the same encryption
  # key (if any) that was used to encrypt the source.
  if (_TransformTypes.CRYPTO_KEY in transforms_to_perform and
      using_same_encryption_key_value):
    if self.boto_file_encryption_sha256 is None:
      log_msg = '%s is already decrypted.' % transform_url
    else:
      log_msg = '%s already has current encryption key.' % transform_url
    transforms_to_perform.remove(_TransformTypes.CRYPTO_KEY)
    self.logger.info('Redundant transform: %s' % log_msg)

  if not transforms_to_perform:
    self.logger.info(
        'Skipping %s, all transformations were redundant.' % transform_url)
    return

  # Make a deep copy of the source metadata.
  dst_metadata = encoding.PyValueToMessage(
      apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

  # Remove some unnecessary/invalid fields.
  dst_metadata.customerEncryption = None
  dst_metadata.generation = None
  # Service has problems if we supply an ID, but it is responsible for
  # generating one, so it is not necessary to include it here.
  dst_metadata.id = None

  decryption_tuple = None
  # Use a generic operation name by default - this can be altered below for
  # specific transformations (encryption changes, etc.).
  operation_name = 'Rewriting'
  if source_was_encrypted:
    decryption_key = FindMatchingCryptoKey(src_encryption_sha256)
    if not decryption_key:
      raise EncryptionException(
          'Missing decryption key with SHA256 hash %s. No decryption key '
          'matches object %s' % (src_encryption_sha256, transform_url))
    decryption_tuple = CryptoTupleFromKey(decryption_key)

  if _TransformTypes.CRYPTO_KEY in transforms_to_perform:
    if not source_was_encrypted:
      operation_name = 'Encrypting'
    elif not should_encrypt_target:
      operation_name = 'Decrypting'
    else:
      operation_name = 'Rotating'

  if _TransformTypes.STORAGE_CLASS in transforms_to_perform:
    dst_metadata.storageClass = self.dest_storage_class

  # TODO: Remove this call (used to verify tests) and make it processed by
  # the UIThread.
  sys.stderr.write(
      _ConstructAnnounceText(operation_name, transform_url.url_string))

  # Message indicating beginning of operation.
  gsutil_api.status_queue.put(
      FileMessage(transform_url,
                  None,
                  time.time(),
                  finished=False,
                  size=src_metadata.size,
                  message_type=FileMessage.FILE_REWRITE))

  progress_callback = FileProgressCallbackHandler(
      gsutil_api.status_queue,
      src_url=transform_url,
      operation_name=operation_name).call

  gsutil_api.CopyObject(src_metadata,
                        dst_metadata,
                        src_generation=transform_url.generation,
                        preconditions=self.preconditions,
                        progress_callback=progress_callback,
                        decryption_tuple=decryption_tuple,
                        encryption_tuple=self.boto_file_encryption_tuple,
                        provider=transform_url.scheme,
                        fields=[])

  # Message indicating end of operation.
  gsutil_api.status_queue.put(
      FileMessage(transform_url,
                  None,
                  time.time(),
                  finished=True,
                  size=src_metadata.size,
                  message_type=FileMessage.FILE_REWRITE))
def RewriteFunc(self, name_expansion_result, thread_state=None):
  gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
  transform_url = name_expansion_result.expanded_storage_url

  self.CheckProvider(transform_url)

  # Get all fields so that we can ensure that the target metadata is
  # specified correctly.
  src_metadata = gsutil_api.GetObjectMetadata(
      transform_url.bucket_name,
      transform_url.object_name,
      generation=transform_url.generation,
      provider=transform_url.scheme)

  if self.no_preserve_acl:
    # Leave ACL unchanged.
    src_metadata.acl = []
  elif not src_metadata.acl:
    raise CommandException(
        'No OWNER permission found for object %s. OWNER permission is '
        'required for rewriting objects (otherwise their ACLs would be '
        'reset).' % transform_url)

  # Note: If other transform types are added, they must ensure that the
  # encryption key configuration matches the boto configuration, because
  # gsutil maintains an invariant that all objects it writes use the
  # encryption_key value (including decrypting if no key is present).

  # Store metadata about src encryption to make logic below easier to read.
  src_encryption_kms_key = (src_metadata.kmsKeyName
                            if src_metadata.kmsKeyName else None)
  src_encryption_sha256 = None
  if (src_metadata.customerEncryption and
      src_metadata.customerEncryption.keySha256):
    src_encryption_sha256 = src_metadata.customerEncryption.keySha256

  src_was_encrypted = (src_encryption_sha256 is not None or
                       src_encryption_kms_key is not None)

  # Also store metadata about dest encryption.
  dest_encryption_kms_key = None
  if (self.boto_file_encryption_keywrapper is not None and
      self.boto_file_encryption_keywrapper.crypto_type == CryptoKeyType.CMEK):
    dest_encryption_kms_key = self.boto_file_encryption_keywrapper.crypto_key

  dest_encryption_sha256 = None
  if (self.boto_file_encryption_keywrapper is not None and
      self.boto_file_encryption_keywrapper.crypto_type == CryptoKeyType.CSEK):
    dest_encryption_sha256 = (
        self.boto_file_encryption_keywrapper.crypto_key_sha256)

  should_encrypt_dest = self.boto_file_encryption_keywrapper is not None

  encryption_unchanged = (src_encryption_sha256 == dest_encryption_sha256 and
                          src_encryption_kms_key == dest_encryption_kms_key)

  # Prevent accidental key rotation.
  if (_TransformTypes.CRYPTO_KEY not in self.transform_types and
      not encryption_unchanged):
    raise EncryptionException(
        'The "-k" flag was not passed to the rewrite command, but the '
        'encryption_key value in your boto config file did not match the key '
        'used to encrypt the object "%s" (hash: %s). To encrypt the object '
        'using a different key, you must specify the "-k" flag.' %
        (transform_url, src_encryption_sha256))

  # Determine if we can skip this rewrite operation (this should only be done
  # when ALL of the specified transformations are redundant).
  redundant_transforms = []

  # STORAGE_CLASS transform is redundant if the target storage class matches
  # the existing storage class.
  if (_TransformTypes.STORAGE_CLASS in self.transform_types and
      self.dest_storage_class == NormalizeStorageClass(
          src_metadata.storageClass)):
    redundant_transforms.append('storage class')

  # CRYPTO_KEY transform is redundant if we're using the same encryption
  # key (if any) that was used to encrypt the source.
  if (_TransformTypes.CRYPTO_KEY in self.transform_types and
      encryption_unchanged):
    redundant_transforms.append('encryption key')

  if len(redundant_transforms) == len(self.transform_types):
    self.logger.info('Skipping %s, all transformations were redundant: %s' %
                     (transform_url, redundant_transforms))
    return

  # First make a deep copy of the source metadata, then overwrite any
  # requested attributes (e.g. if a storage class change was specified).
  dest_metadata = encoding.PyValueToMessage(
      apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

  # Remove some unnecessary/invalid fields.
  dest_metadata.generation = None
  # Service has problems if we supply an ID, but it is responsible for
  # generating one, so it is not necessary to include it here.
  dest_metadata.id = None
  # Ensure we don't copy over the KMS key name or CSEK key info from the
  # source object; those should only come from the boto config's
  # encryption_key value.
  dest_metadata.customerEncryption = None
  dest_metadata.kmsKeyName = None

  # Both a storage class change and CMEK encryption should be set as part of
  # the dest object's metadata. CSEK encryption, if specified, is added to
  # the request later via headers obtained from the keywrapper value passed
  # to encryption_tuple.
  if _TransformTypes.STORAGE_CLASS in self.transform_types:
    dest_metadata.storageClass = self.dest_storage_class
  if dest_encryption_kms_key is not None:
    dest_metadata.kmsKeyName = dest_encryption_kms_key

  # Make sure we have the CSEK key necessary to decrypt.
  decryption_keywrapper = None
  if src_encryption_sha256 is not None:
    if src_encryption_sha256 in self.csek_hash_to_keywrapper:
      decryption_keywrapper = (
          self.csek_hash_to_keywrapper[src_encryption_sha256])
    else:
      raise EncryptionException(
          'Missing decryption key with SHA256 hash %s. No decryption key '
          'matches object %s' % (src_encryption_sha256, transform_url))

  operation_name = 'Rewriting'
  if _TransformTypes.CRYPTO_KEY in self.transform_types:
    if src_was_encrypted and should_encrypt_dest:
      if not encryption_unchanged:
        operation_name = 'Rotating'
      # Else, keep "Rewriting". This might occur when -k was specified and
      # was redundant, but we're performing the operation anyway because
      # some other transformation was not redundant.
    elif src_was_encrypted and not should_encrypt_dest:
      operation_name = 'Decrypting'
    elif not src_was_encrypted and should_encrypt_dest:
      operation_name = 'Encrypting'

  # TODO: Remove this call (used to verify tests) and make it processed by
  # the UIThread.
  sys.stderr.write(
      _ConstructAnnounceText(operation_name, transform_url.url_string))

  # Message indicating beginning of operation.
  gsutil_api.status_queue.put(
      FileMessage(transform_url,
                  None,
                  time.time(),
                  finished=False,
                  size=src_metadata.size,
                  message_type=FileMessage.FILE_REWRITE))

  progress_callback = FileProgressCallbackHandler(
      gsutil_api.status_queue,
      src_url=transform_url,
      operation_name=operation_name).call

  gsutil_api.CopyObject(src_metadata,
                        dest_metadata,
                        src_generation=transform_url.generation,
                        preconditions=self.preconditions,
                        progress_callback=progress_callback,
                        decryption_tuple=decryption_keywrapper,
                        encryption_tuple=self.boto_file_encryption_keywrapper,
                        provider=transform_url.scheme,
                        fields=[])

  # Message indicating end of operation.
  gsutil_api.status_queue.put(
      FileMessage(transform_url,
                  None,
                  time.time(),
                  finished=True,
                  size=src_metadata.size,
                  message_type=FileMessage.FILE_REWRITE))
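# Illustrative sketch (not part of gsutil): both RewriteFunc variants locate
# the customer-supplied key needed for decryption by the SHA256 hash the
# service reports on the object (customerEncryption.keySha256). Assuming the
# keys are the usual base64-encoded 32-byte CSEKs and the reported hash is
# the base64-encoded SHA256 of the raw key bytes, a hypothetical lookup-table
# builder could look like this (names invented for illustration):
import base64
import hashlib


def _csek_hash_sketch(b64_key):
  """Returns the base64-encoded SHA256 of a base64-encoded CSEK."""
  raw_key = base64.b64decode(b64_key)
  return base64.b64encode(hashlib.sha256(raw_key).digest()).decode('ascii')


def _build_hash_to_key_map_sketch(b64_keys):
  # Maps each key's hash to the key itself, mirroring the role played by
  # csek_hash_to_keywrapper / FindMatchingCryptoKey in the functions above.
  return {_csek_hash_sketch(k): k for k in b64_keys}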