def testWarnIfMvEarlyDeletionChargeApplies(self):
  """Tests that WarnIfEarlyDeletionChargeApplies warns when appropriate."""
  test_logger = logging.Logger('test')
  src_url = StorageUrlFromString('gs://bucket/object')

  # Recent nearline objects should generate a warning.
  for object_time_created in (
      self._PI_DAY, self._PI_DAY - datetime.timedelta(days=29, hours=23)):
    recent_nearline_obj = apitools_messages.Object(
        storageClass='NEARLINE', timeCreated=object_time_created)
    with mock.patch.object(test_logger, 'warn') as mocked_warn:
      WarnIfMvEarlyDeletionChargeApplies(src_url, recent_nearline_obj,
                                         test_logger)
      mocked_warn.assert_called_with(
          'Warning: moving %s object %s may incur an early deletion '
          'charge, because the original object is less than %s days old '
          'according to the local system time.', 'nearline',
          src_url.url_string, 30)

  # Recent coldline objects should generate a warning.
  for object_time_created in (
      self._PI_DAY, self._PI_DAY - datetime.timedelta(days=89, hours=23)):
    recent_coldline_obj = apitools_messages.Object(
        storageClass='COLDLINE', timeCreated=object_time_created)
    with mock.patch.object(test_logger, 'warn') as mocked_warn:
      WarnIfMvEarlyDeletionChargeApplies(src_url, recent_coldline_obj,
                                         test_logger)
      mocked_warn.assert_called_with(
          'Warning: moving %s object %s may incur an early deletion '
          'charge, because the original object is less than %s days old '
          'according to the local system time.', 'coldline',
          src_url.url_string, 90)

  # Recent archive objects should generate a warning.
  for object_time_created in (
      self._PI_DAY, self._PI_DAY - datetime.timedelta(days=364, hours=23)):
    recent_archive_obj = apitools_messages.Object(
        storageClass='ARCHIVE', timeCreated=object_time_created)
    with mock.patch.object(test_logger, 'warn') as mocked_warn:
      WarnIfMvEarlyDeletionChargeApplies(src_url, recent_archive_obj,
                                         test_logger)
      mocked_warn.assert_called_with(
          'Warning: moving %s object %s may incur an early deletion '
          'charge, because the original object is less than %s days old '
          'according to the local system time.', 'archive',
          src_url.url_string, 365)

  # Sufficiently old objects should not generate a warning.
  with mock.patch.object(test_logger, 'warn') as mocked_warn:
    old_nearline_obj = apitools_messages.Object(
        storageClass='NEARLINE',
        timeCreated=self._PI_DAY - datetime.timedelta(days=30, seconds=1))
    WarnIfMvEarlyDeletionChargeApplies(src_url, old_nearline_obj, test_logger)
    mocked_warn.assert_not_called()
  with mock.patch.object(test_logger, 'warn') as mocked_warn:
    old_coldline_obj = apitools_messages.Object(
        storageClass='COLDLINE',
        timeCreated=self._PI_DAY - datetime.timedelta(days=90, seconds=1))
    WarnIfMvEarlyDeletionChargeApplies(src_url, old_coldline_obj, test_logger)
    mocked_warn.assert_not_called()
  with mock.patch.object(test_logger, 'warn') as mocked_warn:
    old_archive_obj = apitools_messages.Object(
        storageClass='ARCHIVE',
        timeCreated=self._PI_DAY - datetime.timedelta(days=365, seconds=1))
    WarnIfMvEarlyDeletionChargeApplies(src_url, old_archive_obj, test_logger)
    mocked_warn.assert_not_called()

  # Recent standard storage class objects should not generate a warning.
  with mock.patch.object(test_logger, 'warn') as mocked_warn:
    recent_standard_obj = apitools_messages.Object(
        storageClass='STANDARD', timeCreated=self._PI_DAY)
    WarnIfMvEarlyDeletionChargeApplies(src_url, recent_standard_obj,
                                       test_logger)
    mocked_warn.assert_not_called()
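# Illustrative sketch (not the gslib implementation): the test above assumes
# minimum-retention periods of 30/90/365 days for NEARLINE/COLDLINE/ARCHIVE,
# and no minimum for STANDARD. A standalone version of that check might look
# like the following; the helper name and structure are assumptions made only
# for illustration.
import datetime

_EARLY_DELETION_MINIMUM_DAYS = {'nearline': 30, 'coldline': 90, 'archive': 365}


def _might_incur_early_deletion_charge(storage_class, time_created, now=None):
  """Returns (True, min_days) if the object is younger than the class minimum."""
  now = now or datetime.datetime.now()
  min_days = _EARLY_DELETION_MINIMUM_DAYS.get(storage_class.lower())
  if min_days is None:  # STANDARD and other classes have no minimum.
    return (False, None)
  return (now - time_created < datetime.timedelta(days=min_days), min_days)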
def _PatchIam(self): self.continue_on_error = False self.recursion_requested = False patch_bindings_tuples = [] if self.sub_opts: for o, a in self.sub_opts: if o in ['-r', '-R']: self.recursion_requested = True elif o == '-f': self.continue_on_error = True elif o == '-d': patch_bindings_tuples.append(BindingStringToTuple(False, a)) patterns = [] # N.B.: self.sub_opts stops taking in options at the first non-flagged # token. The rest of the tokens are sent to self.args. Thus, in order to # handle input of the form "-d <binding> <binding> <url>", we will have to # parse self.args for a mix of both bindings and CloudUrls. We are not # expecting to come across the -r, -f flags here. it = iter(self.args) for token in it: if token == '-d': patch_bindings_tuples.append( BindingStringToTuple(False, it.next())) else: try: patch_bindings_tuples.append( BindingStringToTuple(True, token) ) # All following arguments are urls. except (ArgumentException, CommandException): patterns.append(token) for token in it: patterns.append(token) # We must have some bindings to process, else this is pointless. if not patch_bindings_tuples: raise CommandException('Must specify at least one binding.') self.everything_set_okay = True threaded_wildcards = [] for pattern in patterns: surl = StorageUrlFromString(pattern) try: if surl.IsBucket(): if self.recursion_requested: surl.object = '*' threaded_wildcards.append(surl.url_string) else: self.PatchIamHelper(surl, patch_bindings_tuples) else: threaded_wildcards.append(surl.url_string) except AttributeError: error_msg = 'Invalid Cloud URL "%s".' % surl.object_name if set(surl.object_name).issubset(set('-Rrf')): error_msg += ( ' This resource handle looks like a flag, which must appear ' 'before all bindings. See "gsutil help iam ch" for more details.' ) raise CommandException(error_msg) if threaded_wildcards: name_expansion_iterator = NameExpansionIterator( self.command_name, self.debug, self.logger, self.gsutil_api, threaded_wildcards, self.recursion_requested, all_versions=self.all_versions, continue_on_error=self.continue_on_error or self.parallel_operations, bucket_listing_fields=['name']) seek_ahead_iterator = SeekAheadNameExpansionIterator( self.command_name, self.debug, self.GetSeekAheadGsutilApi(), threaded_wildcards, self.recursion_requested, all_versions=self.all_versions) # N.B.: Python2.6 support means we can't use a partial function here to # curry the bindings tuples into the wrapper function. We instead pass # the bindings along by zipping them with each name_expansion_iterator # result. See http://bugs.python.org/issue5228. serialized_bindings_tuples_it = itertools.repeat( [SerializeBindingsTuple(t) for t in patch_bindings_tuples]) self.Apply( _PatchIamWrapper, itertools.izip( serialized_bindings_tuples_it, name_expansion_iterator), _PatchIamExceptionHandler, fail_on_error=not self.continue_on_error, seek_ahead_iterator=seek_ahead_iterator) self.everything_set_okay &= not GetFailureCount() > 0 # TODO: Add an error counter for files and objects. if not self.everything_set_okay: raise CommandException('Some IAM policies could not be patched.')
def CatUrlStrings(self, url_strings, show_header=False, start_byte=0,
                  end_byte=None, cat_out_fd=None):
  """Prints each of the URL strings to stdout.

  Args:
    url_strings: String iterable.
    show_header: If true, print a header per file.
    start_byte: Starting byte of the file to print, used for constructing
                range requests.
    end_byte: Ending byte of the file to print; used for constructing range
              requests. If this is negative, the start_byte is ignored and an
              end range is sent over HTTP (such as range: bytes -9).
    cat_out_fd: File descriptor to which output should be written. Defaults to
                stdout if no file descriptor is supplied.

  Returns:
    0 on success.

  Raises:
    CommandException if no URLs can be found.
  """
  printed_one = False
  # This should refer to whatever sys.stdin refers to when this method is
  # run, not when this method is defined, so we do the initialization here
  # rather than define sys.stdin as the cat_out_fd parameter's default value.
  if cat_out_fd is None:
    cat_out_fd = sys.stdout
  # We redirect stdout so that all data other than the object contents goes
  # to stderr.
  old_stdout = sys.stdout
  sys.stdout = sys.stderr
  try:
    if url_strings and url_strings[0] in ('-', 'file://-'):
      self._WriteBytesBufferedFileToFile(sys.stdin, cat_out_fd)
    else:
      for url_str in url_strings:
        did_some_work = False
        # TODO: Get only the needed fields here.
        for blr in self.command_obj.WildcardIterator(url_str).IterObjects(
            bucket_listing_fields=_CAT_BUCKET_LISTING_FIELDS):
          decryption_keywrapper = None
          if (blr.root_object and blr.root_object.customerEncryption and
              blr.root_object.customerEncryption.keySha256):
            decryption_key = FindMatchingCSEKInBotoConfig(
                blr.root_object.customerEncryption.keySha256, config)
            if not decryption_key:
              raise EncryptionException(
                  'Missing decryption key with SHA256 hash %s. No decryption '
                  'key matches object %s' % (
                      blr.root_object.customerEncryption.keySha256,
                      blr.url_string))
            decryption_keywrapper = CryptoKeyWrapperFromKey(decryption_key)

          did_some_work = True
          if show_header:
            if printed_one:
              print
            print '==> %s <==' % blr
          printed_one = True
          cat_object = blr.root_object
          storage_url = StorageUrlFromString(blr.url_string)
          if storage_url.IsCloudUrl():
            compressed_encoding = ObjectIsGzipEncoded(cat_object)
            self.command_obj.gsutil_api.GetObjectMedia(
                cat_object.bucket, cat_object.name, cat_out_fd,
                compressed_encoding=compressed_encoding,
                start_byte=start_byte, end_byte=end_byte,
                object_size=cat_object.size,
                generation=storage_url.generation,
                decryption_tuple=decryption_keywrapper,
                provider=storage_url.scheme)
          else:
            with open(storage_url.object_name, 'rb') as f:
              self._WriteBytesBufferedFileToFile(f, cat_out_fd)
        if not did_some_work:
          raise CommandException(NO_URLS_MATCHED_TARGET % url_str)
  finally:
    sys.stdout = old_stdout
  return 0
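# Illustrative sketch of the start_byte/end_byte semantics described in the
# docstring above: a non-negative end_byte produces an inclusive byte range,
# while a negative end_byte requests a suffix range and start_byte is ignored.
# This helper is an assumption for illustration only; the GetObjectMedia call
# above constructs its own range requests internally.
def _build_range_header(start_byte=0, end_byte=None):
  if end_byte is None:
    if start_byte == 0:
      return None  # No header needed; fetch the whole object.
    return 'bytes=%d-' % start_byte
  if end_byte < 0:
    return 'bytes=%d' % end_byte  # e.g. 'bytes=-9' fetches the last 9 bytes.
  return 'bytes=%d-%d' % (start_byte, end_byte)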
class TestHashingFileUploadWrapper(testcase.GsUtilUnitTestCase): """Unit tests for the HashingFileUploadWrapper class.""" _temp_test_file = None _dummy_url = StorageUrlFromString('gs://bucket/object') def _GetTestFile(self): contents = pkgutil.get_data('gslib', 'tests/test_data/%s' % _TEST_FILE) if not self._temp_test_file: self._temp_test_file = self.CreateTempFile(file_name=_TEST_FILE, contents=contents) return self._temp_test_file def testReadToEOF(self): digesters = {'md5': GetMd5()} tmp_file = self.CreateTempFile(contents=b'a' * TRANSFER_BUFFER_SIZE * 4) with open(tmp_file, 'rb') as stream: wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': GetMd5}, self._dummy_url, self.logger) wrapper.read() with open(tmp_file, 'rb') as stream: actual = CalculateMd5FromContents(stream) self.assertEqual(actual, digesters['md5'].hexdigest()) def _testSeekBack(self, initial_position, seek_back_amount): """Tests reading then seeking backwards. This function simulates an upload that is resumed after a connection break. It reads one transfer buffer at a time until it reaches initial_position, then seeks backwards (as if the server did not receive some of the bytes) and reads to the end of the file, ensuring the hash matches the original file upon completion. Args: initial_position: Initial number of bytes to read before seek. seek_back_amount: Number of bytes to seek backward. Raises: AssertionError on wrong amount of data remaining or hash mismatch. """ tmp_file = self._GetTestFile() tmp_file_len = os.path.getsize(tmp_file) self.assertGreaterEqual( initial_position, seek_back_amount, 'seek_back_amount must be less than initial position %s ' '(but was actually: %s)' % (initial_position, seek_back_amount)) self.assertLess( initial_position, tmp_file_len, 'initial_position must be less than test file size %s ' '(but was actually: %s)' % (tmp_file_len, initial_position)) digesters = {'md5': GetMd5()} with open(tmp_file, 'rb') as stream: wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': GetMd5}, self._dummy_url, self.logger) position = 0 while position < initial_position - TRANSFER_BUFFER_SIZE: data = wrapper.read(TRANSFER_BUFFER_SIZE) position += len(data) wrapper.read(initial_position - position) wrapper.seek(initial_position - seek_back_amount) self.assertEqual(wrapper.tell(), initial_position - seek_back_amount) data = wrapper.read() self.assertEqual( len(data), tmp_file_len - (initial_position - seek_back_amount)) with open(tmp_file, 'rb') as stream: actual = CalculateMd5FromContents(stream) self.assertEqual(actual, digesters['md5'].hexdigest()) def testSeekToBeginning(self): for num_bytes in (TRANSFER_BUFFER_SIZE - 1, TRANSFER_BUFFER_SIZE, TRANSFER_BUFFER_SIZE + 1, TRANSFER_BUFFER_SIZE * 2 - 1, TRANSFER_BUFFER_SIZE * 2, TRANSFER_BUFFER_SIZE * 2 + 1, TRANSFER_BUFFER_SIZE * 3 - 1, TRANSFER_BUFFER_SIZE * 3, TRANSFER_BUFFER_SIZE * 3 + 1): self._testSeekBack(num_bytes, num_bytes) def testSeekBackAroundOneBuffer(self): for initial_position in (TRANSFER_BUFFER_SIZE + 1, TRANSFER_BUFFER_SIZE * 2 - 1, TRANSFER_BUFFER_SIZE * 2, TRANSFER_BUFFER_SIZE * 2 + 1, TRANSFER_BUFFER_SIZE * 3 - 1, TRANSFER_BUFFER_SIZE * 3, TRANSFER_BUFFER_SIZE * 3 + 1): for seek_back_amount in (TRANSFER_BUFFER_SIZE - 1, TRANSFER_BUFFER_SIZE, TRANSFER_BUFFER_SIZE + 1): self._testSeekBack(initial_position, seek_back_amount) def testSeekBackMoreThanOneBuffer(self): for initial_position in (TRANSFER_BUFFER_SIZE * 2 + 1, TRANSFER_BUFFER_SIZE * 3 - 1, TRANSFER_BUFFER_SIZE * 3, TRANSFER_BUFFER_SIZE * 3 + 1): for 
seek_back_amount in (TRANSFER_BUFFER_SIZE * 2 - 1, TRANSFER_BUFFER_SIZE * 2, TRANSFER_BUFFER_SIZE * 2 + 1): self._testSeekBack(initial_position, seek_back_amount) def _testSeekForward(self, initial_seek): """Tests seeking to an initial position and then reading. This function simulates an upload that is resumed after a process break. It seeks from zero to the initial position (as if the server already had those bytes). Then it reads to the end of the file, ensuring the hash matches the original file upon completion. Args: initial_seek: Number of bytes to initially seek. Raises: AssertionError on wrong amount of data remaining or hash mismatch. """ tmp_file = self._GetTestFile() tmp_file_len = os.path.getsize(tmp_file) self.assertLess( initial_seek, tmp_file_len, 'initial_seek must be less than test file size %s ' '(but was actually: %s)' % (tmp_file_len, initial_seek)) digesters = {'md5': GetMd5()} with open(tmp_file, 'rb') as stream: wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': GetMd5}, self._dummy_url, self.logger) wrapper.seek(initial_seek) self.assertEqual(wrapper.tell(), initial_seek) data = wrapper.read() self.assertEqual(len(data), tmp_file_len - initial_seek) with open(tmp_file, 'rb') as stream: actual = CalculateMd5FromContents(stream) self.assertEqual(actual, digesters['md5'].hexdigest()) def testSeekForward(self): for initial_seek in (0, TRANSFER_BUFFER_SIZE - 1, TRANSFER_BUFFER_SIZE, TRANSFER_BUFFER_SIZE + 1, TRANSFER_BUFFER_SIZE * 2 - 1, TRANSFER_BUFFER_SIZE * 2, TRANSFER_BUFFER_SIZE * 2 + 1): self._testSeekForward(initial_seek) def _testSeekAway(self, initial_read): """Tests reading to an initial position and then seeking to EOF and back. This function simulates a size check on the input file by seeking to the end of the file and then back to the current position. Then it reads to the end of the file, ensuring the hash matches the original file upon completion. Args: initial_read: Number of bytes to initially read. Raises: AssertionError on wrong amount of data remaining or hash mismatch.
""" tmp_file = self._GetTestFile() tmp_file_len = os.path.getsize(tmp_file) self.assertLess( initial_read, tmp_file_len, 'initial_read must be less than test file size %s ' '(but was actually: %s)' % (tmp_file_len, initial_read)) digesters = {'md5': GetMd5()} with open(tmp_file, 'rb') as stream: wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': GetMd5}, self._dummy_url, self.logger) wrapper.read(initial_read) self.assertEqual(wrapper.tell(), initial_read) wrapper.seek(0, os.SEEK_END) self.assertEqual(wrapper.tell(), tmp_file_len) wrapper.seek(initial_read, os.SEEK_SET) data = wrapper.read() self.assertEqual(len(data), tmp_file_len - initial_read) with open(tmp_file, 'rb') as stream: actual = CalculateMd5FromContents(stream) self.assertEqual(actual, digesters['md5'].hexdigest()) def testValidSeekAway(self): for initial_read in (0, TRANSFER_BUFFER_SIZE - 1, TRANSFER_BUFFER_SIZE, TRANSFER_BUFFER_SIZE + 1, TRANSFER_BUFFER_SIZE * 2 - 1, TRANSFER_BUFFER_SIZE * 2, TRANSFER_BUFFER_SIZE * 2 + 1): self._testSeekAway(initial_read) def testInvalidSeekAway(self): """Tests seeking to EOF and then reading without first doing a SEEK_SET.""" tmp_file = self._GetTestFile() digesters = {'md5': GetMd5()} with open(tmp_file, 'rb') as stream: wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': GetMd5}, self._dummy_url, self.logger) wrapper.read(TRANSFER_BUFFER_SIZE) wrapper.seek(0, os.SEEK_END) try: wrapper.read() self.fail('Expected CommandException for invalid seek.') except CommandException as e: self.assertIn( 'Read called on hashing file pointer in an unknown position', str(e))
def _GetDefAcl(self):
  if not StorageUrlFromString(self.args[0]).IsBucket():
    raise CommandException('URL must name a bucket for the %s command' %
                           self.command_name)
  self.GetAndPrintAcl(self.args[0])
def test_FilterExistingComponentsVersioned(self): """Tests upload with versioned parallel components.""" mock_api = MockCloudApi() bucket_name = self.MakeTempName('bucket') mock_api.MockCreateVersionedBucket(bucket_name) # dst_obj_metadata used for passing content-type. empty_object = apitools_messages.Object() tracker_file = self.CreateTempFile(file_name='foo', contents='asdf') tracker_file_lock = CreateLock() # Already uploaded, contents still match, component still used. fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1', contents='1') fpath_uploaded_correctly_url = StorageUrlFromString( str(fpath_uploaded_correctly)) with open(fpath_uploaded_correctly) as f_in: fpath_uploaded_correctly_md5 = CalculateB64EncodedMd5FromContents( f_in) object_uploaded_correctly = mock_api.MockCreateObjectWithMetadata( apitools_messages.Object(bucket=bucket_name, name=fpath_uploaded_correctly, md5Hash=fpath_uploaded_correctly_md5), contents='1') object_uploaded_correctly_url = StorageUrlFromString( '%s://%s/%s#%s' % (self.default_provider, bucket_name, fpath_uploaded_correctly, object_uploaded_correctly.generation)) args_uploaded_correctly = PerformParallelUploadFileToObjectArgs( fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly_url, object_uploaded_correctly_url, object_uploaded_correctly.generation, empty_object, tracker_file, tracker_file_lock) # Duplicate object name in tracker file, but uploaded correctly. fpath_duplicate = fpath_uploaded_correctly fpath_duplicate_url = StorageUrlFromString(str(fpath_duplicate)) duplicate_uploaded_correctly = mock_api.MockCreateObjectWithMetadata( apitools_messages.Object(bucket=bucket_name, name=fpath_duplicate, md5Hash=fpath_uploaded_correctly_md5), contents='1') duplicate_uploaded_correctly_url = StorageUrlFromString( '%s://%s/%s#%s' % (self.default_provider, bucket_name, fpath_uploaded_correctly, duplicate_uploaded_correctly.generation)) args_duplicate = PerformParallelUploadFileToObjectArgs( fpath_duplicate, 0, 1, fpath_duplicate_url, duplicate_uploaded_correctly_url, duplicate_uploaded_correctly.generation, empty_object, tracker_file, tracker_file_lock) # Already uploaded, but contents no longer match.
fpath_wrong_contents = self.CreateTempFile(file_name='foo4', contents='4') fpath_wrong_contents_url = StorageUrlFromString( str(fpath_wrong_contents)) with open(self.CreateTempFile(contents='_')) as f_in: fpath_wrong_contents_md5 = CalculateB64EncodedMd5FromContents(f_in) object_wrong_contents = mock_api.MockCreateObjectWithMetadata( apitools_messages.Object(bucket=bucket_name, name=fpath_wrong_contents, md5Hash=fpath_wrong_contents_md5), contents='_') wrong_contents_url = StorageUrlFromString( '%s://%s/%s#%s' % (self.default_provider, bucket_name, fpath_wrong_contents, object_wrong_contents.generation)) args_wrong_contents = PerformParallelUploadFileToObjectArgs( fpath_wrong_contents, 0, 1, fpath_wrong_contents_url, wrong_contents_url, '', empty_object, tracker_file, tracker_file_lock) dst_args = { fpath_uploaded_correctly: args_uploaded_correctly, fpath_wrong_contents: args_wrong_contents } existing_components = [ ObjectFromTracker(fpath_uploaded_correctly, object_uploaded_correctly_url.generation), ObjectFromTracker(fpath_duplicate, duplicate_uploaded_correctly_url.generation), ObjectFromTracker(fpath_wrong_contents, wrong_contents_url.generation) ] bucket_url = StorageUrlFromString('%s://%s' % (self.default_provider, bucket_name)) (components_to_upload, uploaded_components, existing_objects_to_delete) = (FilterExistingComponents( dst_args, existing_components, bucket_url, mock_api)) self.assertEqual([args_wrong_contents], components_to_upload) self.assertEqual(args_uploaded_correctly.dst_url.url_string, uploaded_components[0].url_string) expected_to_delete = [(args_wrong_contents.dst_url.object_name, args_wrong_contents.dst_url.generation), (args_duplicate.dst_url.object_name, args_duplicate.dst_url.generation)] for uri in existing_objects_to_delete: self.assertTrue((uri.object_name, uri.generation) in expected_to_delete) self.assertEqual(len(expected_to_delete), len(existing_objects_to_delete))
def _Create(self): self.CheckArguments() # User-specified options pubsub_topic = None payload_format = None custom_attributes = {} event_types = [] object_name_prefix = None should_setup_topic = True if self.sub_opts: for o, a in self.sub_opts: if o == '-e': event_types.append(a) elif o == '-f': payload_format = a elif o == '-m': if ':' not in a: raise CommandException( 'Custom attributes specified with -m should be of the form ' 'key:value') key, value = a.split(':') custom_attributes[key] = value elif o == '-p': object_name_prefix = a elif o == '-s': should_setup_topic = False elif o == '-t': pubsub_topic = a if payload_format not in PAYLOAD_FORMAT_MAP: raise CommandException( "Must provide a payload format with -f of either 'json' or 'none'" ) payload_format = PAYLOAD_FORMAT_MAP[payload_format] bucket_arg = self.args[-1] bucket_url = StorageUrlFromString(bucket_arg) if not bucket_url.IsCloudUrl() or not bucket_url.IsBucket(): raise CommandException( "%s %s requires a GCS bucket name, but got '%s'" % (self.command_name, self.subcommand_name, bucket_arg)) if bucket_url.scheme != 'gs': raise CommandException( 'The %s command can only be used with gs:// bucket URLs.' % self.command_name) bucket_name = bucket_url.bucket_name self.logger.debug('Creating notification for bucket %s', bucket_url) # Find the project this bucket belongs to bucket_metadata = self.gsutil_api.GetBucket(bucket_name, fields=['projectNumber'], provider=bucket_url.scheme) bucket_project_number = bucket_metadata.projectNumber # If not specified, choose a sensible default for the Cloud Pub/Sub topic # name. if not pubsub_topic: pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None), bucket_name) if not pubsub_topic.startswith('projects/'): # If a user picks a topic ID (mytopic) but doesn't pass the whole name ( # projects/my-project/topics/mytopic ), pick a default project. pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None), pubsub_topic) self.logger.debug('Using Cloud Pub/Sub topic %s', pubsub_topic) just_modified_topic_permissions = False if should_setup_topic: # Ask GCS for the email address that represents GCS's permission to # publish to a Cloud Pub/Sub topic from this project. service_account = self.gsutil_api.GetProjectServiceAccount( bucket_project_number, provider=bucket_url.scheme).email_address self.logger.debug('Service account for project %d: %s', bucket_project_number, service_account) just_modified_topic_permissions = self._CreateTopic( pubsub_topic, service_account) for attempt_number in range(0, 2): try: create_response = self.gsutil_api.CreateNotificationConfig( bucket_name, pubsub_topic=pubsub_topic, payload_format=payload_format, custom_attributes=custom_attributes, event_types=event_types if event_types else None, object_name_prefix=object_name_prefix, provider=bucket_url.scheme) break except PublishPermissionDeniedException: if attempt_number == 0 and just_modified_topic_permissions: # If we have just set the IAM policy, it may take up to 10 seconds to # take effect. self.logger.info( 'Retrying create notification in 10 seconds ' '(new permissions may take up to 10 seconds to take effect.)' ) time.sleep(10) else: raise notification_name = 'projects/_/buckets/%s/notificationConfigs/%s' % ( bucket_name, create_response.id) self.logger.info('Created notification config %s', notification_name) return 0
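# Illustrative sketch of the topic-name defaulting logic above: an omitted
# topic defaults to the bucket name, and a bare topic ID is qualified with a
# project. The project_id parameter stands in for PopulateProjectId(None) and
# is an assumption for illustration only.
def _qualify_pubsub_topic(pubsub_topic, bucket_name, project_id):
  if not pubsub_topic:
    pubsub_topic = 'projects/%s/topics/%s' % (project_id, bucket_name)
  if not pubsub_topic.startswith('projects/'):
    pubsub_topic = 'projects/%s/topics/%s' % (project_id, pubsub_topic)
  return pubsub_topic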
def __iter__(self):
  """Iterates over src/dst URLs and produces a _DiffToApply sequence.

  Yields:
    The _DiffToApply.
  """
  # Strip trailing slashes, if any, so we compute tail length against
  # consistent position regardless of whether trailing slashes were included
  # or not in URL.
  base_src_url_len = len(self.base_src_url.url_string.rstrip('/\\'))
  base_dst_url_len = len(self.base_dst_url.url_string.rstrip('/\\'))
  src_url_str = dst_url_str = None
  # Invariant: After each yield, the URLs in src_url_str, dst_url_str,
  # self.sorted_src_urls_it, and self.sorted_dst_urls_it are not yet
  # processed. Each time we encounter None in src_url_str or dst_url_str we
  # populate from the respective iterator, and we reset one or the other
  # value to None after yielding an action that disposes of that URL.
  while not self.sorted_src_urls_it.IsEmpty() or src_url_str is not None:
    if src_url_str is None:
      (src_url_str, src_size, src_crc32c, src_md5) = self._ParseTmpFileLine(
          self.sorted_src_urls_it.next())
      # Skip past base URL and normalize slashes so we can compare across
      # clouds/file systems (including Windows).
      src_url_str_to_check = _EncodeUrl(
          src_url_str[base_src_url_len:].replace('\\', '/'))
      dst_url_str_would_copy_to = copy_helper.ConstructDstUrl(
          self.base_src_url, StorageUrlFromString(src_url_str), True, True,
          self.base_dst_url, False, self.recursion_requested).url_string
    if self.sorted_dst_urls_it.IsEmpty():
      # We've reached the end of dst URLs, so copy src to dst.
      yield _DiffToApply(src_url_str, dst_url_str_would_copy_to,
                         _DiffAction.COPY)
      src_url_str = None
      continue
    if not dst_url_str:
      (dst_url_str, dst_size, dst_crc32c, dst_md5) = (
          self._ParseTmpFileLine(self.sorted_dst_urls_it.next()))
      # Skip past base URL and normalize slashes so we can compare across
      # clouds/file systems (including Windows).
      dst_url_str_to_check = _EncodeUrl(
          dst_url_str[base_dst_url_len:].replace('\\', '/'))
    if src_url_str_to_check < dst_url_str_to_check:
      # There's no dst object corresponding to src object, so copy src to
      # dst.
      yield _DiffToApply(src_url_str, dst_url_str_would_copy_to,
                         _DiffAction.COPY)
      src_url_str = None
    elif src_url_str_to_check > dst_url_str_to_check:
      # dst object without a corresponding src object, so remove dst if -d
      # option was specified.
      if self.delete_extras:
        yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
      dst_url_str = None
    else:
      # There is a dst object corresponding to src object, so check if
      # objects match.
      if self._ObjectsMatch(src_url_str, src_size, src_crc32c, src_md5,
                            dst_url_str, dst_size, dst_crc32c, dst_md5):
        # Continue iterating without yielding a _DiffToApply.
        pass
      else:
        yield _DiffToApply(src_url_str, dst_url_str, _DiffAction.COPY)
      src_url_str = None
      dst_url_str = None

  # If the -d option was specified, any files/objects left in the dst
  # iteration should be removed.
  if not self.delete_extras:
    return
  if dst_url_str:
    yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
    dst_url_str = None
  for line in self.sorted_dst_urls_it:
    (dst_url_str, _, _, _) = self._ParseTmpFileLine(line)
    yield _DiffToApply(None, dst_url_str, _DiffAction.REMOVE)
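# Standalone sketch of the merge performed above: walk two name-sorted
# listings in lockstep and emit copy/remove decisions. The types and names
# here are simplified assumptions; the real iterator also compares sizes and
# hashes via self._ObjectsMatch and builds destination URLs via copy_helper.
def _sketch_diff(sorted_src, sorted_dst, delete_extras=False):
  """Yields ('copy', name) and ('remove', name) for two sorted name lists."""
  si, di = 0, 0
  while si < len(sorted_src):
    if di >= len(sorted_dst) or sorted_src[si] < sorted_dst[di]:
      yield ('copy', sorted_src[si])  # No matching dst object.
      si += 1
    elif sorted_src[si] > sorted_dst[di]:
      if delete_extras:
        yield ('remove', sorted_dst[di])  # dst object with no src counterpart.
      di += 1
    else:
      # Same name on both sides; a real implementation compares checksums
      # here and copies only on mismatch.
      si += 1
      di += 1
  if delete_extras:
    for name in sorted_dst[di:]:
      yield ('remove', name)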
def _PatchIam(self): self.continue_on_error = False self.recursion_requested = False patch_bindings_tuples = [] if self.sub_opts: for o, a in self.sub_opts: if o in ['-r', '-R']: self.recursion_requested = True elif o == '-f': self.continue_on_error = True elif o == '-d': patch_bindings_tuples.append(BindingStringToTuple( False, a)) patterns = [] # N.B.: self.sub_opts stops taking in options at the first non-flagged # token. The rest of the tokens are sent to self.args. Thus, in order to # handle input of the form "-d <binding> <binding> <url>", we will have to # parse self.args for a mix of both bindings and CloudUrls. We are not # expecting to come across the -r, -f flags here. it = iter(self.args) for token in it: if STORAGE_URI_REGEX.match(token): patterns.append(token) break if token == '-d': patch_bindings_tuples.append( BindingStringToTuple(False, next(it))) else: patch_bindings_tuples.append(BindingStringToTuple(True, token)) if not patch_bindings_tuples: raise CommandException('Must specify at least one binding.') # All following arguments are urls. for token in it: patterns.append(token) self.everything_set_okay = True self.tried_ch_on_resource_with_conditions = False threaded_wildcards = [] for pattern in patterns: surl = StorageUrlFromString(pattern) try: if surl.IsBucket(): if self.recursion_requested: surl.object = '*' threaded_wildcards.append(surl.url_string) else: self.PatchIamHelper(surl, patch_bindings_tuples) else: threaded_wildcards.append(surl.url_string) except AttributeError: error_msg = 'Invalid Cloud URL "%s".' % surl.object_name if set(surl.object_name).issubset(set('-Rrf')): error_msg += ( ' This resource handle looks like a flag, which must appear ' 'before all bindings. See "gsutil help iam ch" for more details.' ) raise CommandException(error_msg) if threaded_wildcards: name_expansion_iterator = NameExpansionIterator( self.command_name, self.debug, self.logger, self.gsutil_api, threaded_wildcards, self.recursion_requested, all_versions=self.all_versions, continue_on_error=self.continue_on_error or self.parallel_operations, bucket_listing_fields=['name']) seek_ahead_iterator = SeekAheadNameExpansionIterator( self.command_name, self.debug, self.GetSeekAheadGsutilApi(), threaded_wildcards, self.recursion_requested, all_versions=self.all_versions) serialized_bindings_tuples_it = itertools.repeat( [SerializeBindingsTuple(t) for t in patch_bindings_tuples]) self.Apply(_PatchIamWrapper, zip(serialized_bindings_tuples_it, name_expansion_iterator), _PatchIamExceptionHandler, fail_on_error=not self.continue_on_error, seek_ahead_iterator=seek_ahead_iterator) self.everything_set_okay &= not GetFailureCount() > 0 # TODO: Add an error counter for files and objects. if not self.everything_set_okay: msg = 'Some IAM policies could not be patched.' if self.tried_ch_on_resource_with_conditions: msg += '\n' msg += '\n'.join( textwrap.wrap( 'Some resources had conditions present in their IAM policy ' 'bindings, which is not supported by "iam ch". %s' % (IAM_CH_CONDITIONS_WORKAROUND_MSG))) raise CommandException(msg)
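# Simplified sketch of the argument split performed above: everything before
# the first URL-looking token is treated as a binding (with '-d' marking a
# removal), and that token plus everything after it is treated as URL
# patterns. The regex below is an assumption standing in for
# STORAGE_URI_REGEX, and the tuple format is illustrative only.
import re

_SKETCH_URI_RE = re.compile(r'^[a-z0-9]+://')


def _split_ch_args(args):
  bindings, patterns = [], []
  it = iter(args)
  for token in it:
    if _SKETCH_URI_RE.match(token):
      patterns.append(token)
      break
    if token == '-d':
      bindings.append(('remove', next(it)))
    else:
      bindings.append(('add', token))
  patterns.extend(it)  # All remaining tokens are URLs.
  return bindings, patterns

# Example: _split_ch_args(['-d', 'user:a@example.com:objectViewer',
#                          'gs://bucket']) -> one removal binding, one URL.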
def setUp(self):
  super(TestAcl, self).setUp()
  self.sample_uri = self.CreateBucket()
  self.sample_url = StorageUrlFromString(str(self.sample_uri))
  self.logger = CreateGsutilLogger('acl')
def RunCommand(self): """Command entry point for the hash command.""" (calc_crc32c, calc_md5, format_func, cloud_format_func, output_format) = (self._ParseOpts(self.sub_opts, self.logger)) matched_one = False for url_str in self.args: for file_ref in self.WildcardIterator(url_str).IterObjects( bucket_listing_fields=[ 'crc32c', 'customerEncryption', 'md5Hash', 'size', ]): matched_one = True url = StorageUrlFromString(url_str) file_name = file_ref.storage_url.object_name if StorageUrlFromString(url_str).IsFileUrl(): file_size = os.path.getsize(file_name) self.gsutil_api.status_queue.put( FileMessage(url, None, time.time(), size=file_size, finished=False, message_type=FileMessage.FILE_HASH)) callback_processor = ProgressCallbackWithTimeout( file_size, FileProgressCallbackHandler( self.gsutil_api.status_queue, src_url=StorageUrlFromString(url_str), operation_name='Hashing').call) hash_dict = self._GetHashClassesFromArgs( calc_crc32c, calc_md5) with open(file_name, 'rb') as fp: hashing_helper.CalculateHashesFromContents( fp, hash_dict, callback_processor=callback_processor) self.gsutil_api.status_queue.put( FileMessage(url, None, time.time(), size=file_size, finished=True, message_type=FileMessage.FILE_HASH)) else: hash_dict = {} obj_metadata = file_ref.root_object file_size = obj_metadata.size md5_present = obj_metadata.md5Hash is not None crc32c_present = obj_metadata.crc32c is not None if not md5_present and not crc32c_present: logging.getLogger().warn('No hashes present for %s', url_str) continue if md5_present: hash_dict['md5'] = obj_metadata.md5Hash if crc32c_present: hash_dict['crc32c'] = obj_metadata.crc32c print('Hashes [%s] for %s:' % (output_format, file_name)) for name, digest in six.iteritems(hash_dict): print('\tHash (%s):\t\t%s' % (name, (format_func(digest) if url.IsFileUrl() else cloud_format_func(digest)))) if not matched_one: raise CommandException('No files matched') _PutToQueueWithTimeout(self.gsutil_api.status_queue, FinalMessage(time.time())) return 0
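# Standalone sketch of the local-file branch above: hash a file in fixed-size
# chunks and report progress through a callback. The chunk size and callback
# shape are assumptions for illustration; gsutil routes progress through its
# status queue and hashing_helper instead.
import hashlib


def _hash_file_with_progress(path, chunk_size=8 * 1024 * 1024,
                             progress_cb=None):
  digest = hashlib.md5()
  bytes_done = 0
  with open(path, 'rb') as fp:
    while True:
      chunk = fp.read(chunk_size)
      if not chunk:
        break
      digest.update(chunk)
      bytes_done += len(chunk)
      if progress_cb:
        progress_cb(bytes_done)
  return digest.hexdigest()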
def RunCommand(self): """Command entry point for the rm command.""" # self.recursion_requested is initialized in command.py (so it can be # checked in parent class for all commands). self.continue_on_error = self.parallel_operations self.read_args_from_stdin = False self.all_versions = False if self.sub_opts: for o, unused_a in self.sub_opts: if o == '-a': self.all_versions = True elif o == '-f': self.continue_on_error = True elif o == '-I': self.read_args_from_stdin = True elif o == '-r' or o == '-R': self.recursion_requested = True self.all_versions = True if self.read_args_from_stdin: if self.args: raise CommandException( 'No arguments allowed with the -I flag.') url_strs = StdinIterator() else: if not self.args: raise CommandException( 'The rm command (without -I) expects at ' 'least one URL.') url_strs = self.args # Tracks number of object deletes that failed. self.op_failure_count = 0 # Tracks if any buckets were missing. self.bucket_not_found_count = 0 # Tracks buckets that are slated for recursive deletion. bucket_urls_to_delete = [] self.bucket_strings_to_delete = [] if self.recursion_requested: bucket_fields = ['id'] for url_str in url_strs: url = StorageUrlFromString(url_str) if url.IsBucket() or url.IsProvider(): for blr in self.WildcardIterator(url_str).IterBuckets( bucket_fields=bucket_fields): bucket_urls_to_delete.append(blr.storage_url) self.bucket_strings_to_delete.append(url_str) self.preconditions = PreconditionsFromHeaders(self.headers or {}) try: # Expand wildcards, dirs, buckets, and bucket subdirs in URLs. name_expansion_iterator = NameExpansionIterator( self.command_name, self.debug, self.logger, self.gsutil_api, url_strs, self.recursion_requested, project_id=self.project_id, all_versions=self.all_versions, continue_on_error=self.continue_on_error or self.parallel_operations) # Perform remove requests in parallel (-m) mode, if requested, using # configured number of parallel processes and threads. Otherwise, # perform requests with sequential function calls in current process. self.Apply( _RemoveFuncWrapper, name_expansion_iterator, _RemoveExceptionHandler, fail_on_error=(not self.continue_on_error), shared_attrs=['op_failure_count', 'bucket_not_found_count']) # Assuming the bucket has versioning enabled, url's that don't map to # objects should throw an error even with all_versions, since the prior # round of deletes only sends objects to a history table. # This assumption that rm -a is only called for versioned buckets should be # corrected, but the fix is non-trivial. except CommandException as e: # Don't raise if there are buckets to delete -- it's valid to say: # gsutil rm -r gs://some_bucket # if the bucket is empty. if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete, e): DecrementFailureCount() else: raise except ServiceException, e: if not self.continue_on_error: raise
class RmCommand(Command): """Implementation of gsutil rm command.""" # Command specification. See base class for documentation. command_spec = Command.CreateCommandSpec( 'rm', command_name_aliases=['del', 'delete', 'remove'], usage_synopsis=_SYNOPSIS, min_args=0, max_args=NO_MAX, supported_sub_args='afIrR', file_url_ok=False, provider_url_ok=False, urls_start_arg=0, gs_api_support=[ApiSelector.XML, ApiSelector.JSON], gs_default_api=ApiSelector.JSON, argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()]) # Help specification. See help_provider.py for documentation. help_spec = Command.HelpSpec( help_name='rm', help_name_aliases=['del', 'delete', 'remove'], help_type='command_help', help_one_line_summary='Remove objects', help_text=_DETAILED_HELP_TEXT, subcommand_help_text={}, ) def RunCommand(self): """Command entry point for the rm command.""" # self.recursion_requested is initialized in command.py (so it can be # checked in parent class for all commands). self.continue_on_error = self.parallel_operations self.read_args_from_stdin = False self.all_versions = False if self.sub_opts: for o, unused_a in self.sub_opts: if o == '-a': self.all_versions = True elif o == '-f': self.continue_on_error = True elif o == '-I': self.read_args_from_stdin = True elif o == '-r' or o == '-R': self.recursion_requested = True self.all_versions = True if self.read_args_from_stdin: if self.args: raise CommandException( 'No arguments allowed with the -I flag.') url_strs = StdinIterator() else: if not self.args: raise CommandException( 'The rm command (without -I) expects at ' 'least one URL.') url_strs = self.args # Tracks number of object deletes that failed. self.op_failure_count = 0 # Tracks if any buckets were missing. self.bucket_not_found_count = 0 # Tracks buckets that are slated for recursive deletion. bucket_urls_to_delete = [] self.bucket_strings_to_delete = [] if self.recursion_requested: bucket_fields = ['id'] for url_str in url_strs: url = StorageUrlFromString(url_str) if url.IsBucket() or url.IsProvider(): for blr in self.WildcardIterator(url_str).IterBuckets( bucket_fields=bucket_fields): bucket_urls_to_delete.append(blr.storage_url) self.bucket_strings_to_delete.append(url_str) self.preconditions = PreconditionsFromHeaders(self.headers or {}) try: # Expand wildcards, dirs, buckets, and bucket subdirs in URLs. name_expansion_iterator = NameExpansionIterator( self.command_name, self.debug, self.logger, self.gsutil_api, url_strs, self.recursion_requested, project_id=self.project_id, all_versions=self.all_versions, continue_on_error=self.continue_on_error or self.parallel_operations) # Perform remove requests in parallel (-m) mode, if requested, using # configured number of parallel processes and threads. Otherwise, # perform requests with sequential function calls in current process. self.Apply( _RemoveFuncWrapper, name_expansion_iterator, _RemoveExceptionHandler, fail_on_error=(not self.continue_on_error), shared_attrs=['op_failure_count', 'bucket_not_found_count']) # Assuming the bucket has versioning enabled, url's that don't map to # objects should throw an error even with all_versions, since the prior # round of deletes only sends objects to a history table. # This assumption that rm -a is only called for versioned buckets should be # corrected, but the fix is non-trivial. except CommandException as e: # Don't raise if there are buckets to delete -- it's valid to say: # gsutil rm -r gs://some_bucket # if the bucket is empty. 
if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete, e): DecrementFailureCount() else: raise except ServiceException, e: if not self.continue_on_error: raise if self.bucket_not_found_count: raise CommandException( 'Encountered non-existent bucket during listing') if self.op_failure_count and not self.continue_on_error: raise CommandException('Some files could not be removed.') # If this was a gsutil rm -r command covering any bucket subdirs, # remove any dir_$folder$ objects (which are created by various web UI # tools to simulate folders). if self.recursion_requested: folder_object_wildcards = [] for url_str in url_strs: url = StorageUrlFromString(url_str) if url.IsObject(): folder_object_wildcards.append('%s**_$folder$' % url_str) if folder_object_wildcards: self.continue_on_error = True try: name_expansion_iterator = NameExpansionIterator( self.command_name, self.debug, self.logger, self.gsutil_api, folder_object_wildcards, self.recursion_requested, project_id=self.project_id, all_versions=self.all_versions) # When we're removing folder objects, always continue on error self.Apply(_RemoveFuncWrapper, name_expansion_iterator, _RemoveFoldersExceptionHandler, fail_on_error=False) except CommandException as e: # Ignore exception from name expansion due to an absent folder file. if not e.reason.startswith(NO_URLS_MATCHED_GENERIC): raise # Now that all data has been deleted, delete any bucket URLs. for url in bucket_urls_to_delete: self.logger.info('Removing %s...', url) @Retry(NotEmptyException, tries=3, timeout_secs=1) def BucketDeleteWithRetry(): self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme) BucketDeleteWithRetry() if self.op_failure_count: plural_str = 's' if self.op_failure_count else '' raise CommandException( '%d file%s/object%s could not be removed.' % (self.op_failure_count, plural_str, plural_str)) return 0
def RunCommand(self): """Command entry point for the rm command.""" # self.recursion_requested is initialized in command.py (so it can be # checked in parent class for all commands). self.continue_on_error = self.parallel_operations self.read_args_from_stdin = False self.all_versions = False if self.sub_opts: for o, unused_a in self.sub_opts: if o == '-a': self.all_versions = True elif o == '-f': self.continue_on_error = True elif o == '-I': self.read_args_from_stdin = True elif o == '-r' or o == '-R': self.recursion_requested = True self.all_versions = True if self.read_args_from_stdin: if self.args: raise CommandException('No arguments allowed with the -I flag.') url_strs = StdinIterator() else: if not self.args: raise CommandException('The rm command (without -I) expects at ' 'least one URL.') url_strs = self.args # Tracks number of object deletes that failed. self.op_failure_count = 0 # Tracks if any buckets were missing. self.bucket_not_found_count = 0 # Tracks buckets that are slated for recursive deletion. bucket_urls_to_delete = [] self.bucket_strings_to_delete = [] if self.recursion_requested: bucket_fields = ['id'] for url_str in url_strs: url = StorageUrlFromString(url_str) if url.IsBucket() or url.IsProvider(): for blr in self.WildcardIterator(url_str).IterBuckets( bucket_fields=bucket_fields): bucket_urls_to_delete.append(blr.storage_url) self.bucket_strings_to_delete.append(url_str) self.preconditions = PreconditionsFromHeaders(self.headers or {}) try: # Expand wildcards, dirs, buckets, and bucket subdirs in URLs. name_expansion_iterator = NameExpansionIterator( self.command_name, self.debug, self.logger, self.gsutil_api, url_strs, self.recursion_requested, project_id=self.project_id, all_versions=self.all_versions, continue_on_error=self.continue_on_error or self.parallel_operations) seek_ahead_iterator = None # Cannot seek ahead with stdin args, since we can only iterate them # once without buffering in memory. if not self.read_args_from_stdin: seek_ahead_iterator = SeekAheadNameExpansionIterator( self.command_name, self.debug, self.GetSeekAheadGsutilApi(), url_strs, self.recursion_requested, all_versions=self.all_versions, project_id=self.project_id) # Perform remove requests in parallel (-m) mode, if requested, using # configured number of parallel processes and threads. Otherwise, # perform requests with sequential function calls in current process. self.Apply(_RemoveFuncWrapper, name_expansion_iterator, _RemoveExceptionHandler, fail_on_error=(not self.continue_on_error), shared_attrs=['op_failure_count', 'bucket_not_found_count'], seek_ahead_iterator=seek_ahead_iterator) # Assuming the bucket has versioning enabled, url's that don't map to # objects should throw an error even with all_versions, since the prior # round of deletes only sends objects to a history table. # This assumption that rm -a is only called for versioned buckets should be # corrected, but the fix is non-trivial. except CommandException as e: # Don't raise if there are buckets to delete -- it's valid to say: # gsutil rm -r gs://some_bucket # if the bucket is empty. 
if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete, e): DecrementFailureCount() else: raise except ServiceException as e: if not self.continue_on_error: raise if self.bucket_not_found_count: raise CommandException('Encountered non-existent bucket during listing') if self.op_failure_count and not self.continue_on_error: raise CommandException('Some files could not be removed.') # If this was a gsutil rm -r command covering any bucket subdirs, # remove any dir_$folder$ objects (which are created by various web UI # tools to simulate folders). if self.recursion_requested: folder_object_wildcards = [] for url_str in url_strs: url = StorageUrlFromString(url_str) if url.IsObject(): folder_object_wildcards.append('%s**_$folder$' % url_str) if folder_object_wildcards: self.continue_on_error = True try: name_expansion_iterator = NameExpansionIterator( self.command_name, self.debug, self.logger, self.gsutil_api, folder_object_wildcards, self.recursion_requested, project_id=self.project_id, all_versions=self.all_versions) # When we're removing folder objects, always continue on error self.Apply(_RemoveFuncWrapper, name_expansion_iterator, _RemoveFoldersExceptionHandler, fail_on_error=False) except CommandException as e: # Ignore exception from name expansion due to an absent folder file. if not e.reason.startswith(NO_URLS_MATCHED_PREFIX): raise # Now that all data has been deleted, delete any bucket URLs. for url in bucket_urls_to_delete: self.logger.info('Removing %s...', url) @Retry(NotEmptyException, tries=3, timeout_secs=1) def BucketDeleteWithRetry(): self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme) BucketDeleteWithRetry() if self.op_failure_count: plural_str = 's' if self.op_failure_count else '' raise CommandException('%d file%s/object%s could not be removed.' % (self.op_failure_count, plural_str, plural_str)) return 0
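# The bucket deletion above retries on NotEmptyException, presumably because
# recently deleted objects can briefly still appear in listings. A generic
# version of that retry pattern, written without gsutil's @Retry decorator
# (names and defaults here are assumptions for illustration):
import time


def _retry(func, exceptions, tries=3, delay_secs=1):
  for attempt in range(tries):
    try:
      return func()
    except exceptions:
      if attempt == tries - 1:
        raise
      time.sleep(delay_secs)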
def RunCommand(self): """Command entry point for the du command.""" self.line_ending = '\n' self.all_versions = False self.produce_total = False self.human_readable = False self.summary_only = False self.exclude_patterns = [] if self.sub_opts: for o, a in self.sub_opts: if o == '-0': self.line_ending = '\0' elif o == '-a': self.all_versions = True elif o == '-c': self.produce_total = True elif o == '-e': self.exclude_patterns.append(a) elif o == '-h': self.human_readable = True elif o == '-s': self.summary_only = True elif o == '-X': if a == '-': f = sys.stdin f_close = False else: f = open(a, 'r') if six.PY2 else open(a, 'r', encoding=UTF8) f_close = True self.exclude_patterns = [six.ensure_text(line.strip()) for line in f] if f_close: f.close() if not self.args: # Default to listing all gs buckets. self.args = ['gs://'] total_bytes = 0 got_nomatch_errors = False def _PrintObjectLong(blr): return self._PrintInfoAboutBucketListingRef(blr) def _PrintNothing(unused_blr=None): pass def _PrintDirectory(num_bytes, blr): if not self.summary_only: self._PrintSummaryLine(num_bytes, blr.url_string.encode(UTF8)) for url_arg in self.args: top_level_storage_url = StorageUrlFromString(url_arg) if top_level_storage_url.IsFileUrl(): raise CommandException('Only cloud URLs are supported for %s' % self.command_name) bucket_listing_fields = ['size'] listing_helper = ls_helper.LsHelper( self.WildcardIterator, self.logger, print_object_func=_PrintObjectLong, print_dir_func=_PrintNothing, print_dir_header_func=_PrintNothing, print_dir_summary_func=_PrintDirectory, print_newline_func=_PrintNothing, all_versions=self.all_versions, should_recurse=True, exclude_patterns=self.exclude_patterns, fields=bucket_listing_fields) # LsHelper expands to objects and prefixes, so perform a top-level # expansion first. if top_level_storage_url.IsProvider(): # Provider URL: use bucket wildcard to iterate over all buckets. top_level_iter = self.WildcardIterator( '%s://*' % top_level_storage_url.scheme).IterBuckets(bucket_fields=['id']) elif top_level_storage_url.IsBucket(): top_level_iter = self.WildcardIterator( '%s://%s' % (top_level_storage_url.scheme, top_level_storage_url.bucket_name)).IterBuckets( bucket_fields=['id']) else: top_level_iter = [BucketListingObject(top_level_storage_url)] for blr in top_level_iter: storage_url = blr.storage_url if storage_url.IsBucket() and self.summary_only: storage_url = StorageUrlFromString( storage_url.CreatePrefixUrl(wildcard_suffix='**')) _, exp_objs, exp_bytes = listing_helper.ExpandUrlAndPrint(storage_url) if (storage_url.IsObject() and exp_objs == 0 and ContainsWildcard(url_arg) and not self.exclude_patterns): got_nomatch_errors = True total_bytes += exp_bytes if self.summary_only: self._PrintSummaryLine(exp_bytes, blr.url_string.rstrip('/').encode(UTF8)) if self.produce_total: self._PrintSummaryLine(total_bytes, 'total') if got_nomatch_errors: raise CommandException('One or more URLs matched no objects.') return 0
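# The -h flag above enables human-readable sizes. A minimal formatter along
# those lines (a sketch only; gsutil's own human-readable formatting helper,
# not shown here, may use a different output format):
def _human_readable(num_bytes):
  for unit in ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB'):
    if num_bytes < 1024 or unit == 'PiB':
      return '%.2f %s' % (num_bytes, unit)
    num_bytes /= 1024.0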
def RunCommand(self): """Command entry point for the setmeta command.""" headers = [] if self.sub_opts: for o, a in self.sub_opts: if o == '-h': if 'x-goog-acl' in a or 'x-amz-acl' in a: raise CommandException( 'gsutil setmeta no longer allows canned ACLs. Use gsutil acl ' 'set ... to set canned ACLs.') headers.append(a) (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers) self.metadata_change = metadata_plus for header in metadata_minus: self.metadata_change[header] = '' if len(self.args) == 1 and not self.recursion_requested: url = StorageUrlFromString(self.args[0]) if not (url.IsCloudUrl() and url.IsObject()): raise CommandException('URL (%s) must name an object' % self.args[0]) # Used to track if any objects' metadata failed to be set. self.everything_set_okay = True self.preconditions = PreconditionsFromHeaders(self.headers) name_expansion_iterator = NameExpansionIterator( self.command_name, self.debug, self.logger, self.gsutil_api, self.args, self.recursion_requested, all_versions=self.all_versions, continue_on_error=self.parallel_operations, bucket_listing_fields=['generation', 'metadata', 'metageneration']) seek_ahead_iterator = SeekAheadNameExpansionIterator( self.command_name, self.debug, self.GetSeekAheadGsutilApi(), self.args, self.recursion_requested, all_versions=self.all_versions, project_id=self.project_id) try: # Perform requests in parallel (-m) mode, if requested, using # configured number of parallel processes and threads. Otherwise, # perform requests with sequential function calls in current process. self.Apply(_SetMetadataFuncWrapper, name_expansion_iterator, _SetMetadataExceptionHandler, fail_on_error=True, seek_ahead_iterator=seek_ahead_iterator) except AccessDeniedException as e: if e.status == 403: self._WarnServiceAccounts() raise if not self.everything_set_okay: raise CommandException('Metadata for some objects could not be set.') return 0
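# Sketch of the plus/minus split consumed above (an assumption about
# _ParseMetadataHeaders's contract, for illustration only): headers carrying a
# value populate metadata_plus, headers without one populate metadata_minus,
# which the command then encodes as empty-string values. The real parser also
# validates which headers may be changed.
def _split_metadata_headers(headers):
  metadata_plus, metadata_minus = {}, set()
  for header in headers:
    key, _, value = header.partition(':')
    if value:
      metadata_plus[key.strip()] = value.strip()
    else:
      metadata_minus.add(key.strip())
  return metadata_plus, metadata_minus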
def RunCommand(self): """Command entry point for the compose command.""" target_url_str = self.args[-1] self.args = self.args[:-1] target_url = StorageUrlFromString(target_url_str) self.CheckProvider(target_url) if target_url.HasGeneration(): raise CommandException( 'A version-specific URL (%s) cannot be ' 'the destination for gsutil compose - abort.' % target_url) dst_obj_metadata = apitools_messages.Object( name=target_url.object_name, bucket=target_url.bucket_name) components = [] # Remember the first source object so we can get its content type. first_src_url = None for src_url_str in self.args: if ContainsWildcard(src_url_str): src_url_iter = self.WildcardIterator(src_url_str).IterObjects() else: src_url_iter = [ BucketListingObject(StorageUrlFromString(src_url_str)) ] for blr in src_url_iter: src_url = blr.storage_url self.CheckProvider(src_url) if src_url.bucket_name != target_url.bucket_name: raise CommandException( 'GCS does not support inter-bucket composing.') if not first_src_url: first_src_url = src_url src_obj_metadata = (apitools_messages.ComposeRequest. SourceObjectsValueListEntry( name=src_url.object_name)) if src_url.HasGeneration(): src_obj_metadata.generation = src_url.generation components.append(src_obj_metadata) # Avoid expanding too many components, and sanity check each name # expansion result. if len(components) > MAX_COMPOSE_ARITY: raise CommandException( '"compose" called with too many component ' 'objects. Limit is %d.' % MAX_COMPOSE_ARITY) if not components: raise CommandException( '"compose" requires at least 1 component object.') dst_obj_metadata.contentType = self.gsutil_api.GetObjectMetadata( first_src_url.bucket_name, first_src_url.object_name, provider=first_src_url.scheme, fields=['contentType']).contentType preconditions = PreconditionsFromHeaders(self.headers or {}) self.logger.info('Composing %s from %d component object(s).', target_url, len(components)) self.gsutil_api.ComposeObject( components, dst_obj_metadata, preconditions=preconditions, provider=target_url.scheme, encryption_tuple=GetEncryptionKeyWrapper(config))
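# The arity check above caps a single compose request at MAX_COMPOSE_ARITY
# components. When more sources are needed, callers can compose in batches
# and then compose the intermediate results. A sketch of that batching,
# assuming a per-request limit of 32 (the documented GCS maximum):
def _batch_components(component_names, max_arity=32):
  for start in range(0, len(component_names), max_arity):
    yield component_names[start:start + max_arity]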
def test_FilterExistingComponentsNonVersioned(self): """Tests upload with a variety of component states.""" mock_api = MockCloudApi() bucket_name = self.MakeTempName('bucket') tracker_file = self.CreateTempFile(file_name='foo', contents='asdf') tracker_file_lock = CreateLock() # dst_obj_metadata used for passing content-type. empty_object = apitools_messages.Object() # Already uploaded, contents still match, component still used. fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1', contents='1') fpath_uploaded_correctly_url = StorageUrlFromString( str(fpath_uploaded_correctly)) object_uploaded_correctly_url = StorageUrlFromString( '%s://%s/%s' % (self.default_provider, bucket_name, fpath_uploaded_correctly)) with open(fpath_uploaded_correctly) as f_in: fpath_uploaded_correctly_md5 = CalculateB64EncodedMd5FromContents( f_in) mock_api.MockCreateObjectWithMetadata(apitools_messages.Object( bucket=bucket_name, name=fpath_uploaded_correctly, md5Hash=fpath_uploaded_correctly_md5), contents='1') args_uploaded_correctly = PerformParallelUploadFileToObjectArgs( fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly_url, object_uploaded_correctly_url, '', empty_object, tracker_file, tracker_file_lock) # Not yet uploaded, but needed. fpath_not_uploaded = self.CreateTempFile(file_name='foo2', contents='2') fpath_not_uploaded_url = StorageUrlFromString(str(fpath_not_uploaded)) object_not_uploaded_url = StorageUrlFromString( '%s://%s/%s' % (self.default_provider, bucket_name, fpath_not_uploaded)) args_not_uploaded = PerformParallelUploadFileToObjectArgs( fpath_not_uploaded, 0, 1, fpath_not_uploaded_url, object_not_uploaded_url, '', empty_object, tracker_file, tracker_file_lock) # Already uploaded, but contents no longer match. Even though the contents # differ, we don't delete this since the bucket is not versioned and it # will be overwritten anyway. fpath_wrong_contents = self.CreateTempFile(file_name='foo4', contents='4') fpath_wrong_contents_url = StorageUrlFromString( str(fpath_wrong_contents)) object_wrong_contents_url = StorageUrlFromString( '%s://%s/%s' % (self.default_provider, bucket_name, fpath_wrong_contents)) with open(self.CreateTempFile(contents='_')) as f_in: fpath_wrong_contents_md5 = CalculateB64EncodedMd5FromContents(f_in) mock_api.MockCreateObjectWithMetadata(apitools_messages.Object( bucket=bucket_name, name=fpath_wrong_contents, md5Hash=fpath_wrong_contents_md5), contents='1') args_wrong_contents = PerformParallelUploadFileToObjectArgs( fpath_wrong_contents, 0, 1, fpath_wrong_contents_url, object_wrong_contents_url, '', empty_object, tracker_file, tracker_file_lock) # Exists in tracker file, but component object no longer exists. fpath_remote_deleted = self.CreateTempFile(file_name='foo5', contents='5') fpath_remote_deleted_url = StorageUrlFromString( str(fpath_remote_deleted)) args_remote_deleted = PerformParallelUploadFileToObjectArgs( fpath_remote_deleted, 0, 1, fpath_remote_deleted_url, '', '', empty_object, tracker_file, tracker_file_lock) # Exists in tracker file and already uploaded, but no longer needed. 
fpath_no_longer_used = self.CreateTempFile(file_name='foo6', contents='6') with open(fpath_no_longer_used) as f_in: file_md5 = CalculateB64EncodedMd5FromContents(f_in) mock_api.MockCreateObjectWithMetadata(apitools_messages.Object( bucket=bucket_name, name='foo6', md5Hash=file_md5), contents='6') dst_args = { fpath_uploaded_correctly: args_uploaded_correctly, fpath_not_uploaded: args_not_uploaded, fpath_wrong_contents: args_wrong_contents, fpath_remote_deleted: args_remote_deleted } existing_components = [ ObjectFromTracker(fpath_uploaded_correctly, ''), ObjectFromTracker(fpath_wrong_contents, ''), ObjectFromTracker(fpath_remote_deleted, ''), ObjectFromTracker(fpath_no_longer_used, '') ] bucket_url = StorageUrlFromString('%s://%s' % (self.default_provider, bucket_name)) (components_to_upload, uploaded_components, existing_objects_to_delete) = (FilterExistingComponents( dst_args, existing_components, bucket_url, mock_api)) for arg in [ args_not_uploaded, args_wrong_contents, args_remote_deleted ]: self.assertTrue(arg in components_to_upload) self.assertEqual(1, len(uploaded_components)) self.assertEqual(args_uploaded_correctly.dst_url.url_string, uploaded_components[0].url_string) self.assertEqual(1, len(existing_objects_to_delete)) no_longer_used_url = StorageUrlFromString( '%s://%s/%s' % (self.default_provider, bucket_name, fpath_no_longer_used)) self.assertEqual(no_longer_used_url.url_string, existing_objects_to_delete[0].url_string)
def __iter__(self, bucket_listing_fields=None,
             expand_top_level_buckets=False):
  """Iterator that gets called when iterating over the cloud wildcard.

  In the case where no wildcard is present, returns a single matching object,
  single matching prefix, or one of each if both exist.

  Args:
    bucket_listing_fields: Iterable fields to include in bucket listings.
        Ex. ['name', 'acl']. Iterator is responsible for converting these to
        list-style format ['items/name', 'items/acl'] as well as adding any
        fields necessary for listing such as prefixes. API implementation is
        responsible for adding pagination fields. If this is None, all fields
        are returned.
    expand_top_level_buckets: If true, yield no BUCKET references. Instead,
        expand buckets into top-level objects and prefixes.

  Yields:
    BucketListingRef of type BUCKET, OBJECT or PREFIX.
  """
  single_version_request = self.wildcard_url.HasGeneration()

  # For wildcard expansion purposes, we need at a minimum the name of
  # each object and prefix. If we're not using the default of requesting
  # all fields, make sure at least these are requested. The Cloud API
  # tolerates specifying the same field twice.
  get_fields = None
  if bucket_listing_fields:
    get_fields = set()
    for field in bucket_listing_fields:
      get_fields.add(field)
    bucket_listing_fields = self._GetToListFields(
        get_fields=bucket_listing_fields)
    bucket_listing_fields.update(['items/name', 'prefixes'])
    get_fields.update(['name'])
    # If we're making versioned requests, ensure generation and
    # metageneration are also included.
    if single_version_request or self.all_versions:
      bucket_listing_fields.update(
          ['items/generation', 'items/metageneration'])
      get_fields.update(['generation', 'metageneration'])

  # Handle bucket wildcarding, if any, in _ExpandBucketWildcards. Then
  # iterate over the expanded bucket strings and handle any object
  # wildcarding.
  for bucket_listing_ref in self._ExpandBucketWildcards(bucket_fields=['id']):
    bucket_url_string = bucket_listing_ref.url_string
    if self.wildcard_url.IsBucket():
      # IsBucket() guarantees there are no prefix or object wildcards, and
      # thus this is a top-level listing of buckets.
      if expand_top_level_buckets:
        url = StorageUrlFromString(bucket_url_string)
        for obj_or_prefix in self.gsutil_api.ListObjects(
            url.bucket_name, delimiter='/', all_versions=self.all_versions,
            provider=self.wildcard_url.scheme,
            fields=bucket_listing_fields):
          if obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
            yield self._GetObjectRef(bucket_url_string, obj_or_prefix.data,
                                     with_version=self.all_versions)
          else:  # CloudApi.CsObjectOrPrefixType.PREFIX:
            yield self._GetPrefixRef(bucket_url_string, obj_or_prefix.data)
      else:
        yield bucket_listing_ref
    else:
      # By default, assume a non-wildcarded URL is an object, not a prefix.
      # This prevents unnecessary listings (which are slower, more expensive,
      # and also subject to eventual consistency).
      if (not ContainsWildcard(self.wildcard_url.url_string) and
          self.wildcard_url.IsObject() and not self.all_versions):
        try:
          get_object = self.gsutil_api.GetObjectMetadata(
              self.wildcard_url.bucket_name,
              self.wildcard_url.object_name,
              generation=self.wildcard_url.generation,
              provider=self.wildcard_url.scheme,
              fields=get_fields)
          yield self._GetObjectRef(
              self.wildcard_url.bucket_url_string, get_object,
              with_version=(self.all_versions or single_version_request))
          return
        except (NotFoundException, AccessDeniedException):
          # It's possible this is a prefix - try to list instead.
          pass

      # Expand iteratively by building prefix/delimiter bucket listing
      # request, filtering the results per the current level's wildcard
      # (if present), and continuing with the next component of the
      # wildcard. See _BuildBucketFilterStrings() documentation for details.
      if single_version_request:
        url_string = '%s%s#%s' % (bucket_url_string,
                                  self.wildcard_url.object_name,
                                  self.wildcard_url.generation)
      else:
        # Rstrip any prefixes to correspond with rstripped prefix wildcard
        # from _BuildBucketFilterStrings().
        url_string = '%s%s' % (
            bucket_url_string,
            StripOneSlash(self.wildcard_url.object_name) or
            '/')  # Cover root object named '/' case.
      urls_needing_expansion = [url_string]
      while urls_needing_expansion:
        url = StorageUrlFromString(urls_needing_expansion.pop(0))
        (prefix, delimiter, prefix_wildcard, suffix_wildcard) = (
            self._BuildBucketFilterStrings(url.object_name))
        regex_patterns = self._GetRegexPatterns(prefix_wildcard)

        # If we have a suffix wildcard, we only care about listing prefixes.
        listing_fields = (
            set(['prefixes']) if suffix_wildcard else bucket_listing_fields)

        # List bucket for objects matching prefix up to delimiter.
        for obj_or_prefix in self.gsutil_api.ListObjects(
            url.bucket_name, prefix=prefix, delimiter=delimiter,
            all_versions=self.all_versions or single_version_request,
            provider=self.wildcard_url.scheme,
            fields=listing_fields):
          for pattern in regex_patterns:
            if obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
              gcs_object = obj_or_prefix.data
              if pattern.match(gcs_object.name):
                if not suffix_wildcard or (
                    StripOneSlash(gcs_object.name) == suffix_wildcard):
                  if not single_version_request or (
                      self._SingleVersionMatches(gcs_object.generation)):
                    yield self._GetObjectRef(
                        bucket_url_string, gcs_object,
                        with_version=(self.all_versions or
                                      single_version_request))
                break
            else:  # CloudApi.CsObjectOrPrefixType.PREFIX
              prefix = obj_or_prefix.data

              if ContainsWildcard(prefix):
                # TODO: Disambiguate user-supplied strings from iterated
                # prefix and object names so that we can better reason
                # about wildcards and handle this case without raising
                # an error.
                raise CommandException(
                    'Cloud folder %s%s contains a wildcard; gsutil does '
                    'not currently support objects with wildcards in their '
                    'name.' % (bucket_url_string, prefix))

              # If the prefix ends with a slash, remove it. Note that we only
              # remove one slash so that we can successfully enumerate dirs
              # containing multiple slashes.
              rstripped_prefix = StripOneSlash(prefix)
              if pattern.match(rstripped_prefix):
                if suffix_wildcard and rstripped_prefix != suffix_wildcard:
                  # There's more wildcard left to expand.
                  url_append_string = '%s%s' % (
                      bucket_url_string,
                      rstripped_prefix + '/' + suffix_wildcard)
                  urls_needing_expansion.append(url_append_string)
                else:
                  # No wildcard to expand, just yield the prefix.
                  yield self._GetPrefixRef(bucket_url_string, prefix)
                break
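

# The iterator above turns each wildcard component into a prefix/delimiter
# listing plus a regex filter. The sketch below only illustrates that idea
# with the standard library; it is not the real _BuildBucketFilterStrings or
# _GetRegexPatterns, which additionally handle '**', delimiters, and suffix
# wildcards.
import fnmatch
import re


def prefix_and_regex(object_wildcard):
  """Returns (non-wildcard listing prefix, compiled regex) for a wildcard."""
  first_special = re.search(r'[*?\[\]]', object_wildcard)
  prefix = (object_wildcard[:first_special.start()]
            if first_special else object_wildcard)
  return prefix, re.compile(fnmatch.translate(object_wildcard))


prefix, pattern = prefix_and_regex('abc/d*/e?.txt')
print(prefix)                                 # 'abc/d'
print(bool(pattern.match('abc/d1/e2.txt')))   # True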
def RunCommand(self):
  """Command entry point for the ls command."""
  got_nomatch_errors = False
  got_bucket_nomatch_errors = False
  listing_style = ListingStyle.SHORT
  get_bucket_info = False
  self.recursion_requested = False
  self.all_versions = False
  self.include_etag = False
  self.human_readable = False
  self.list_subdir_contents = True
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-a':
        self.all_versions = True
      elif o == '-e':
        self.include_etag = True
      elif o == '-b':
        get_bucket_info = True
      elif o == '-h':
        self.human_readable = True
      elif o == '-l':
        listing_style = ListingStyle.LONG
      elif o == '-L':
        listing_style = ListingStyle.LONG_LONG
      elif o == '-p':
        # Project IDs are sent as header values when using gs and s3 XML APIs.
        InsistAscii(a, 'Invalid non-ASCII character found in project ID')
        self.project_id = a
      elif o == '-r' or o == '-R':
        self.recursion_requested = True
      elif o == '-d':
        self.list_subdir_contents = False

  if not self.args:
    # default to listing all gs buckets
    self.args = ['gs://']

  total_objs = 0
  total_bytes = 0

  def MaybePrintBucketHeader(blr):
    if len(self.args) > 1:
      print '%s:' % blr.url_string.encode(UTF8)
  print_bucket_header = MaybePrintBucketHeader

  for url_str in self.args:
    storage_url = StorageUrlFromString(url_str)
    if storage_url.IsFileUrl():
      raise CommandException('Only cloud URLs are supported for %s'
                             % self.command_name)
    bucket_fields = None
    if (listing_style == ListingStyle.SHORT or
        listing_style == ListingStyle.LONG):
      bucket_fields = ['id']
    elif listing_style == ListingStyle.LONG_LONG:
      bucket_fields = ['acl', 'cors', 'defaultObjectAcl', 'labels',
                       'location', 'logging', 'lifecycle', 'metageneration',
                       'storageClass', 'timeCreated', 'updated', 'versioning',
                       'website']
    if storage_url.IsProvider():
      # Provider URL: use bucket wildcard to list buckets.
      for blr in self.WildcardIterator(
          '%s://*' % storage_url.scheme).IterBuckets(
              bucket_fields=bucket_fields):
        self._PrintBucketInfo(blr, listing_style)
    elif storage_url.IsBucket() and get_bucket_info:
      # ls -b bucket listing request: List info about bucket(s).
      total_buckets = 0
      for blr in self.WildcardIterator(url_str).IterBuckets(
          bucket_fields=bucket_fields):
        if not ContainsWildcard(url_str) and not blr.root_object:
          # Iterator does not make an HTTP call for non-wildcarded
          # listings with fields=='id'. Ensure the bucket exists by calling
          # GetBucket.
          self.gsutil_api.GetBucket(blr.storage_url.bucket_name,
                                    fields=['id'],
                                    provider=storage_url.scheme)
        self._PrintBucketInfo(blr, listing_style)
        total_buckets += 1
      if not ContainsWildcard(url_str) and not total_buckets:
        got_bucket_nomatch_errors = True
    else:
      # URL names a bucket, object, or object subdir ->
      # list matching object(s) / subdirs.
      def _PrintPrefixLong(blr):
        print '%-33s%s' % ('', blr.url_string.encode(UTF8))

      if listing_style == ListingStyle.SHORT:
        # ls helper by default readies us for a short listing.
        ls_helper = LsHelper(
            self.WildcardIterator, self.logger,
            all_versions=self.all_versions,
            print_bucket_header_func=print_bucket_header,
            should_recurse=self.recursion_requested,
            list_subdir_contents=self.list_subdir_contents)
      elif listing_style == ListingStyle.LONG:
        bucket_listing_fields = ['name', 'timeCreated', 'updated', 'size']
        if self.all_versions:
          bucket_listing_fields.extend(['generation', 'metageneration'])
        if self.include_etag:
          bucket_listing_fields.append('etag')

        ls_helper = LsHelper(
            self.WildcardIterator, self.logger,
            print_object_func=self._PrintLongListing,
            print_dir_func=_PrintPrefixLong,
            print_bucket_header_func=print_bucket_header,
            all_versions=self.all_versions,
            should_recurse=self.recursion_requested,
            fields=bucket_listing_fields,
            list_subdir_contents=self.list_subdir_contents)
      elif listing_style == ListingStyle.LONG_LONG:
        # List all fields
        bucket_listing_fields = (UNENCRYPTED_FULL_LISTING_FIELDS +
                                 ENCRYPTED_FIELDS)
        ls_helper = LsHelper(
            self.WildcardIterator, self.logger,
            print_object_func=PrintFullInfoAboutObject,
            print_dir_func=_PrintPrefixLong,
            print_bucket_header_func=print_bucket_header,
            all_versions=self.all_versions,
            should_recurse=self.recursion_requested,
            fields=bucket_listing_fields,
            list_subdir_contents=self.list_subdir_contents)
      else:
        raise CommandException('Unknown listing style: %s' % listing_style)

      exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url)
      if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
        got_nomatch_errors = True
      total_bytes += exp_bytes
      total_objs += exp_objs

  if total_objs and listing_style != ListingStyle.SHORT:
    print ('TOTAL: %d objects, %d bytes (%s)' %
           (total_objs, total_bytes, MakeHumanReadable(float(total_bytes))))
  if got_nomatch_errors:
    raise CommandException('One or more URLs matched no objects.')
  if got_bucket_nomatch_errors:
    raise NotFoundException('One or more bucket URLs matched no buckets.')

  return 0
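

# The TOTAL line above relies on MakeHumanReadable to render the byte count.
# Below is a hedged, standalone sketch of such a formatter; the real gsutil
# helper may differ in precision and unit labels.
def make_human_readable(num_bytes):
  """Formats a byte count using binary (KiB/MiB/...) units."""
  value = float(num_bytes)
  for suffix in ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB'):
    if value < 1024.0 or suffix == 'PiB':
      return '%.2f %s' % (value, suffix)
    value /= 1024.0


print(make_human_readable(123456789))  # '117.74 MiB'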
def __iter__(self, bucket_listing_fields=None):
  """Iterator that gets called when iterating over the file wildcard.

  In the case where no wildcard is present, returns a single matching file
  or directory.

  Args:
    bucket_listing_fields: Iterable fields to include in listings.
        Ex. ['size']. Currently only 'size' is supported.
        If present, will populate yielded BucketListingObject.root_object
        with the file name and size.

  Raises:
    WildcardException: if invalid wildcard found.

  Yields:
    BucketListingRef of type OBJECT (for files) or PREFIX (for directories)
  """
  include_size = (bucket_listing_fields and
                  'size' in set(bucket_listing_fields))

  wildcard = self.wildcard_url.object_name
  match = FLAT_LIST_REGEX.match(wildcard)
  if match:
    # Recursive wildcarding request ('.../**/...').
    # Example input: wildcard = '/tmp/tmp2pQJAX/**/*'
    base_dir = match.group('before')[:-1]
    remaining_wildcard = match.group('after')
    # At this point for the above example base_dir = '/tmp/tmp2pQJAX' and
    # remaining_wildcard = '/*'
    if remaining_wildcard.startswith('*'):
      raise WildcardException('Invalid wildcard with more than 2 consecutive '
                              '*s (%s)' % wildcard)
    # If there was no remaining wildcard past the recursive wildcard,
    # treat it as if it were a '*'. For example, file://tmp/** is equivalent
    # to file://tmp/**/*
    if not remaining_wildcard:
      remaining_wildcard = '*'
    # Skip slash(es).
    remaining_wildcard = remaining_wildcard.lstrip(os.sep)
    filepaths = self._IterDir(base_dir, remaining_wildcard)
  else:
    # Not a recursive wildcarding request.
    filepaths = glob.iglob(wildcard)
  for filepath in filepaths:
    expanded_url = StorageUrlFromString(filepath)
    try:
      if self.ignore_symlinks and os.path.islink(filepath):
        if self.logger:
          self.logger.info('Skipping symbolic link %s...', filepath)
        continue
      if os.path.isdir(filepath):
        yield BucketListingPrefix(expanded_url)
      else:
        blr_object = _GetFileObject(filepath) if include_size else None
        yield BucketListingObject(expanded_url, root_object=blr_object)
    except UnicodeEncodeError:
      raise CommandException('\n'.join(textwrap.wrap(
          _UNICODE_EXCEPTION_TEXT % repr(filepath))))
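

# For the recursive ('**') case above, the work is delegated to _IterDir.
# The following is only an illustrative sketch of that kind of walk, built on
# os.walk and fnmatch; the real _IterDir also deals with platform-specific
# path handling not shown here.
import fnmatch
import os


def iter_dir(base_dir, remaining_wildcard):
  """Yields paths under base_dir whose basename matches remaining_wildcard."""
  for root, _, files in os.walk(base_dir):
    for name in files:
      if fnmatch.fnmatch(name, remaining_wildcard):
        yield os.path.join(root, name)


for path in iter_dir('/tmp', '*.txt'):
  print(path)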
def _SetIam(self):
  """Set IAM policy for given wildcards on the command line."""

  self.continue_on_error = False
  self.recursion_requested = False
  self.all_versions = False
  force_etag = False
  etag = ''
  if self.sub_opts:
    for o, arg in self.sub_opts:
      if o in ['-r', '-R']:
        self.recursion_requested = True
      elif o == '-f':
        self.continue_on_error = True
      elif o == '-a':
        self.all_versions = True
      elif o == '-e':
        etag = str(arg)
        force_etag = True
      else:
        self.RaiseInvalidArgumentException()

  file_url = self.args[0]
  patterns = self.args[1:]

  # Load the IAM policy file and raise error if the file is invalid JSON or
  # does not exist.
  try:
    with open(file_url, 'r') as fp:
      policy = json.loads(fp.read())
  except IOError:
    raise ArgumentException(
        'Specified IAM policy file "%s" does not exist.' % file_url)
  except ValueError:
    raise ArgumentException('Invalid IAM policy file "%s".' % file_url)

  bindings = policy.get('bindings', [])
  if not force_etag:
    etag = policy.get('etag', '')

  policy_json = json.dumps({'bindings': bindings, 'etag': etag})
  try:
    policy = protojson.decode_message(apitools_messages.Policy, policy_json)
  except DecodeError:
    raise ArgumentException(
        'Invalid IAM policy file "%s" or etag "%s".' % (file_url, etag))

  self.everything_set_okay = True

  # This list of wildcard strings will be handled by NameExpansionIterator.
  threaded_wildcards = []

  for pattern in patterns:
    surl = StorageUrlFromString(pattern)
    if surl.IsBucket():
      if self.recursion_requested:
        surl.object_name = '*'
        threaded_wildcards.append(surl.url_string)
      else:
        self.SetIamHelper(surl, policy)
    else:
      threaded_wildcards.append(surl.url_string)

  # N.B.: If threaded_wildcards contains a non-existent bucket
  # (e.g. ["gs://non-existent", "gs://existent"]), NameExpansionIterator
  # will raise an exception in iter.next. This halts all iteration, even
  # when -f is set. This behavior is also evident in acl set. This behavior
  # also appears for any exception that will be raised when iterating over
  # wildcard expansions (access denied if bucket cannot be listed, etc.).
  if threaded_wildcards:
    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug,
        self.logger, self.gsutil_api,
        threaded_wildcards, self.recursion_requested,
        all_versions=self.all_versions,
        continue_on_error=self.continue_on_error or self.parallel_operations,
        bucket_listing_fields=['name'])

    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
        threaded_wildcards, self.recursion_requested,
        all_versions=self.all_versions)

    # We cannot curry policy along due to a Python2.6 bug; see comments in
    # IamCommand._PatchIam for more information.
    policy_it = itertools.repeat(protojson.encode_message(policy))
    self.Apply(
        _SetIamWrapper,
        itertools.izip(policy_it, name_expansion_iterator),
        _SetIamExceptionHandler,
        fail_on_error=not self.continue_on_error,
        seek_ahead_iterator=seek_ahead_iterator)

    self.everything_set_okay &= not GetFailureCount() > 0

  # TODO: Add an error counter for files and objects.
  if not self.everything_set_okay:
    raise CommandException('Some IAM policies could not be set.')
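

# _SetIam above reads a JSON file and pulls out 'bindings' and (optionally)
# 'etag' before decoding it into an apitools Policy message. The snippet below
# writes a file with that shape; the role and member values are illustrative
# only, and the filename 'policy.json' is arbitrary.
import json

example_policy = {
    'bindings': [
        {'role': 'roles/storage.objectViewer',
         'members': ['user:jane@example.com']},
    ],
    'etag': 'CAE=',
}
with open('policy.json', 'w') as fp:
  json.dump(example_policy, fp, indent=2)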
def RunCommand(self):
  """Command entry point for the mb command."""
  bucket_policy_only = None
  location = None
  storage_class = None
  seconds = None
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-l':
        location = a
      elif o == '-p':
        # Project IDs are sent as header values when using gs and s3 XML APIs.
        InsistAscii(a, 'Invalid non-ASCII character found in project ID')
        self.project_id = a
      elif o == '-c' or o == '-s':
        storage_class = NormalizeStorageClass(a)
      elif o == '--retention':
        seconds = RetentionInSeconds(a)
      elif o == '-b':
        if self.gsutil_api.GetApiSelector('gs') != ApiSelector.JSON:
          raise CommandException('The -b <on|off> option '
                                 'can only be used with the JSON API')
        InsistOnOrOff(a, 'Only on and off values allowed for -b option')
        bucket_policy_only = (a == 'on')

  bucket_metadata = apitools_messages.Bucket(location=location,
                                             storageClass=storage_class)
  if bucket_policy_only:
    bucket_metadata.iamConfiguration = IamConfigurationValue()
    iam_config = bucket_metadata.iamConfiguration
    iam_config.bucketPolicyOnly = BucketPolicyOnlyValue()
    iam_config.bucketPolicyOnly.enabled = bucket_policy_only

  for bucket_url_str in self.args:
    bucket_url = StorageUrlFromString(bucket_url_str)
    if seconds is not None:
      if bucket_url.scheme != 'gs':
        raise CommandException('Retention policy can only be specified for '
                               'GCS buckets.')
      retention_policy = (apitools_messages.Bucket.RetentionPolicyValue(
          retentionPeriod=seconds))
      bucket_metadata.retentionPolicy = retention_policy

    if not bucket_url.IsBucket():
      raise CommandException('The mb command requires a URL that specifies a '
                             'bucket.\n"%s" is not valid.' % bucket_url)
    if (not BUCKET_NAME_RE.match(bucket_url.bucket_name) or
        TOO_LONG_DNS_NAME_COMP.search(bucket_url.bucket_name)):
      raise InvalidUrlError(
          'Invalid bucket name in URL "%s"' % bucket_url.bucket_name)

    self.logger.info('Creating %s...', bucket_url)
    # Pass storage_class param only if this is a GCS bucket. (In S3 the
    # storage class is specified on the key object.)
    try:
      self.gsutil_api.CreateBucket(bucket_url.bucket_name,
                                   project_id=self.project_id,
                                   metadata=bucket_metadata,
                                   provider=bucket_url.scheme)
    except BadRequestException as e:
      if (e.status == 400 and e.reason == 'DotfulBucketNameNotUnderTld' and
          bucket_url.scheme == 'gs'):
        bucket_name = bucket_url.bucket_name
        final_comp = bucket_name[bucket_name.rfind('.') + 1:]
        raise CommandException('\n'.join(textwrap.wrap(
            'Buckets with "." in the name must be valid DNS names. The bucket'
            ' you are attempting to create (%s) is not a valid DNS name,'
            ' because the final component (%s) is not currently a valid part'
            ' of the top-level DNS tree.' % (bucket_name, final_comp))))
      else:
        raise

  return 0
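

# The --retention flag above is converted to a second count by
# RetentionInSeconds. The sketch below is hypothetical: the unit suffixes and
# their exact second values are assumptions, shown only to illustrate the kind
# of translation the flag requires; the real helper's accepted syntax may
# differ.
import re

_UNIT_SECONDS = {'s': 1, 'd': 86400, 'm': 2678400, 'y': 31557600}


def retention_in_seconds(value):
  """Converts a duration like '30d' into seconds (illustrative units only)."""
  match = re.match(r'^(\d+)([sdmy])$', value)
  if not match:
    raise ValueError('Invalid retention period: %s' % value)
  return int(match.group(1)) * _UNIT_SECONDS[match.group(2)]


print(retention_in_seconds('30d'))  # 2592000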
def _PatchIam(self):
  self.continue_on_error = False
  self.recursion_requested = False

  patch_bindings_tuples = []

  if self.sub_opts:
    for o, a in self.sub_opts:
      if o in ['-r', '-R']:
        self.recursion_requested = True
      elif o == '-f':
        self.continue_on_error = True
      elif o == '-d':
        patch_bindings_tuples.append(BindingStringToTuple(False, a))

  patterns = []

  # N.B.: self.sub_opts stops taking in options at the first non-flagged
  # token. The rest of the tokens are sent to self.args. Thus, in order to
  # handle input of the form "-d <binding> <binding> <url>", we will have to
  # parse self.args for a mix of both bindings and CloudUrls. We are not
  # expecting to come across the -r, -f flags here.
  it = iter(self.args)
  for token in it:
    if STORAGE_URI_REGEX.match(token):
      patterns.append(token)
      break
    if token == '-d':
      patch_bindings_tuples.append(BindingStringToTuple(False, next(it)))
    else:
      patch_bindings_tuples.append(BindingStringToTuple(True, token))
  if not patch_bindings_tuples:
    raise CommandException('Must specify at least one binding.')

  # All following arguments are urls.
  for token in it:
    patterns.append(token)

  self.everything_set_okay = True
  self.tried_ch_on_resource_with_conditions = False
  threaded_wildcards = []
  for pattern in patterns:
    surl = StorageUrlFromString(pattern)
    try:
      if surl.IsBucket():
        if self.recursion_requested:
          surl.object = '*'
          threaded_wildcards.append(surl.url_string)
        else:
          self.PatchIamHelper(surl, patch_bindings_tuples)
      else:
        threaded_wildcards.append(surl.url_string)
    except AttributeError:
      error_msg = 'Invalid Cloud URL "%s".' % surl.object_name
      if set(surl.object_name).issubset(set('-Rrf')):
        error_msg += (
            ' This resource handle looks like a flag, which must appear '
            'before all bindings. See "gsutil help iam ch" for more details.')
      raise CommandException(error_msg)

  if threaded_wildcards:
    name_expansion_iterator = NameExpansionIterator(
        self.command_name,
        self.debug,
        self.logger,
        self.gsutil_api,
        threaded_wildcards,
        self.recursion_requested,
        all_versions=self.all_versions,
        continue_on_error=self.continue_on_error or self.parallel_operations,
        bucket_listing_fields=['name'])

    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name,
        self.debug,
        self.GetSeekAheadGsutilApi(),
        threaded_wildcards,
        self.recursion_requested,
        all_versions=self.all_versions)

    serialized_bindings_tuples_it = itertools.repeat(
        [SerializeBindingsTuple(t) for t in patch_bindings_tuples])
    self.Apply(_PatchIamWrapper,
               zip(serialized_bindings_tuples_it, name_expansion_iterator),
               _PatchIamExceptionHandler,
               fail_on_error=not self.continue_on_error,
               seek_ahead_iterator=seek_ahead_iterator)

    self.everything_set_okay &= not GetFailureCount() > 0

  # TODO: Add an error counter for files and objects.
  if not self.everything_set_okay:
    msg = 'Some IAM policies could not be patched.'
    if self.tried_ch_on_resource_with_conditions:
      msg += '\n'
      msg += '\n'.join(
          textwrap.wrap(
              'Some resources had conditions present in their IAM policy '
              'bindings, which is not supported by "iam ch". %s' %
              (IAM_CH_CONDITIONS_WORKAROUND_MSG)))
    raise CommandException(msg)
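

# The binding tokens consumed above follow the documented "iam ch" style of
# 'member:role', e.g. 'user:jane@example.com:objectViewer'. The helper below
# is a hypothetical illustration of splitting such a string into its member
# and role parts; it is not BindingStringToTuple, which performs richer
# validation and role expansion and returns a different structure.
def split_binding(binding_string):
  """Splits 'user:jane@example.com:objectViewer' into (member, role)."""
  member, role = binding_string.rsplit(':', 1)
  return member, role


print(split_binding('user:jane@example.com:objectViewer'))
# ('user:jane@example.com', 'objectViewer')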
def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True):
  """Print full info for given object (like what displays for gsutil ls -L).

  Args:
    bucket_listing_ref: BucketListingRef being listed.
        Must have ref_type OBJECT and a populated root_object
        with the desired fields.
    incl_acl: True if ACL info should be output.

  Returns:
    Tuple (number of objects, object_length)

  Raises:
    Exception: if calling bug encountered.
  """
  url_str = bucket_listing_ref.url_string
  storage_url = StorageUrlFromString(url_str)
  obj = bucket_listing_ref.root_object

  if (obj.metadata and
      S3_DELETE_MARKER_GUID in obj.metadata.additionalProperties):
    num_bytes = 0
    num_objs = 0
    url_str += '<DeleteMarker>'
  else:
    num_bytes = obj.size
    num_objs = 1

  print '%s:' % url_str.encode(UTF8)
  if obj.updated:
    print '\tCreation time:\t\t%s' % obj.updated.strftime(
        '%a, %d %b %Y %H:%M:%S GMT')
  if obj.cacheControl:
    print '\tCache-Control:\t\t%s' % obj.cacheControl
  if obj.contentDisposition:
    print '\tContent-Disposition:\t\t%s' % obj.contentDisposition
  if obj.contentEncoding:
    print '\tContent-Encoding:\t\t%s' % obj.contentEncoding
  if obj.contentLanguage:
    print '\tContent-Language:\t%s' % obj.contentLanguage
  print '\tContent-Length:\t\t%s' % obj.size
  print '\tContent-Type:\t\t%s' % obj.contentType
  if obj.componentCount:
    print '\tComponent-Count:\t%d' % obj.componentCount
  marker_props = {}
  if obj.metadata and obj.metadata.additionalProperties:
    non_marker_props = []
    for add_prop in obj.metadata.additionalProperties:
      if add_prop.key not in S3_MARKER_GUIDS:
        non_marker_props.append(add_prop)
      else:
        marker_props[add_prop.key] = add_prop.value
    if non_marker_props:
      print '\tMetadata:'
      for ap in non_marker_props:
        meta_string = '\t\t%s:\t\t%s' % (ap.key, ap.value)
        print meta_string.encode(UTF8)
  if obj.crc32c:
    print '\tHash (crc32c):\t\t%s' % obj.crc32c
  if obj.md5Hash:
    print '\tHash (md5):\t\t%s' % obj.md5Hash
  print '\tETag:\t\t\t%s' % obj.etag.strip('"\'')
  if obj.generation:
    generation_str = GenerationFromUrlAndString(storage_url, obj.generation)
    print '\tGeneration:\t\t%s' % generation_str
  if obj.metageneration:
    print '\tMetageneration:\t\t%s' % obj.metageneration
  if incl_acl:
    # JSON API won't return acls as part of the response unless we have
    # full control scope
    if obj.acl:
      print '\tACL:\t\t%s' % AclTranslation.JsonFromMessage(obj.acl)
    elif S3_ACL_MARKER_GUID in marker_props:
      print '\tACL:\t\t%s' % marker_props[S3_ACL_MARKER_GUID]
    else:
      print ('\tACL:\t\t\tACCESS DENIED. Note: you need OWNER '
             'permission\n\t\t\t\ton the object to read its ACL.')

  return (num_objs, num_bytes)
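

# The creation-time line above uses the strftime format
# '%a, %d %b %Y %H:%M:%S GMT'. Shown standalone with an arbitrary timestamp:
import datetime

timestamp = datetime.datetime(2015, 3, 14, 15, 9, 26)
print(timestamp.strftime('%a, %d %b %Y %H:%M:%S GMT'))
# Sat, 14 Mar 2015 15:09:26 GMT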
def _SetIam(self):
  """Set IAM policy for given wildcards on the command line."""

  self.continue_on_error = False
  self.recursion_requested = False
  self.all_versions = False
  if self.sub_opts:
    for o, unused_a in self.sub_opts:
      if o in ['-r', '-R']:
        self.recursion_requested = True
      elif o == '-f':
        self.continue_on_error = True
      elif o == '-a':
        self.all_versions = True
      else:
        self.RaiseInvalidArgumentException()

  file_url = self.args[0]
  patterns = self.args[1:]

  # Load the IAM policy file and raise error if the file is invalid JSON or
  # does not exist.
  try:
    with open(file_url, 'r') as fp:
      bindings = json.loads(fp.read())
  except (IOError, ValueError):
    raise ArgumentException('Invalid IAM policy file "%s".' % file_url)

  policy = apitools_messages.Policy(bindings=bindings)

  self.everything_set_okay = True

  # This list of wildcard strings will be handled by NameExpansionIterator.
  threaded_wildcards = []

  for pattern in patterns:
    surl = StorageUrlFromString(pattern)
    if surl.IsBucket():
      if self.recursion_requested:
        surl.object_name = '*'
        threaded_wildcards.append(surl.url_string)
      else:
        self.SetIamHelper(surl, policy)
    else:
      threaded_wildcards.append(surl.url_string)

  # N.B.: If threaded_wildcards contains a non-existent bucket
  # (e.g. ["gs://non-existent", "gs://existent"]), NameExpansionIterator
  # will raise an exception in iter.next. This halts all iteration, even
  # when -f is set. This behavior is also evident in acl set. This behavior
  # also appears for any exception that will be raised when iterating over
  # wildcard expansions (access denied if bucket cannot be listed, etc.).
  if threaded_wildcards:
    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug,
        self.logger, self.gsutil_api,
        threaded_wildcards, self.recursion_requested,
        all_versions=self.all_versions,
        continue_on_error=self.continue_on_error or self.parallel_operations,
        bucket_listing_fields=['name'])

    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
        threaded_wildcards, self.recursion_requested,
        all_versions=self.all_versions)

    # We cannot curry policy along due to a Python2.6 bug; see comments in
    # IamCommand._PatchIam for more information.
    policy_it = itertools.repeat(protojson.encode_message(policy))
    self.Apply(
        _SetIamWrapper,
        itertools.izip(policy_it, name_expansion_iterator),
        _SetIamExceptionHandler,
        fail_on_error=not self.continue_on_error,
        seek_ahead_iterator=seek_ahead_iterator)

    self.everything_set_okay &= not GetFailureCount() > 0

  # TODO: Add an error counter for files and objects.
  if not self.everything_set_okay:
    raise CommandException('Some IAM policies could not be set.')
def RunCommand(self):
  """Command entry point for the rm command."""
  # self.recursion_requested is initialized in command.py (so it can be
  # checked in parent class for all commands).
  self.continue_on_error = False
  self.all_versions = False
  if self.sub_opts:
    for o, unused_a in self.sub_opts:
      if o == '-a':
        self.all_versions = True
      elif o == '-f':
        self.continue_on_error = True
      elif o == '-r' or o == '-R':
        self.recursion_requested = True
        self.all_versions = True

  bucket_urls_to_delete = []
  bucket_strings_to_delete = []
  if self.recursion_requested:
    bucket_fields = ['id']
    for url_str in self.args:
      url = StorageUrlFromString(url_str)
      if url.IsBucket() or url.IsProvider():
        for blr in self.WildcardIterator(url_str).IterBuckets(
            bucket_fields=bucket_fields):
          bucket_urls_to_delete.append(blr.storage_url)
          bucket_strings_to_delete.append(url_str)

  self.preconditions = PreconditionsFromHeaders(self.headers or {})

  # Used to track if any files failed to be removed.
  self.everything_removed_okay = True

  try:
    # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api,
        self.args, self.recursion_requested, project_id=self.project_id,
        all_versions=self.all_versions,
        continue_on_error=self.continue_on_error or self.parallel_operations)

    # Perform remove requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
               _RemoveExceptionHandler,
               fail_on_error=(not self.continue_on_error))

  # Assuming the bucket has versioning enabled, URLs that don't map to
  # objects should throw an error even with all_versions, since the prior
  # round of deletes only sends objects to a history table.
  # This assumption that rm -a is only called for versioned buckets should
  # be corrected, but the fix is non-trivial.
  except CommandException as e:
    # Don't raise if there are buckets to delete -- it's valid to say:
    #   gsutil rm -r gs://some_bucket
    # if the bucket is empty.
    if not bucket_urls_to_delete and not self.continue_on_error:
      raise
    # Reset the failure count if we failed due to an empty bucket that we're
    # going to delete.
    msg = 'No URLs matched: '
    if msg in str(e):
      parts = str(e).split(msg)
      if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
        ResetFailureCount()
  except ServiceException, e:
    if not self.continue_on_error:
      raise
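

# The rm command above derives request preconditions from user-supplied
# headers via PreconditionsFromHeaders. The sketch below is a hypothetical
# illustration of that mapping: it returns a plain dict rather than the real
# Preconditions object, and only the header names are actual GCS headers.
def preconditions_from_headers(headers):
  """Extracts generation/metageneration match preconditions from headers."""
  return {
      'gen_match': headers.get('x-goog-if-generation-match'),
      'meta_gen_match': headers.get('x-goog-if-metageneration-match'),
  }


print(preconditions_from_headers({'x-goog-if-generation-match': '5'}))
# {'gen_match': '5', 'meta_gen_match': None}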