def __iter__(self, bucket_listing_fields=None):
  """Iterator over local files/directories matched by the file wildcard.

  When the URL contains no wildcard characters, yields the single matching
  file or directory.

  Args:
    bucket_listing_fields: Iterable of fields to include in listings.
        Ex. ['size']. Currently only 'size' is supported; when present,
        each yielded BucketListingObject.root_object is populated with
        the file name and size.

  Raises:
    WildcardException: if an invalid wildcard is found.

  Yields:
    BucketListingRef of type OBJECT (for files) or PREFIX (for directories).
  """
  include_size = (bucket_listing_fields
                  and 'size' in set(bucket_listing_fields))
  wildcard = self.wildcard_url.object_name
  recursion_match = FLAT_LIST_REGEX.match(wildcard)
  if not recursion_match:
    # Plain (non-recursive) wildcard; let glob perform the expansion.
    path_iter = glob.iglob(wildcard)
  else:
    # Recursive wildcarding request ('.../**/...').
    # Example input: wildcard = '/tmp/tmp2pQJAX/**/*'
    base_dir = recursion_match.group('before')[:-1]
    remaining_wildcard = recursion_match.group('after')
    # For the example above, base_dir = '/tmp/tmp2pQJAX' and
    # remaining_wildcard = '/*'.
    if remaining_wildcard.startswith('*'):
      raise WildcardException('Invalid wildcard with more than 2 consecutive '
                              '*s (%s)' % wildcard)
    # A bare trailing '**' is treated as '**/*', so file://tmp/** is
    # equivalent to file://tmp/**/*.
    if not remaining_wildcard:
      remaining_wildcard = '*'
    # Strip the leading path separator(s).
    remaining_wildcard = remaining_wildcard.lstrip(os.sep)
    path_iter = self._IterDir(base_dir, remaining_wildcard)
  for path in path_iter:
    expanded_url = StorageUrlFromString(path)
    try:
      # Optionally skip symlinks before classifying the path.
      if self.ignore_symlinks and os.path.islink(path):
        continue
      if os.path.isdir(path):
        yield BucketListingPrefix(expanded_url)
      else:
        root_object = _GetFileObject(path) if include_size else None
        yield BucketListingObject(expanded_url, root_object=root_object)
    except UnicodeEncodeError:
      raise CommandException('\n'.join(
          textwrap.wrap(_UNICODE_EXCEPTION_TEXT % repr(path))))
def testContainsWildcardMatchesNotObject(self, mock_CreateWildcardIterator,
                                         mock_gsutil_api):
  """Tests that a wildcard matching a prefix yields an existing container."""
  prefix_url = StorageUrlFromString('gs://test/helloworld')
  # Make wildcard expansion produce a single PREFIX listing.
  mock_CreateWildcardIterator.return_value = iter(
      [BucketListingPrefix(prefix_url)])
  exp_url, have_existing_dst_container = ExpandUrlToSingleBlr(
      'gs://test/hello*/', mock_gsutil_api, 'project_id', False,
      CreateOrGetGsutilLogger('copy_test'))
  # A prefix match means the destination container already exists.
  self.assertTrue(have_existing_dst_container)
  self.assertEqual(exp_url, prefix_url)
def _GetPrefixRef(self, bucket_url_string, prefix):
  """Builds a BucketListingRef of type PREFIX from the arguments.

  Args:
    bucket_url_string: Wildcardless string describing the containing bucket.
    prefix: gsutil_api Prefix for populating the BucketListingRef.

  Returns:
    BucketListingRef of type PREFIX.
  """
  url_string = '%s%s' % (bucket_url_string, prefix)
  return BucketListingPrefix(StorageUrlFromString(url_string),
                             root_object=prefix)
def __iter__(self):
  """Iterator over local files/directories matched by the file wildcard.

  When the URL contains no wildcard characters, yields the single matching
  file or directory.

  Raises:
    WildcardException: if an invalid wildcard is found.

  Yields:
    BucketListingRef of type OBJECT (for files) or PREFIX (for directories).
  """
  wildcard = self.wildcard_url.object_name
  recursion_match = FLAT_LIST_REGEX.match(wildcard)
  if recursion_match:
    # Recursive wildcarding request ('.../**/...').
    # Example input: wildcard = '/tmp/tmp2pQJAX/**/*'
    base_dir = recursion_match.group('before')[:-1]
    remaining_wildcard = recursion_match.group('after')
    # For the example above, base_dir = '/tmp/tmp2pQJAX' and
    # remaining_wildcard = '/*'.
    if remaining_wildcard.startswith('*'):
      raise WildcardException('Invalid wildcard with more than 2 consecutive '
                              '*s (%s)' % wildcard)
    # A bare trailing '**' is treated as '**/*', so file://tmp/** is
    # equivalent to file://tmp/**/*.
    remaining_wildcard = remaining_wildcard or '*'
    # Strip the leading path separator(s).
    path_iter = self._IterDir(base_dir, remaining_wildcard.lstrip(os.sep))
  else:
    # Plain (non-recursive) wildcard; let glob perform the expansion.
    path_iter = glob.iglob(wildcard)
  for path in path_iter:
    expanded_url = StorageUrlFromString(path)
    try:
      if os.path.isdir(path):
        yield BucketListingPrefix(expanded_url)
      else:
        yield BucketListingObject(expanded_url)
    except UnicodeEncodeError:
      raise CommandException('\n'.join(
          textwrap.wrap(_UNICODE_EXCEPTION_TEXT % repr(path))))