def _FieldedListingIterator(cls, gsutil_api, base_url_str, desc):
  """Iterator over base_url_str formatting output per _BuildTmpOutputLine.

  Args:
    cls: Command instance.
    gsutil_api: gsutil Cloud API instance to use for bucket listing.
    base_url_str: The top-level URL string over which to iterate.
    desc: 'source' or 'destination'.

  Yields:
    Output line formatted per _BuildTmpOutputLine.
  """
  base_url = StorageUrlFromString(base_url_str)
  if base_url.scheme == 'file' and not cls.recursion_requested:
    iterator = _LocalDirIterator(base_url)
  else:
    if cls.recursion_requested:
      wildcard = '%s/**' % base_url_str.rstrip('/\\')
    else:
      wildcard = '%s/*' % base_url_str.rstrip('/\\')
    iterator = CreateWildcardIterator(
        wildcard, gsutil_api, debug=cls.debug,
        project_id=cls.project_id).IterObjects(
            # Request just the needed fields, to reduce bandwidth usage.
            bucket_listing_fields=['crc32c', 'md5Hash', 'name', 'size'])

  i = 0
  for blr in iterator:
    # Various GUI tools (like the GCS web console) create placeholder objects
    # ending with '/' when the user creates an empty directory. Normally these
    # tools should delete those placeholders once objects have been written
    # "under" the directory, but sometimes the placeholders are left around.
    # We need to filter them out here, otherwise if the user tries to rsync
    # from GCS to a local directory it will result in a directory/file
    # conflict (e.g., trying to download an object called "mydata/" where the
    # local directory "mydata" exists).
    url = blr.storage_url
    if IsCloudSubdirPlaceholder(url, blr=blr):
      # We used to output the message 'Skipping cloud sub-directory placeholder
      # object...' but we no longer do so because it caused customer confusion.
      continue
    if (cls.exclude_symlinks and url.IsFileUrl() and
        os.path.islink(url.object_name)):
      continue
    if cls.exclude_pattern:
      str_to_check = url.url_string[len(base_url_str):]
      if str_to_check.startswith(url.delim):
        str_to_check = str_to_check[1:]
      if cls.exclude_pattern.match(str_to_check):
        continue
    i += 1
    if i % _PROGRESS_REPORT_LISTING_COUNT == 0:
      cls.logger.info('At %s listing %d...', desc, i)
    yield _BuildTmpOutputLine(blr)
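

# A minimal, self-contained sketch of the wildcard construction inlined above,
# assuming only that recursive listings use '**' and shallow listings use '*'.
# The helper name _build_listing_wildcard is illustrative and is not part of
# gsutil; the real logic lives inside _FieldedListingIterator.
def _build_listing_wildcard(base_url_str, recursion_requested):
  """Returns the wildcard used to enumerate the contents of base_url_str."""
  base = base_url_str.rstrip('/\\')
  return '%s/**' % base if recursion_requested else '%s/*' % base

# For example (hypothetical URLs):
#   _build_listing_wildcard('gs://bucket/dir/', True)  -> 'gs://bucket/dir/**'
#   _build_listing_wildcard('gs://bucket/dir', False)  -> 'gs://bucket/dir/*'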