Example #1
0
def _FieldedListingIterator(cls, gsutil_api, base_url_str, desc):
  """Iterates over base_url_str, yielding lines built by _BuildTmpOutputLine.

  Args:
    cls: Command instance.
    gsutil_api: gsutil Cloud API instance to use for bucket listing.
    base_url_str: The top-level URL string over which to iterate.
    desc: 'source' or 'destination'.

  Yields:
    Output line formatted per _BuildTmpOutputLine.
  """
  base_url = StorageUrlFromString(base_url_str)
  if base_url.scheme == 'file' and not cls.recursion_requested:
    # Non-recursive local listing doesn't need wildcard expansion.
    listing_iter = _LocalDirIterator(base_url)
  else:
    suffix = '/**' if cls.recursion_requested else '/*'
    wildcard_str = base_url_str.rstrip('/\\') + suffix
    listing_iter = CreateWildcardIterator(
        wildcard_str, gsutil_api, debug=cls.debug,
        project_id=cls.project_id).IterObjects(
            # Limit the listing to just the fields we consume, to reduce
            # bandwidth usage.
            bucket_listing_fields=['crc32c', 'md5Hash', 'name', 'size'])

  listed = 0
  for blr in listing_iter:
    url = blr.storage_url
    # Various GUI tools (like the GCS web console) create placeholder objects
    # ending with '/' when the user creates an empty directory. Normally these
    # tools should delete those placeholders once objects have been written
    # "under" the directory, but sometimes the placeholders are left around.
    # Filter them out here; otherwise an rsync from GCS to a local directory
    # would hit a directory/file conflict (e.g., trying to download an object
    # called "mydata/" where the local directory "mydata" exists).
    if IsCloudSubdirPlaceholder(url, blr=blr):
      # Intentionally silent: the old 'Skipping cloud sub-directory
      # placeholder object...' message caused customer confusion.
      continue
    if (cls.exclude_symlinks and url.IsFileUrl()
        and os.path.islink(url.object_name)):
      continue
    if cls.exclude_pattern:
      # Match the exclude pattern against the path relative to base_url_str,
      # with any leading delimiter stripped.
      relative_name = url.url_string[len(base_url_str):]
      if relative_name.startswith(url.delim):
        relative_name = relative_name[1:]
      if cls.exclude_pattern.match(relative_name):
        continue
    listed += 1
    if not listed % _PROGRESS_REPORT_LISTING_COUNT:
      cls.logger.info('At %s listing %d...', desc, listed)
    yield _BuildTmpOutputLine(blr)
def _FieldedListingIterator(cls, gsutil_api, url_str, desc):
  """Iterates over url_str, yielding lines built by _BuildTmpOutputLine.

  Args:
    cls: Command instance.
    gsutil_api: gsutil Cloud API instance to use for bucket listing.
    url_str: The URL string over which to iterate.
    desc: 'source' or 'destination'.

  Yields:
    Output line formatted per _BuildTmpOutputLine.
  """
  suffix = '/**' if cls.recursion_requested else '/*'
  wildcard = url_str.rstrip('/\\') + suffix
  obj_iter = CreateWildcardIterator(
      wildcard, gsutil_api, debug=cls.debug,
      project_id=cls.project_id).IterObjects(
          # Limit the listing to just the fields we consume, to reduce
          # bandwidth usage.
          bucket_listing_fields=['crc32c', 'md5Hash', 'name', 'size'])
  listed = 0
  for blr in obj_iter:
    url = blr.storage_url
    # Various GUI tools (like the GCS web console) create placeholder objects
    # ending with '/' when the user creates an empty directory. Normally these
    # tools should delete those placeholders once objects have been written
    # "under" the directory, but sometimes the placeholders are left around.
    # Filter them out here; otherwise an rsync from GCS to a local directory
    # would hit a directory/file conflict (e.g., trying to download an object
    # called "mydata/" where the local directory "mydata" exists).
    if IsCloudSubdirPlaceholder(url, blr=blr):
      cls.logger.info('Skipping cloud sub-directory placeholder object (%s) '
                      'because such objects aren\'t needed in (and would '
                      'interfere with) directories in the local file system',
                      url)
      continue
    if (cls.exclude_symlinks and url.IsFileUrl()
        and os.path.islink(url.object_name)):
      continue
    listed += 1
    if not listed % _PROGRESS_REPORT_LISTING_COUNT:
      cls.logger.info('At %s listing %d...', desc, listed)
    yield _BuildTmpOutputLine(blr)