示例#1
0
    def __iter__(self):
        """Expand every URL in self._urls and yield the matching resources.

        When recursion was requested, each matched container is expanded with
        a trailing '**' wildcard and its children are yielded in place of the
        container itself. Every other match is yielded as-is.

        Yields:
          NameExpansionResult instance.

        Raises:
          InvalidUrlError: A URL matched nothing.
        """
        for url in self._urls:
            matches = plurality_checkable_iterator.PluralityCheckableIterator(
                wildcard_iterator.get_wildcard_iterator(url))
            if matches.is_empty():
                raise errors.InvalidUrlError(
                    '{} matched no objects.'.format(url))

            # Walk all the resource_reference.Resource objects that matched.
            for match in matches:
                expand_children = (self._recursion_requested
                                   and match.is_container())
                if not expand_children:
                    yield NameExpansionResult(match, match.storage_url)
                    continue

                # A trailing '**' fetches every object under the container.
                recursive_url = match.storage_url.join('**')
                for child in wildcard_iterator.get_wildcard_iterator(
                        recursive_url.url_string):
                    yield NameExpansionResult(child, match.storage_url)
示例#2
0
  def Run(self, args):
    """Lists the requested cloud URLs, or the default provider if none given.

    Args:
      args: Parsed command arguments. Reads path, full, json, long,
        all_versions, buckets, etag, readable_sizes and recursive.

    Raises:
      InvalidUrlError: A provided path is not a cloud URL.
    """
    encryption_util.initialize_key_store(args)

    if not args.path:
      # Nothing was requested explicitly, so list the default provider.
      storage_urls = [storage_url.CloudUrl(cloud_api.DEFAULT_PROVIDER)]
    else:
      # Parse every path before validating any of them.
      storage_urls = []
      for path in args.path:
        storage_urls.append(storage_url.storage_url_from_string(path))
      for parsed_url in storage_urls:
        if isinstance(parsed_url, storage_url.CloudUrl):
          continue
        raise errors.InvalidUrlError(
            'Ls only works for cloud URLs.'
            ' Error for: {}'.format(parsed_url.url_string))

    # Flags are honored in priority order: full, then json, then long.
    detail_levels = cloud_list_task.DisplayDetail
    if args.full:
      display_detail = detail_levels.FULL
    elif args.json:
      display_detail = detail_levels.JSON
    elif args.long:
      display_detail = detail_levels.LONG
    else:
      display_detail = detail_levels.SHORT

    tasks = [
        cloud_list_task.CloudListTask(
            url,
            all_versions=args.all_versions,
            buckets_flag=args.buckets,
            display_detail=display_detail,
            include_etag=args.etag,
            readable_sizes=args.readable_sizes,
            recursion_flag=args.recursive)
        for url in storage_urls
    ]
    task_executor.execute_tasks(tasks, parallelizable=False)
    def from_url_string(cls, url_string):
        """Build a cloud URL object from its string form.

        Args:
          url_string (str): Cloud storage url of the form gs://bucket/object

        Returns:
          An instance of cls built from the scheme, bucket name, object name
          and generation parsed out of url_string.

        Raises:
          InvalidUrlError: Raised if the url_string is not a valid cloud url.
        """
        scheme = _get_scheme_from_url_string(url_string)

        # Drop the leading "<scheme>://": gs://a/b/c/d#num => a/b/c/d#num
        prefix_length = len(scheme.value) + len('://')
        remainder = url_string[prefix_length:]

        # A third slash (e.g. "gs:///bucket") leaves a leading '/' behind.
        if remainder.startswith('/'):
            raise errors.InvalidUrlError(
                'Cloud URL scheme should be followed by colon and two slashes: "://".'
                ' Found: "{}"'.format(url_string))

        # a/b/c/d#num => a, b/c/d#num
        bucket_name, _, object_and_generation = remainder.partition(
            CLOUD_URL_DELIMITER)

        # b/c/d#num => b/c/d, num
        object_name, _, generation = object_and_generation.partition('#')

        return cls(scheme, bucket_name, object_name, generation)
def get_wildcard_iterator(url_str,
                          all_versions=False,
                          fields_scope=cloud_api.FieldsScope.NO_ACL,
                          get_bucket_metadata=False,
                          ignore_symlinks=False):
    """Build the wildcard iterator that matches the kind of URL given.

    Args:
      url_str (str): URL string which may contain wildcard characters.
      all_versions (bool): If true, the iterator yields all versions of
          objects matching the wildcard. If false, yields just the live
          object version. Only passed to the cloud iterator.
      fields_scope (cloud_api.FieldsScope): Determines amount of metadata
          returned by API. Only passed to the cloud iterator.
      get_bucket_metadata (bool): If true, perform a bucket GET request when
          fetching bucket resources. Only passed to the cloud iterator.
      ignore_symlinks (bool): Skip over symlinks instead of following them.
          Only passed to the file iterator.

    Returns:
      A CloudWildcardIterator for cloud URLs or a FileWildcardIterator for
      file URLs.

    Raises:
      InvalidUrlError: The parsed URL is neither a cloud nor a file URL.
    """
    parsed_url = storage_url.storage_url_from_string(url_str)

    if isinstance(parsed_url, storage_url.CloudUrl):
        return CloudWildcardIterator(
            parsed_url,
            all_versions=all_versions,
            fields_scope=fields_scope,
            get_bucket_metadata=get_bucket_metadata)

    if isinstance(parsed_url, storage_url.FileUrl):
        return FileWildcardIterator(parsed_url,
                                    ignore_symlinks=ignore_symlinks)

    raise command_errors.InvalidUrlError('Unknown url type %s.' % parsed_url)
示例#5
0
    def Run(self, args):
        """Lists the requested cloud URLs, or the default provider if none.

        Args:
          args: Parsed command arguments. Reads path, full, json, long,
            all_versions, etag and recursive.

        Raises:
          InvalidUrlError: A provided path is not a cloud URL.
        """
        if not args.path:
            # Nothing was requested explicitly: list the default provider.
            storage_urls = [storage_url.CloudUrl(cloud_api.DEFAULT_PROVIDER)]
        else:
            # Parse every path before validating any of them.
            storage_urls = []
            for path in args.path:
                storage_urls.append(
                    storage_url.storage_url_from_string(path))
            for parsed_url in storage_urls:
                if isinstance(parsed_url, storage_url.CloudUrl):
                    continue
                raise errors.InvalidUrlError(
                    'Ls only works for cloud URLs.'
                    ' Error for: {}'.format(parsed_url.url_string))

        # When several flags are set the last one checked wins, so the
        # effective precedence is long, then json, then full.
        detail_levels = cloud_list_task.DisplayDetail
        if args.long:
            display_detail = detail_levels.LONG
        elif args.json:
            display_detail = detail_levels.JSON
        elif args.full:
            display_detail = detail_levels.FULL
        else:
            display_detail = detail_levels.SHORT

        tasks = [
            cloud_list_task.CloudListTask(url,
                                          all_versions=args.all_versions,
                                          display_detail=display_detail,
                                          include_etag=args.etag,
                                          recursion_flag=args.recursive)
            for url in storage_urls
        ]
        task_executor.ExecuteTasks(tasks, is_parallel=False)
  def execute(self, callback=None):
    """Expands the task's cloud URL and prints every relevant item.

    Args:
      callback: Optional callable invoked with no arguments once printing
        has finished.

    Raises:
      InvalidUrlError: The URL matched nothing.
    """
    scope = _translate_display_detail_to_fields_scope(
        self._display_detail, is_bucket_listing=self._cloud_url.is_provider())
    cloud_iterator = wildcard_iterator.CloudWildcardIterator(
        self._cloud_url, fields_scope=scope)
    matches = plurality_checkable_iterator.PluralityCheckableIterator(
        cloud_iterator)

    if matches.is_empty():
      raise errors.InvalidUrlError('One or more URLs matched no objects.')

    if self._cloud_url.is_provider():
      # Received a provider URL ("gs://"). List bucket names with no formatting.
      wrappers = self._recursion_helper(matches, recursion_level=0)
    elif self._recursion_flag and '**' not in self._cloud_url.url_string:
      # "**" overrides recursive flag.
      wrappers = self._recursion_helper(matches, float('inf'))
    elif matches.is_plural() or not matches.peek().is_container():
      # Several matches, or a single non-container match.
      wrappers = self._recursion_helper(matches, recursion_level=1)
    else:
      # Exactly one container was returned by the query, in which case we
      # show its contents.
      wrappers = self._get_container_iterator(
          matches.peek().storage_url, recursion_level=0)

    # TODO(b/169795589): We may display something other than JSON for FULL,
    # and make JSON its own DisplayDetail option.
    if self._display_detail == DisplayDetail.FULL:
      print_items = self._print_json_list
    else:
      print_items = self._print_row_list
    print_items(wrappers)

    if callback:
      callback()
 def _raise_if_destination_is_file_url_and_not_a_directory_or_pipe(self):
   """Rejects a file destination that is neither a container nor a pipe.

   Raises:
     InvalidUrlError: The raw destination is a FileUrl, is not a container
       according to _destination_is_container, and is not a pipe.
   """
   destination_url = self._raw_destination.storage_url
   if not isinstance(destination_url, storage_url.FileUrl):
     return
   if _destination_is_container(self._raw_destination):
     return
   if destination_url.is_pipe:
     return
   raise errors.InvalidUrlError(
       'Destination URL must name an existing directory.'
       ' Provided: {}.'.format(destination_url.object_name))
 def _raise_no_url_match_error_if_necessary(self, url_found_match_dict):
     """Raises if any URL in the mapping is flagged as having no match.

     Args:
       url_found_match_dict (dict): Maps each URL to a value that is truthy
         when the URL matched something.

     Raises:
       InvalidUrlError: At least one URL has a falsy value; the message
         lists every such URL.
     """
     unmatched_urls = []
     for url, found_match in url_found_match_dict.items():
         if found_match:
             continue
         unmatched_urls.append(url)

     if not unmatched_urls:
         return

     bullet_list = '\n-'.join(unmatched_urls)
     raise errors.InvalidUrlError(
         'The following URLs matched no objects or files:\n-{}'.format(
             bullet_list))
示例#9
0
def _get_scheme_from_url_string(url_str):
  """Extracts the scheme of a URL string, defaulting to the file scheme.

  Args:
    url_str (str): URL string, with or without a "<scheme>://" prefix.

  Returns:
    ProviderPrefix.FILE when the string contains no "://", otherwise the
    ProviderPrefix built from the lowercased text before the first "://".

  Raises:
    InvalidUrlError: The lowercased scheme text is not in VALID_SCHEMES.
  """
  scheme_text, separator, _ = url_str.partition('://')
  if not separator:
    # No "://" anywhere: file is the default scheme.
    return ProviderPrefix.FILE

  scheme_text = scheme_text.lower()
  if scheme_text not in VALID_SCHEMES:
    raise errors.InvalidUrlError('Unrecognized scheme "%s"' % scheme_text)
  return ProviderPrefix(scheme_text)
 def Run(self, args):
     """Fetches one bucket with full metadata and returns it serializable.

     Args:
       args: Parsed command arguments. Reads url.

     Returns:
       The bucket's displayable data passed through
       resource_projector.MakeSerializable.

     Raises:
       InvalidUrlError: args.url contains a wildcard.
     """
     if wildcard_iterator.contains_wildcard(args.url):
         raise errors.InvalidUrlError(
             'Describe does not accept wildcards because it returns a single'
             ' resource. Please use the `ls` or `buckets list` command for'
             ' retrieving multiple resources.')

     bucket_url = storage_url.storage_url_from_string(args.url)
     client = api_factory.get_api(bucket_url.scheme)
     bucket = client.get_bucket(bucket_url.bucket_name,
                                fields_scope=cloud_api.FieldsScope.FULL)
     displayable_data = bucket.get_displayable_bucket_data()
     # MakeSerializable will omit all the None values.
     return resource_projector.MakeSerializable(displayable_data)
def _prompt_and_add_valid_scheme(url):
    """Asks the user to pick a transfer scheme and returns the rewritten URL.

    Args:
      url: URL object whose scheme should be replaced. Its object_name is
        used in the error message raised when prompting is unavailable.

    Returns:
      The result of storage_url.switch_scheme for the chosen scheme.

    Raises:
      InvalidUrlError: Prompting is not possible in this session.
    """
    if not console_io.CanPrompt():
        raise errors.InvalidUrlError('Did you mean "posix://{}"'.format(
            url.object_name))

    scheme_options = [
        scheme.value + '://' for scheme in VALID_TRANSFER_SCHEMES
    ]
    prompt_message = (
        'Storage Transfer does not support direct file URLs: {}\n'
        'Did you mean to use "posix://"?\n'
        'Run this command with "--help" for more info,\n'
        'or select a valid scheme below.').format(url)
    chosen_index = console_io.PromptChoice(scheme_options,
                                           cancel_option=True,
                                           message=prompt_message)

    return storage_url.switch_scheme(url,
                                     VALID_TRANSFER_SCHEMES[chosen_index])
    def __init__(self,
                 scheme,
                 bucket_name=None,
                 object_name=None,
                 generation=None,
                 snapshot=None,
                 account=None):
        """Initializes an Azure URL.

        Args:
          scheme: Forwarded to the parent initializer.
          bucket_name: Forwarded to the parent initializer.
          object_name: Forwarded to the parent initializer.
          generation: Forwarded to the parent initializer.
          snapshot: Stored on the instance; any falsy value becomes None.
          account: Account name. Required despite the None default.

        Raises:
          InvalidUrlError: account is missing or empty.
        """
        super(AzureUrl, self).__init__(scheme, bucket_name, object_name,
                                       generation)
        # Collapse every falsy snapshot (e.g. an empty string) to None.
        self.snapshot = snapshot or None

        if account:
            self.account = account
        else:
            raise errors.InvalidUrlError(
                'Azure URLs must contain an account name.')
示例#13
0
    def Run(self, args):
        """Lists the requested cloud URLs, or the default provider if none.

        Args:
          args: Parsed command arguments. Reads path and recursive.

        Raises:
          InvalidUrlError: A provided path is not a cloud URL.
        """
        if not args.path:
            # Nothing was requested explicitly: list the default provider.
            storage_urls = [storage_url.CloudUrl(cloud_api.DEFAULT_PROVIDER)]
        else:
            # Parse every path before validating any of them.
            storage_urls = []
            for path in args.path:
                storage_urls.append(
                    storage_url.storage_url_from_string(path))
            for parsed_url in storage_urls:
                if isinstance(parsed_url, storage_url.CloudUrl):
                    continue
                raise errors.InvalidUrlError(
                    'Ls only works for cloud URLs.'
                    ' Error for: {}'.format(parsed_url.url_string))

        tasks = []
        for url in storage_urls:
            tasks.append(
                cloud_list_task.CloudListTask(
                    url, recursion_flag=args.recursive))
        task_executor.ExecuteTasks(tasks)
示例#14
0
    def execute(self, task_status_queue=None):
        """Expands the task's cloud URL and prints every relevant item.

        Args:
          task_status_queue: Accepted but unused; it is discarded right away.

        Raises:
          InvalidUrlError: The URL matched nothing.
        """
        # List task does not need to report status information.
        del task_status_queue

        scope = _translate_display_detail_to_fields_scope(
            self._display_detail,
            is_bucket_listing=self._cloud_url.is_provider())
        cloud_iterator = wildcard_iterator.CloudWildcardIterator(
            self._cloud_url,
            all_versions=self._all_versions,
            error_on_missing_key=False,
            fields_scope=scope,
            get_bucket_metadata=self._buckets_flag)
        matches = plurality_checkable_iterator.PluralityCheckableIterator(
            cloud_iterator)

        if matches.is_empty():
            raise errors.InvalidUrlError(
                'One or more URLs matched no objects.')

        if self._only_display_buckets:
            # Received a provider URL ("gs://") -> List all buckets.
            # Received buckets flag and bucket URL -> List matching buckets,
            #   ignoring recursion.
            wrappers = self._recursion_helper(matches, recursion_level=0)
        elif self._recursion_flag and '**' not in self._cloud_url.url_string:
            # "**" overrides recursive flag.
            wrappers = self._recursion_helper(matches, float('inf'))
        elif matches.is_plural() or not matches.peek().is_container():
            # Several matches, or a single non-container match.
            wrappers = self._recursion_helper(matches, recursion_level=1)
        else:
            # Exactly one container was returned by the query, in which case
            # we show its contents.
            wrappers = self._get_container_iterator(
                matches.peek().storage_url, recursion_level=0)

        if self._display_detail == DisplayDetail.JSON:
            print_items = self._print_json_list
        else:
            print_items = self._print_row_list
        print_items(wrappers)
    def Run(self, args):
        """Yields serializable metadata for each bucket matching the URLs.

        Because this is a generator, URL validation only runs once the
        caller starts iterating.

        Args:
          args: Parsed command arguments. Reads urls; when it is empty the
            GCS provider URL is listed instead.

        Yields:
          Each bucket's displayable data passed through
          resource_projector.MakeSerializable.

        Raises:
          InvalidUrlError: A URL is neither a provider nor a bucket URL.
        """
        if not args.urls:
            urls = [storage_url.CloudUrl(storage_url.ProviderPrefix.GCS)]
        else:
            urls = []
            for url_string in args.urls:
                parsed_url = storage_url.storage_url_from_string(url_string)
                if parsed_url.is_provider() or parsed_url.is_bucket():
                    urls.append(parsed_url)
                    continue
                raise errors.InvalidUrlError(
                    'URL does not match buckets: {}'.format(url_string))

        for url in urls:
            bucket_iterator = wildcard_iterator.get_wildcard_iterator(
                url.url_string,
                fields_scope=cloud_api.FieldsScope.FULL,
                get_bucket_metadata=True)
            for bucket in bucket_iterator:
                displayable_data = bucket.get_displayable_bucket_data()
                # MakeSerializable will omit all the None values.
                yield resource_projector.MakeSerializable(displayable_data)
示例#16
0
def get_wildcard_iterator(url_str,
                          all_versions=False,
                          fields_scope=cloud_api.FieldsScope.NO_ACL):
    """Build the wildcard iterator that matches the kind of URL given.

    Args:
      url_str (str): URL string which may contain wildcard characters.
      all_versions (bool): If true, the iterator yields all versions of
          objects matching the wildcard. If false, yields just the live
          object version. Only passed to the cloud iterator.
      fields_scope (cloud_api.FieldsScope): Determines amount of metadata
          returned by API. Only passed to the cloud iterator.

    Returns:
      A CloudWildcardIterator for cloud URLs or a FileWildcardIterator for
      file URLs.

    Raises:
      InvalidUrlError: The parsed URL is neither a cloud nor a file URL.
    """
    parsed_url = storage_url.storage_url_from_string(url_str)

    if isinstance(parsed_url, storage_url.CloudUrl):
        return CloudWildcardIterator(parsed_url, all_versions, fields_scope)

    if isinstance(parsed_url, storage_url.FileUrl):
        return FileWildcardIterator(parsed_url)

    raise errors.InvalidUrlError('Unknown url type %s.' % parsed_url)
示例#17
0
  def __iter__(self):
    """Iterates over each URL in self._urls and yield the expanded result.

    Non-container matches are yielded directly. Containers are skipped (with
    an info log) unless recursion was requested, in which case each one is
    expanded with a trailing '**' wildcard and its children are yielded.

    Yields:
      NameExpansionResult instance.

    Raises:
      InvalidUrlError: No matching objects found.
    """
    for url in self._urls:
      resources = plurality_checkable_iterator.PluralityCheckableIterator(
          wildcard_iterator.get_wildcard_iterator(url))
      # Tracks whether this URL produced at least one result; skipped
      # containers and empty containers do not count.
      is_name_expansion_iterator_empty = True
      original_storage_url = storage_url.storage_url_from_string(url)

      # Iterate over all the resource_reference.Resource objects.
      for resource in resources:
        if not resource.is_container():
          yield NameExpansionResult(resource, resource.storage_url,
                                    original_storage_url)
          is_name_expansion_iterator_empty = False
          continue

        if not self._recursion_requested:
          # Log the skipped resource itself. Formatting is_container() here
          # would always render as "True", hiding which item was omitted.
          log.info('Omitting {} because it is a container, and recursion'
                   ' is not enabled.'.format(resource))
          continue

        # Append '**' to fetch all objects under this container.
        new_storage_url = resource.storage_url.join('**')
        child_resources = wildcard_iterator.get_wildcard_iterator(
            new_storage_url.url_string)
        for child_resource in child_resources:
          yield NameExpansionResult(child_resource, resource.storage_url,
                                    original_storage_url)
          is_name_expansion_iterator_empty = False

      if is_name_expansion_iterator_empty:
        raise errors.InvalidUrlError(
            '{} matched no objects or files.'.format(url))
def storage_url_from_string(url_string):
    """Create the StorageUrl flavor that matches a string's scheme.

    Args:
      url_string (str): Cloud url or local filepath.

    Returns:
      StorageUrl object: a FileUrl, PosixFileSystemUrl, AzureUrl or CloudUrl
      depending on the scheme.

    Raises:
      InvalidUrlError: Unrecognized URL scheme.
    """
    scheme = _get_scheme_from_url_string(url_string)

    # Pick the constructor first, then build the URL in one place.
    if scheme == ProviderPrefix.FILE:
        url_factory = FileUrl
    elif scheme == ProviderPrefix.POSIX:
        url_factory = PosixFileSystemUrl
    elif scheme in VALID_HTTP_SCHEMES:
        # Azure's scheme breaks from other clouds.
        url_factory = AzureUrl.from_url_string
    elif scheme in VALID_CLOUD_SCHEMES:
        url_factory = CloudUrl.from_url_string
    else:
        raise errors.InvalidUrlError('Unrecognized URL scheme.')

    return url_factory(url_string)
示例#19
0
    def Run(self, args):
        """Validates the bucket URLs and runs the bucket tasks for them.

        Args:
          args: Parsed command arguments. Reads urls.

        Raises:
          InvalidUrlError: A URL is not a bucket URL.
        """
        for url_string in args.urls:
            parsed_url = storage_url.storage_url_from_string(url_string)
            if parsed_url.is_bucket():
                continue
            raise errors.InvalidUrlError(
                'buckets delete only accepts cloud bucket URLs. Example:'
                ' "gs://bucket"')

        task_status_queue = multiprocessing.Queue()

        source_expansion = name_expansion.NameExpansionIterator(
            args.urls, include_buckets=True)
        task_factory = delete_task_iterator_factory.DeleteTaskIteratorFactory(
            source_expansion, task_status_queue=task_status_queue)
        bucket_tasks = plurality_checkable_iterator.PluralityCheckableIterator(
            task_factory.bucket_iterator())

        progress_args = task_status.ProgressManagerArgs(
            increment_type=task_status.IncrementType.INTEGER,
            manifest_path=None)
        task_executor.execute_tasks(bucket_tasks,
                                    parallelizable=True,
                                    task_status_queue=task_status_queue,
                                    progress_manager_args=progress_args)
 def _validate_scheme(self):
     """Raises InvalidUrlError unless AzureUrl accepts self.scheme."""
     if AzureUrl.is_valid_scheme(self.scheme):
         return
     raise errors.InvalidUrlError(
         'Invalid Azure scheme "{}"'.format(self.scheme))
示例#21
0
 def _validate_object_name(self):
     """Raises InvalidUrlError when the object name is '.' or '..'."""
     if self.object_name in ('.', '..'):
         raise errors.InvalidUrlError(
             '%s is an invalid root-level object name' % self.object_name)
示例#22
0
 def _validate_scheme(self):
     """Raises InvalidUrlError unless self.scheme is a valid cloud scheme."""
     if self.scheme in VALID_CLOUD_SCHEMES:
         return
     raise errors.InvalidUrlError('Unrecognized scheme "%s"' % self.scheme)
  def _get_destination_suffix_for_recursion(self, destination_container,
                                            source):
    """Returns the suffix required to complete the destination URL.

    Let's assume the following:
      User command => cp -r */base_dir gs://dest/existing_prefix
      source.resource.storage_url => a/base_dir/c/d.txt
      source.expanded_url => a/base_dir
      destination_container.storage_url => gs://dest/existing_prefix

    If the destination container exists, the entire directory gets copied:
    Result => gs://dest/existing_prefix/base_dir/c/d.txt

    Args:
      destination_container (resource_reference.Resource): The destination
        container.
      source (NameExpansionResult): Represents the source resource and the
        expanded parent url in case of recursion.

    Returns:
      (str) The suffix to be appended to the destination container.

    Raises:
      InvalidUrlError: There are multiple top-level sources and the expanded
        URL is not a valid parent directory, so name conflicts cannot be
        ruled out.
    """
    source_prefix_to_ignore = storage_url.rstrip_one_delimiter(
        source.expanded_url.versionless_url_string,
        source.expanded_url.delimiter)

    expanded_url_is_valid_parent = _is_expanded_url_valid_parent_dir(
        source.expanded_url)
    if not expanded_url_is_valid_parent and self._has_multiple_top_level_sources:
      # To avoid top-level name conflicts, we need to copy the parent dir.
      # However, that cannot be done because the parent dir has an invalid name.
      raise errors.InvalidUrlError(
          'Presence of multiple top-level sources and invalid expanded URL'
          ' make file name conflicts possible for URL: {}'.format(
              source.resource))

    is_top_level_source_object_name_conflict_possible = (
        isinstance(destination_container, resource_reference.UnknownResource)
        and self._has_multiple_top_level_sources)
    destination_is_existing_dir = (not isinstance(
        destination_container, resource_reference.UnknownResource) and
                                   destination_container.is_container())
    if is_top_level_source_object_name_conflict_possible or (
        expanded_url_is_valid_parent and destination_is_existing_dir):
      # Preserve the top-level source directory, and remove the leaf name
      # so that it gets added to the destination.
      source_prefix_to_ignore, _, _ = source_prefix_to_ignore.rpartition(
          source.expanded_url.delimiter)
      if not source_prefix_to_ignore:
        # In case of Windows, the source URL might not contain any Windows
        # delimiter if it was a single directory (e.g file://dir) and
        # source_prefix_to_ignore will be empty. Set it to <scheme>://.
        # TODO(b/169093672) This will not be required if we get rid of file://
        source_prefix_to_ignore = source.expanded_url.scheme.value + '://'

    full_source_url = source.resource.storage_url.versionless_url_string
    # Split on the first occurrence only. The prefix text can reappear deeper
    # in the path (e.g. prefix "a" in "a/dir/a/file"); an unbounded split
    # would cut the suffix off at that second occurrence.
    suffix_for_destination = full_source_url.split(
        source_prefix_to_ignore, 1)[1]

    # Windows uses \ as a delimiter. Force the suffix to use the same
    # delimiter used by the destination container.
    source_delimiter = source.resource.storage_url.delimiter
    destination_delimiter = destination_container.storage_url.delimiter
    if source_delimiter != destination_delimiter:
      return suffix_for_destination.replace(source_delimiter,
                                            destination_delimiter)
    return suffix_for_destination
    def _expand_object_path(self, bucket_name):
        """Expands the wildcards in the URL's object name within one bucket.

        The object name is expanded one wildcard segment at a time. Names that
        still have a suffix left to expand are queued and processed in
        first-in, first-out order. A prefix whose own name contains wildcard
        characters is not expanded further; the error for it is recorded and
        raised only after every queued name has been processed. If several
        such prefixes are seen, only the last recorded error is raised.

        Args:
          bucket_name (str): Name of the bucket.

        Yields:
          The result of self._decrypt_resource_if_necessary for each matching
          resource. NOTE(review): the original docstring says these are
          ObjectResource or PrefixResource objects -- confirm against
          _decrypt_resource_if_necessary.

        Raises:
          InvalidUrlError: A prefix with wildcard characters in its name was
            returned while more of the path remained to be expanded.
        """
        # Retain original name to see if user wants only prefixes.
        original_object_name = self._url.object_name
        # Force API to return prefix resource not the prefix's contents.
        object_name = storage_url.rstrip_one_delimiter(original_object_name)

        # FIFO queue of object-name patterns that still need to be listed.
        names_needing_expansion = collections.deque([object_name])
        # Deferred error; raised after the queue is drained.
        error = None
        while names_needing_expansion:
            name = names_needing_expansion.popleft()

            # Parse out the prefix, delimiter, filter_pattern and suffix.
            # Given a string 'a/b*c/d/e*f/g.txt', this will return
            # CloudWildcardParts(prefix='a/b', filter_pattern='*c',
            #                    delimiter='/', suffix='d/e*f/g.txt')
            wildcard_parts = CloudWildcardParts.from_string(
                name, self._url.delimiter)

            # Fetch all the objects and prefixes. All versions are requested
            # whenever the URL carries a generation, even if all_versions
            # was not set. An empty prefix is sent as None.
            resource_iterator = self._client.list_objects(
                all_versions=self._all_versions or bool(self._url.generation),
                bucket_name=bucket_name,
                delimiter=wildcard_parts.delimiter,
                fields_scope=self._fields_scope,
                prefix=wildcard_parts.prefix or None)

            # We have all the objects and prefixes that matched the
            # wildcard_parts.prefix. Use the filter_pattern to eliminate non-matching
            # objects and prefixes.
            filtered_resources = self._filter_resources(
                resource_iterator,
                wildcard_parts.prefix + wildcard_parts.filter_pattern)

            for resource in filtered_resources:
                resource_path = resource.storage_url.object_name
                if wildcard_parts.suffix:
                    # More of the path remains, so only prefixes can lead to
                    # further matches; any other resource is dropped here.
                    if isinstance(resource, resource_reference.PrefixResource):
                        # Suffix is present, which indicates that we have more wildcards to
                        # expand. Let's say object_name is a/b1c. Then the new string that
                        # we want to expand will be a/b1c/d/e*f/g.txt
                        if WILDCARD_REGEX.search(resource_path):
                            # Record the error and keep going so the other
                            # matches are still yielded before it is raised.
                            error = command_errors.InvalidUrlError(
                                'Cloud folders named with wildcards are not supported.'
                                ' API returned {}'.format(resource))
                        else:
                            names_needing_expansion.append(
                                resource_path + wildcard_parts.suffix)
                else:
                    # Make sure an object is not returned if the original query was for
                    # a prefix.
                    if (not resource_path.endswith(self._url.delimiter)
                            and original_object_name.endswith(
                                self._url.delimiter)):
                        continue

                    yield self._decrypt_resource_if_necessary(resource)

        if error:
            raise error
 def _raise_error_if_source_matches_destination(self):
   """Rejects a single source whose expanded URL equals the destination.

   Nothing is checked when there are multiple sources.

   Raises:
     InvalidUrlError: The first source's expanded URL equals the raw
       destination's storage URL.
   """
   if self._multiple_sources:
     return

   first_source_url = self._source_name_iterator.peek().expanded_url
   if first_source_url != self._raw_destination.storage_url:
     return
   raise errors.InvalidUrlError(
       'Source URL matches destination URL: {}'.format(first_source_url))
 def validate_url_string(cls, url_string, scheme):
     """Checks that a URL string and scheme describe an Azure URL.

     Args:
       url_string (str): URL string; must contain AZURE_DOMAIN.
       scheme: Scheme checked with AzureUrl.is_valid_scheme.

     Raises:
       InvalidUrlError: The URL string lacks AZURE_DOMAIN or the scheme is
         not a valid Azure scheme.
     """
     # The scheme is checked once, inside the condition; a bare
     # is_valid_scheme call whose result is discarded adds nothing.
     if not (AZURE_DOMAIN in url_string
             and AzureUrl.is_valid_scheme(scheme)):
         raise errors.InvalidUrlError(
             'Invalid Azure URL: "{}"'.format(url_string))