Exemplo n.º 1
0
    def __iter__(self):
        """Expands every URL in self._urls and yields the matching resources.

        A matched container is replaced by the resources found under it
        (looked up by joining '**' onto its storage URL) when recursion was
        requested; every other match is yielded as is.

        Yields:
          NameExpansionResult instance.

        Raises:
          errors.InvalidUrlError: A URL matched no resources.
        """
        for url in self._urls:
            matches = plurality_checkable_iterator.PluralityCheckableIterator(
                wildcard_iterator.get_wildcard_iterator(url))
            if matches.is_empty():
                raise errors.InvalidUrlError(
                    '{} matched no objects.'.format(url))

            # Each match is a resource_reference.Resource object.
            for match in matches:
                expand_container = (
                    self._recursion_requested and match.is_container())
                if not expand_container:
                    yield NameExpansionResult(match, match.storage_url)
                    continue

                # A trailing '**' fetches all objects under this container.
                recursive_url = match.storage_url.join('**')
                for descendant in wildcard_iterator.get_wildcard_iterator(
                        recursive_url.url_string):
                    yield NameExpansionResult(descendant, match.storage_url)
    def test_gcs_bucket_wildcard_and_object_wildcard(self, fields_scope,
                                                     client):
        """Test multiple object matches in multiple buckets.

        Expands 'gs://bucket*/*' and checks that one object per bucket is
        returned, that buckets are listed exactly once, and that objects are
        listed once per bucket with the expected keyword arguments.
        """
        self.bucket1_object = test_resources.get_object_resource(
            storage_url.ProviderPrefix.GCS, 'bucket1', 'a.txt')
        self.bucket2_object = test_resources.get_object_resource(
            storage_url.ProviderPrefix.GCS, 'bucket2', 'a.txt')
        client.list_buckets.side_effect = [self.buckets_response]
        client.list_objects.side_effect = [[self.bucket1_object],
                                           [self.bucket2_object]]

        resource_iterator = wildcard_iterator.get_wildcard_iterator(
            'gs://bucket*/*', fields_scope=fields_scope)

        self.assertEqual(list(resource_iterator),
                         [self.bucket1_object, self.bucket2_object])
        client.list_buckets.assert_called_once_with(
            cloud_api.FieldsScope(fields_scope))
        # Compare against mock_calls instead of assigning to it: an
        # assignment overwrites the recorded calls and verifies nothing.
        self.assertEqual(client.list_objects.mock_calls, [
            mock.call(all_versions=False,
                      bucket_name='bucket1',
                      delimiter='/',
                      fields_scope=fields_scope,
                      prefix=None),
            mock.call(all_versions=False,
                      bucket_name='bucket2',
                      delimiter='/',
                      fields_scope=fields_scope,
                      prefix=None)
        ])
Exemplo n.º 3
0
  def _expand_destination_wildcards(self):
    """Resolves the destination string to at most one resource.

    Much like the unix cp command, only a single user-specified destination
    is supported, so the expanded destination may not match several
    resources.

    Returns:
      A resource_reference.Resource, or None if no matching resource is found.

    Raises:
      ValueError if more than one resource is matched, or the destination
      contained a wildcard and no resources were matched.
    """
    destination = self._destination_string
    matches = plurality_checkable_iterator.PluralityCheckableIterator(
        wildcard_iterator.get_wildcard_iterator(destination))

    # A wildcard that expanded to nothing cannot name a destination.
    wildcard_left_unexpanded = (
        matches.is_empty() and wildcard_iterator.contains_wildcard(destination))

    if matches.is_plural() or wildcard_left_unexpanded:
      raise ValueError(
          'Destination ({}) must match exactly one URL'.format(destination))

    if matches.is_empty():
      return None
    return next(matches)
 def _get_top_level_iterator(self):
     """Yields (url, name expansion result) pairs for each top-level match.

     Every URL string in self._urls is expanded with the wildcard iterator,
     honoring self.all_versions and self._ignore_symlinks.

     Yields:
       Tuple of the input URL string and the value returned by
       self._get_name_expansion_result for one matched resource.
     """
     for url in self._urls:
         resources = wildcard_iterator.get_wildcard_iterator(
             url,
             all_versions=self.all_versions,
             ignore_symlinks=self._ignore_symlinks)
         # The parsed input URL is identical for every match of this URL,
         # so build it once per URL rather than once per resource.
         original_storage_url = storage_url.storage_url_from_string(url)
         for resource in resources:
             yield url, self._get_name_expansion_result(
                 resource, resource.storage_url, original_storage_url)
 def update_task_iterator(self, args):
     """Yields an UpdateBucketTask for each resource matched by args.url.

     Args:
       args: Parsed command arguments; args.url is iterated for URL strings.

     Yields:
       update_bucket_task.UpdateBucketTask instances sharing one set of user
       request args built with bucket metadata type.
     """
     metadata_type = user_request_args_factory.MetadataType.BUCKET
     bucket_request_args = (
         user_request_args_factory.get_user_request_args_from_command_args(
             args, metadata_type=metadata_type))
     for url_string in args.url:
         matches = wildcard_iterator.get_wildcard_iterator(url_string)
         for matched_resource in matches:
             yield update_bucket_task.UpdateBucketTask(
                 matched_resource, user_request_args=bucket_request_args)
    def test_gcs_bucket_url_without_wildcard(self, fields_scope, client):
        """A wildcard-free bucket URL is resolved by one get_bucket call."""
        client.get_bucket.side_effect = [self.buckets_response[0]]
        expected_resources = self.buckets_response[:1]

        observed_resources = list(
            wildcard_iterator.get_wildcard_iterator(
                'gs://bucket1', fields_scope=fields_scope))

        self.assertEqual(observed_resources, expected_resources)
        expected_scope = cloud_api.FieldsScope(fields_scope)
        client.get_bucket.assert_called_once_with(self.bucket1.name,
                                                  expected_scope)
    def test_gcs_bucket_url_with_wildcard_gets_all_buckets(
            self, fields_scope, client):
        """'gs://bucket*' lists buckets once and yields the whole response."""
        client.list_buckets.side_effect = [self.buckets_response]

        matched_buckets = list(
            wildcard_iterator.get_wildcard_iterator(
                'gs://bucket*', fields_scope=fields_scope))

        self.assertEqual(matched_buckets, self.buckets_response)
        expected_scope = cloud_api.FieldsScope(fields_scope)
        client.list_buckets.assert_called_once_with(expected_scope)
 def test_object_with_generation_without_wildcard(self, client):
     """A generation-qualified URL without wildcards uses one metadata get."""
     expected_resource = test_resources.get_object_resource(
         storage_url.ProviderPrefix.GCS, 'bucket1', 'a.txt', '1')
     client.get_object_metadata.return_value = expected_resource

     iterator = wildcard_iterator.get_wildcard_iterator(
         'gs://bucket1/a.txt#1')
     observed_resources = list(iterator)

     self.assertEqual(observed_resources, [expected_resource])
     client.get_object_metadata.assert_called_once_with(
         'bucket1', 'a.txt', '1', cloud_api.FieldsScope.NO_ACL)
     # No listing should be needed for this URL.
     self.assertFalse(client.list_objects.called)
 def _get_nested_objects_iterator(self, parent_name_expansion_result):
     """Yields expansion results for resources found under a parent resource.

     Args:
       parent_name_expansion_result: Expansion result whose resource's storage
         URL is joined with '**' to look up nested resources.

     Yields:
       The value of self._get_name_expansion_result for each nested resource,
       built with the parent's storage URL and original URL.
     """
     parent = parent_name_expansion_result
     recursive_url = parent.resource.storage_url.join('**')
     nested_resources = wildcard_iterator.get_wildcard_iterator(
         recursive_url.url_string,
         all_versions=self.all_versions,
         ignore_symlinks=self._ignore_symlinks)
     for nested_resource in nested_resources:
         yield self._get_name_expansion_result(
             nested_resource, parent.resource.storage_url,
             parent.original_url)
    def test_list_objects(self, mock_client, wildcard_url, expected_prefixes,
                          expected_objects):
        """Checks the prefixes and object names produced for wildcard_url.

        The metadata get is made to raise NotFoundError, and listing is
        served by self._list_objects_side_effect.
        """
        mock_client.get_object_metadata.side_effect = api_errors.NotFoundError
        mock_client.list_objects.side_effect = self._list_objects_side_effect

        actual_prefixes, actual_objects = _get_prefixes_and_object_names(
            wildcard_iterator.get_wildcard_iterator(wildcard_url))

        self.assertEqual(actual_prefixes, expected_prefixes)
        self.assertEqual(actual_objects, expected_objects)
    def test_list_objects_without_wildcard(self, mock_client):
        """A wildcard-free object URL is fetched directly, never listed."""
        expected_resource = test_resources.get_object_resource(
            storage_url.ProviderPrefix.GCS, 'bucket', 'a/b.txt')
        mock_client.get_object_metadata.side_effect = [expected_resource]

        iterator = wildcard_iterator.get_wildcard_iterator(
            'gs://bucket/a/b.txt')
        observed_resources = list(iterator)

        self.assertEqual(observed_resources, [expected_resource])
        mock_client.get_object_metadata.assert_called_once_with(
            'bucket', 'a/b.txt', None, cloud_api.FieldsScope.NO_ACL)
        self.assertFalse(mock_client.list_objects.called)
    def test_gcs_root_listing(self, fields_scope, client):
        """A bare provider URL ('gs://') yields the names of self.buckets."""
        client.list_buckets.side_effect = [self.buckets_response]

        listed_names = [
            resource.metadata.name
            for resource in wildcard_iterator.get_wildcard_iterator(
                'gs://', fields_scope=fields_scope)
        ]
        expected_names = [bucket.name for bucket in self.buckets]

        self.assertEqual(listed_names, expected_names)
        expected_scope = cloud_api.FieldsScope(fields_scope)
        client.list_buckets.assert_called_once_with(expected_scope)
Exemplo n.º 13
0
  def __iter__(self):
    """Iterates over each URL in self._urls and yields the expanded result.

    Non-container resources are yielded directly. Containers are skipped
    (with an info log) unless recursion was requested, in which case the
    resources found by joining '**' onto the container URL are yielded.

    Yields:
      NameExpansionResult instance.

    Raises:
      InvalidUrlError: No matching objects found.
    """
    for url in self._urls:
      resources = plurality_checkable_iterator.PluralityCheckableIterator(
          wildcard_iterator.get_wildcard_iterator(url))
      # Stays True until at least one result has been yielded for this URL.
      is_name_expansion_iterator_empty = True
      original_storage_url = storage_url.storage_url_from_string(url)

      # Iterate over all the resource_reference.Resource objects.
      for resource in resources:
        if not resource.is_container():
          yield NameExpansionResult(resource, resource.storage_url,
                                    original_storage_url)
          is_name_expansion_iterator_empty = False
          continue

        if not self._recursion_requested:
          # Identify the skipped container in the log; formatting
          # resource.is_container() would always print "True" here.
          log.info('Omitting {} because it is a container, and recursion'
                   ' is not enabled.'.format(resource.storage_url))
          continue

        # Append '**' to fetch all objects under this container.
        new_storage_url = resource.storage_url.join('**')
        child_resources = wildcard_iterator.get_wildcard_iterator(
            new_storage_url.url_string)
        for child_resource in child_resources:
          yield NameExpansionResult(child_resource, resource.storage_url,
                                    original_storage_url)
          is_name_expansion_iterator_empty = False

      if is_name_expansion_iterator_empty:
        raise errors.InvalidUrlError(
            '{} matched no objects or files.'.format(url))
    def test_gcs_bucket_url_with_wildcard_gets_single_bucket(
            self, fields_scope, client):
        """'gs://buck*1' lists buckets once and yields only bucket1."""
        client.list_buckets.side_effect = [self.buckets_response]

        matched_buckets = list(
            wildcard_iterator.get_wildcard_iterator(
                'gs://buck*1', fields_scope=fields_scope))
        expected_bucket = gcs_api._bucket_resource_from_metadata(self.bucket1)

        self.assertEqual(matched_buckets, [expected_bucket])
        expected_scope = cloud_api.FieldsScope(fields_scope)
        client.list_buckets.assert_called_once_with(expected_scope)
    def test_object_with_incorrect_generation(self, client):
        """Expects no resources for 'b.txt#2' when the metadata get fails.

        The metadata get raises NotFoundError, after which a single listing
        call is expected.
        """
        client.get_object_metadata.side_effect = api_errors.NotFoundError
        client.list_objects.return_value = self._object_resources_with_generation

        iterator = wildcard_iterator.get_wildcard_iterator(
            'gs://bucket1/b.txt#2')
        matched_resources = list(iterator)

        self.assertEqual(matched_resources, [])
        no_acl = cloud_api.FieldsScope.NO_ACL
        client.get_object_metadata.assert_called_once_with(
            'bucket1', 'b.txt', '2', no_acl)
        client.list_objects.assert_called_once_with(
            'bucket1', 'b.txt', '/', True, no_acl)
 def test_object_with_generation(self, client, wildcard_url, request_prefix,
                                 expected_resources):
     """Checks the listing request and results for a generation wildcard URL.

     Expects a single all-versions listing with request_prefix and no
     metadata get.
     """
     client.list_objects.return_value = self._object_resources_with_generation

     observed_resources = list(
         wildcard_iterator.get_wildcard_iterator(wildcard_url))

     self.assertEqual(observed_resources, expected_resources)
     self.assertFalse(client.get_object_metadata.called)
     expected_list_kwargs = {
         'all_versions': True,
         'bucket_name': 'bucket1',
         'delimiter': '/',
         'fields_scope': cloud_api.FieldsScope.NO_ACL,
         'prefix': request_prefix,
     }
     client.list_objects.assert_called_once_with(**expected_list_kwargs)
    def test_file_wildcard(self, wildcard_url, expected_dirs, expected_files):
        """Checks directories and files matched by a local wildcard URL.

        The '/'-separated inputs are converted to os.sep-separated paths
        under self.root_path before expansion and comparison; result order
        is not significant.
        """

        def to_local_path(relative_path):
            # Re-root the path under self.root_path with native separators.
            return os.path.join(self.root_path,
                                relative_path.replace('/', os.sep))

        processed_url_str = to_local_path(wildcard_url)
        processed_expected_dirs = [to_local_path(d) for d in expected_dirs]
        processed_expected_files = [to_local_path(f) for f in expected_files]

        dirs, files = _get_prefixes_and_object_names(
            wildcard_iterator.get_wildcard_iterator(processed_url_str))

        self.assertCountEqual(dirs, processed_expected_dirs)
        self.assertCountEqual(files, processed_expected_files)
    def test_gcs_list_object_with_fields_scope(self, fields_scope, client):
        """list_objects receives the fields_scope given to the iterator."""
        listed_resource = test_resources.get_object_resource(
            storage_url.ProviderPrefix.GCS, 'b', 'o.txt')
        expected_resources = [listed_resource]
        client.list_objects.side_effect = [expected_resources]

        iterator = wildcard_iterator.get_wildcard_iterator(
            'gs://b/o*', fields_scope=fields_scope)
        observed_resources = list(iterator)

        self.assertEqual(observed_resources, expected_resources)
        expected_list_kwargs = {
            'all_versions': False,
            'bucket_name': 'b',
            'delimiter': '/',
            'fields_scope': fields_scope,
            'prefix': 'o',
        }
        client.list_objects.assert_called_once_with(**expected_list_kwargs)
    def test_gcs_get_object_metadata_with_fields_scope(self, fields_scope,
                                                       client):
        """get_object_metadata receives the iterator's fields_scope."""
        fetched_resource = test_resources.get_object_resource(
            storage_url.ProviderPrefix.GCS, 'b', 'o.txt')
        expected_resources = [fetched_resource]
        client.get_object_metadata.side_effect = [fetched_resource]

        iterator = wildcard_iterator.get_wildcard_iterator(
            'gs://b/o.txt', fields_scope=fields_scope)
        observed_resources = list(iterator)

        self.assertEqual(observed_resources, expected_resources)
        expected_get_kwargs = {
            'bucket_name': 'b',
            'fields_scope': fields_scope,
            'generation': None,
            'object_name': 'o.txt',
        }
        client.get_object_metadata.assert_called_once_with(
            **expected_get_kwargs)
    def Run(self, args):
        """Yields serializable display data for every matching bucket.

        Args:
          args: Parsed command arguments. When args.urls is truthy, each of
            its URL strings must be a provider or bucket URL; otherwise the
            GCS provider URL is used.

        Yields:
          The resource_projector.MakeSerializable output for each bucket's
          displayable data.

        Raises:
          errors.InvalidUrlError: A URL is neither a provider nor a bucket
            URL.
        """
        if not args.urls:
            urls = [storage_url.CloudUrl(storage_url.ProviderPrefix.GCS)]
        else:
            urls = []
            for url_string in args.urls:
                url = storage_url.storage_url_from_string(url_string)
                if url.is_provider() or url.is_bucket():
                    urls.append(url)
                    continue
                raise errors.InvalidUrlError(
                    'URL does not match buckets: {}'.format(url_string))

        for url in urls:
            buckets = wildcard_iterator.get_wildcard_iterator(
                url.url_string,
                fields_scope=cloud_api.FieldsScope.FULL,
                get_bucket_metadata=True)
            for bucket in buckets:
                display_data = bucket.get_displayable_bucket_data()
                # MakeSerializable will omit all the None values.
                yield resource_projector.MakeSerializable(display_data)
 def test_gcs_list_all_object_versions(self, client):
     """Expects generations '1' and '2' of a.txt when all_versions=True."""
     client.list_objects.return_value = self._object_resources_with_generation

     iterator = wildcard_iterator.get_wildcard_iterator(
         'gs://bucket1/a.txt', all_versions=True)
     observed_resources = list(iterator)

     expected_resources = [
         test_resources.get_object_resource(
             storage_url.ProviderPrefix.GCS, 'bucket1', 'a.txt', generation)
         for generation in ('1', '2')
     ]
     self.assertEqual(observed_resources, expected_resources)
     expected_list_kwargs = {
         'all_versions': True,
         'bucket_name': 'bucket1',
         'delimiter': '/',
         'fields_scope': cloud_api.FieldsScope.NO_ACL,
         'prefix': 'a.txt',
     }
     client.list_objects.assert_called_once_with(**expected_list_kwargs)
 def test_invalid_scheme_raises_error(self):
     """Building an iterator for 'invalid://' raises InvalidUrlError."""
     self.assertRaises(
         command_errors.InvalidUrlError,
         wildcard_iterator.get_wildcard_iterator,
         'invalid://',
         fields_scope=cloud_api.FieldsScope.SHORT)
 def test_compresses_url_wildcards(self, observed_url_string,
                                   expected_url_string):
     """The iterator for observed_url_string stores the expected _path."""
     stored_path = wildcard_iterator.get_wildcard_iterator(
         observed_url_string)._path
     self.assertEqual(stored_path, expected_url_string)