Exemplo n.º 1
0
    def test_parent(self):
        """``parent`` steps up one level at a time: file -> folder -> root -> ``None``.

        The folder obtained through ``file.parent`` must expose the same attributes as a
        folder path that was constructed directly.
        """
        root = FigsharePath('/', _ids=('', ), folder=True, is_public=False)
        folder = FigsharePath('/folder/', _ids=('', '142132'), folder=True, is_public=False)
        leaf = FigsharePath(
            '/folder/test.txt',
            _ids=('', '142132', '1595252'),
            folder=False,
            is_public=False,
        )

        # Ancestry chain, checked from the top down.
        assert root.parent is None
        assert folder.parent == root
        assert leaf.parent == folder

        parent = leaf.parent

        # Attributes compared by equality.
        expected = {
            'identifier_path': '142132/',
            'identifier': '142132',
            'kind': 'folder',
            'name': 'folder',
            'ext': '',
            'path': 'folder/',
            'raw_path': 'folder/',
            'full_path': 'folder/',
            'materialized_path': '/folder/',
            'extra': {},
        }
        for attr, value in expected.items():
            assert getattr(parent, attr) == value, attr

        # Flags must be the actual ``True`` singleton, not merely truthy.
        assert parent.is_dir is True
        assert parent.is_folder is True
        assert str(parent) == '/folder/'
Exemplo n.º 2
0
    async def validate_v1_path(self, path, **kwargs):
        """Resolve a v1 API path string to a ``FigsharePath`` inside this article.

        ``'/'`` resolves straight to the (private) root folder without any request. Every
        other path must split into exactly two parts; the second part is taken as the file
        id and fetched from the article's ``files`` endpoint. Only a 200 response is
        declared as expected, so any other status is left to ``make_request`` to reject
        (presumably by raising -- not visible here).

        :param str path: entity path from the v1 API
        :rtype FigsharePath:
        :raises exceptions.InvalidPathError: if the path does not split into two parts
        """
        if path == '/':
            return FigsharePath('/', _ids=('', ), folder=True, is_public=False)

        segments = self._path_split(path)
        if len(segments) != 2:
            message = '{} is not a valid Figshare path.'.format(path)
            raise exceptions.InvalidPathError(message)

        file_id = segments[1]
        file_url = self.build_url(False, *self.root_path_parts, 'files', file_id)
        response = await self.make_request('GET', file_url, expects=(200, ))
        file_name = (await response.json())['name']

        # The materialized path uses the file's name; the id travels in ``_ids``.
        return FigsharePath('/' + file_name, _ids=('', file_id), folder=False, is_public=False)
Exemplo n.º 3
0
    async def upload(self, stream, path, conflict='replace', **kwargs):
        r"""Upload a file to provider root or to an article whose defined_type is
        configured to represent a folder.

        The name conflict is resolved first via ``handle_name_conflict``. The stream is
        then uploaded into this provider's container, and the MD5 computed while streaming
        is compared with the MD5 reported in the stored file's metadata.

        :param asyncio.StreamReader stream: stream to upload
        :param FigsharePath path: FigsharePath to upload the file to.
        :param str conflict: conflict strategy forwarded to ``handle_name_conflict``
        :param dict \*\*kwargs: Will be passed to returned metadata object
        :returns: a ``(metadata, True)`` tuple
        :raises exceptions.UploadChecksumMismatchError: if the local and remote MD5 differ
        """
        # Only the (possibly renamed) path is needed; the "exists" flag is not used.
        path, _ = await self.handle_name_conflict(path, conflict=conflict)
        if not path.parent.is_root:
            parent_resp = await self.make_request(
                'GET',
                self.build_url(False, *self.root_path_parts, 'articles',
                               path.parent.identifier),
                expects=(200, ),
            )
            parent_json = await parent_resp.json()
            if parent_json['defined_type'] not in settings.FOLDER_TYPES:
                # Parent article is not a folder type: drop its segment from the path.
                # NOTE(review): assumes ``_parts[1]`` is the parent segment -- confirm.
                del path._parts[1]

        # Hash the bytes as they stream out so they can be verified afterwards.
        stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))
        file_id = await self._upload_file(self.container_id, path.name, stream)

        # Build new file path and return metadata
        path = FigsharePath('/' + file_id,
                            _ids=('', file_id),
                            folder=False,
                            is_public=False)
        metadata = await self.metadata(path, **kwargs)
        if stream.writers['md5'].hexdigest != metadata.extra['hashes']['md5']:
            raise exceptions.UploadChecksumMismatchError()

        return metadata, True
Exemplo n.º 4
0
    async def validate_path(self, path, **kwargs):
        """Map a v0 identifier path onto an entity inside this article.

        ``'/'`` yields the root folder. Otherwise the path must split into exactly two
        parts, the second of which is looked up as a file id. A 200 response yields a
        ``FigsharePath`` named after the file and carrying its id; a 404 yields a
        ``FigsharePath`` named after the raw path part with empty identifiers.

        :param str path: identifier path URN as passed through the v0 API
        :rtype FigsharePath:
        :raises exceptions.InvalidPathError: if the path does not split into two parts

        Quirks:

        * v0 may pass an identifier_path whose last part is a name and not an identifier, in the
          case of file/folder creation calls.

        * validate_path validates parent and returns a FigsharePath as accurately as possible.
        """
        if path == '/':
            return FigsharePath('/', _ids=('', ), folder=True, is_public=False)

        segments = self._path_split(path)
        if len(segments) != 2:
            message = '{} is not a valid Figshare path.'.format(path)
            raise exceptions.InvalidPathError(message)

        file_id = segments[1]
        lookup_url = self.build_url(False, *self.root_path_parts, 'files', file_id)
        response = await self.make_request('GET', lookup_url, expects=(200, 404, ))

        if response.status != 200:
            # Catch for "create file in article root": nothing exists under this id yet,
            # so the path part is treated as the new file's name.
            await response.release()
            return FigsharePath('/' + file_id, _ids=('', ''), folder=False, is_public=False)

        file_name = (await response.json())['name']
        return FigsharePath('/' + file_name, _ids=('', file_id), folder=False, is_public=False)
Exemplo n.º 5
0
    async def upload(self, stream, path, conflict='replace', **kwargs):
        r"""Upload a file to provider root or to an article whose defined_type is
        configured to represent a folder.

        Uploading into an existing folder-type article reuses that article. Uploading to
        the root first creates a new article titled after the file; for collections the
        new article is additionally POSTed to the container's ``articles`` endpoint.
        After the upload, the MD5 computed while streaming is compared with the MD5
        reported in the stored file's metadata.

        :param asyncio.StreamReader stream: stream to upload
        :param FigsharePath path: FigsharePath to upload the file to.
        :param str conflict: conflict strategy; ``'replace'`` is refused when ``path``
            already carries an identifier
        :param dict \*\*kwargs: Will be passed to returned metadata object
        :returns: a ``(metadata, True)`` tuple
        :raises exceptions.UnsupportedOperationError: when asked to replace an existing
            file, or when uploading to the root of an unrecognised container type
        :raises exceptions.UploadChecksumMismatchError: if the local and remote MD5 differ
        """
        if path.identifier and conflict == 'replace':
            raise exceptions.UnsupportedOperationError(
                'Files in Figshare cannot be updated')

        # Only the (possibly renamed) path is needed; the "exists" flag is not used.
        path, _ = await self.handle_name_conflict(path, conflict=conflict)
        if not path.parent.is_root:
            parent_resp = await self.make_request(
                'GET',
                self.build_url(False, *self.root_path_parts, 'articles',
                               path.parent.identifier),
                expects=(200, ),
            )
            parent_json = await parent_resp.json()
            if parent_json['defined_type'] not in settings.FOLDER_TYPES:
                # Parent article is not a folder type: drop its segment from the path.
                # NOTE(review): assumes ``_parts[1]`` is the parent segment -- confirm.
                del path._parts[1]

        # Create article or retrieve article_id from existing article
        if not path.parent.is_root:
            article_id = path.parent.identifier
        else:
            if self.container_type not in ('project', 'collection'):
                # Without this guard ``article_id`` would stay unbound and the upload
                # below would fail with an opaque UnboundLocalError.
                raise exceptions.UnsupportedOperationError(
                    'Cannot upload to Figshare container type {}'.format(
                        self.container_type))

            article_name = json.dumps({'title': path.name})
            article_id = await self._create_article(article_name)
            if self.container_type == 'collection':
                # TODO don't think this is correct.  Probably should POST to /accounts/articles
                article_list = json.dumps({'articles': [article_id]})
                await self.make_request(
                    'POST',
                    self.build_url(False, *self.root_path_parts, 'articles'),
                    data=article_list,
                    expects=(201, ),
                )

        # Hash the bytes as they stream out so they can be verified afterwards.
        stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))
        file_id = await self._upload_file(article_id, path.name, stream)

        # Build new file path and return metadata
        path = FigsharePath('/' + article_id + '/' + file_id,
                            _ids=(self.container_id, article_id, file_id),
                            folder=False,
                            is_public=False)
        metadata = await self.metadata(path, **kwargs)
        if stream.writers['md5'].hexdigest != metadata.extra['hashes']['md5']:
            raise exceptions.UploadChecksumMismatchError()

        return metadata, True
Exemplo n.º 6
0
    def test_folder_path(self):
        """A directly constructed folder path exposes the expected ids, names and flags."""
        folder = FigsharePath('/folder/', _ids=('', '142132'), folder=True, is_public=False)

        # Attributes compared by equality.
        expected = {
            'identifier_path': '142132/',
            'identifier': '142132',
            'kind': 'folder',
            'name': 'folder',
            'ext': '',
            'path': 'folder/',
            'raw_path': 'folder/',
            'full_path': 'folder/',
            'materialized_path': '/folder/',
            'extra': {},
        }
        for attr, value in expected.items():
            assert getattr(folder, attr) == value, attr

        # Flags must be the actual ``True`` singleton, not merely truthy.
        assert folder.is_dir is True
        assert folder.is_folder is True
        assert str(folder) == '/folder/'
Exemplo n.º 7
0
    def test_file_path(self):
        """A directly constructed file path exposes the expected ids, names and flags."""
        leaf = FigsharePath(
            '/folder/test.txt',
            _ids=('', '142132', '1595252'),
            folder=False,
            is_public=False,
        )

        # Attributes compared by equality.
        expected = {
            'identifier_path': '142132/1595252',
            'identifier': '1595252',
            'kind': 'file',
            'name': 'test.txt',
            'ext': '.txt',
            'path': 'folder/test.txt',
            'raw_path': 'folder/test.txt',
            'full_path': 'folder/test.txt',
            'materialized_path': '/folder/test.txt',
            'extra': {},
        }
        for attr, value in expected.items():
            assert getattr(leaf, attr) == value, attr

        # Flags must be the actual ``False`` singleton, not merely falsy.
        assert leaf.is_dir is False
        assert leaf.is_folder is False
        assert str(leaf) == '/folder/test.txt'
Exemplo n.º 8
0
    def test_child(self):
        """``child`` builds paths equal to the ones constructed directly.

        root -> ``folder/`` and folder -> ``test.txt`` are derived with ``child`` and then
        checked both for equality with the hand-built paths and attribute by attribute.
        """
        root = FigsharePath('/', _ids=('', ), folder=True, is_public=False)
        folder = FigsharePath('/folder/', _ids=('', '142132'), folder=True, is_public=False)
        leaf = FigsharePath(
            '/folder/test.txt',
            _ids=('', '142132', '1595252'),
            folder=False,
            is_public=False,
        )

        derived_folder = root.child('folder/', _id='142132', folder=True)
        derived_leaf = folder.child('test.txt', _id='1595252')

        assert derived_folder == folder
        assert derived_leaf == leaf

        # (path under test, expected directory flag, attributes compared by equality)
        cases = (
            (derived_folder, True, {
                'identifier_path': '142132/',
                'identifier': '142132',
                'kind': 'folder',
                'name': 'folder',
                'ext': '',
                'path': 'folder/',
                'raw_path': 'folder/',
                'full_path': 'folder/',
                'materialized_path': '/folder/',
                'extra': {},
            }),
            (derived_leaf, False, {
                'identifier_path': '142132/1595252',
                'identifier': '1595252',
                'kind': 'file',
                'name': 'test.txt',
                'ext': '.txt',
                'path': 'folder/test.txt',
                'raw_path': 'folder/test.txt',
                'full_path': 'folder/test.txt',
                'materialized_path': '/folder/test.txt',
                'extra': {},
            }),
        )
        for candidate, is_directory, expected in cases:
            # Flags are checked by identity against the bool singletons.
            assert candidate.is_dir is is_directory
            assert candidate.is_folder is is_directory
            for attr, value in expected.items():
                assert getattr(candidate, attr) == value, attr
Exemplo n.º 9
0
 def test_root_path(self):
     """The root path reports an empty ``identifier_path``."""
     root = FigsharePath('/', _ids=('', ), folder=True, is_public=False)
     assert root.identifier_path == ''
Exemplo n.º 10
0
    async def validate_path(self, path, **kwargs):
        """Take a string path from the url and attempt to map it to an entity within this project.
        If the entity is found, returns a FigsharePath object with the entity identifiers included.
        Otherwise returns a FigsharePath with empty identifiers.

        Resolution order for a non-root path (``/<article_id>[/<file_id>]``):

        1. Look the article up in ``_get_all_articles()`` to learn its title and whether
           it is public.
        2. If a file part is present, try to fetch that file; on 200 return the file path.
        3. Fetch the article; if it is a folder type, return either a to-be-created file
           path (file part present) or the folder path.
        4. Otherwise raise for file paths, or return an empty-identifier folder path.

        :param str path: identifier_path URN as passed through the v0 API
        :rtype FigsharePath:
        :raises exceptions.InvalidPathError: if the path does not split into 2 or 3 parts
        :raises exceptions.NotFoundError: if a file path resolves to neither a file nor a
            folder-type article, or if the remote entity exists but its article is missing
            from the article listing (so its title is unknown)

        Quirks:

        * v0 may pass an identifier_path whose last part is a name and not an identifier, in the
          case of file/folder creation calls.

        * validate_path validates parent and returns a FigsharePath as accurately as possible.
        """
        if path == '/':
            return FigsharePath('/', _ids=('', ), folder=True, is_public=False)

        path_parts = self._path_split(path)
        if len(path_parts) not in (2, 3):
            raise exceptions.InvalidPathError(
                '{} is not a valid Figshare path.'.format(path))
        article_id = path_parts[1]
        file_id = path_parts[2] if len(path_parts) == 3 else None

        articles = await self._get_all_articles()

        # TODO: need better way to get public/private
        # This call's return value is currently busted at figshare for collections. Figshare always
        # returns private-looking urls.
        is_public = False
        # Stays ``None`` when the listing has no entry for this article; previously the
        # name was simply left unbound and later use raised UnboundLocalError.
        article_name = None
        for item in articles:
            # NOTE(review): this is a substring test on the url, so an id that is a prefix
            # of another id may also match -- confirm against the url format.
            if '/articles/' + article_id in item['url']:
                article_name = item['title']
                if settings.PRIVATE_IDENTIFIER not in item['url']:
                    is_public = True

        article_segments = (*self.root_path_parts, 'articles', article_id)
        if file_id:
            file_response = await self.make_request(
                'GET',
                self.build_url(is_public, *article_segments, 'files', file_id),
                expects=(
                    200,
                    404,
                ),
            )
            if file_response.status == 200:
                file_response_json = await file_response.json()
                if article_name is None:
                    # File exists remotely but its article is not in the listing.
                    raise exceptions.NotFoundError(path)
                file_name = file_response_json['name']
                return FigsharePath('/' + article_name + '/' + file_name,
                                    _ids=(self.container_id, article_id,
                                          file_id),
                                    folder=False,
                                    is_public=is_public)
            await file_response.release()

        article_response = await self.make_request(
            'GET',
            self.build_url(is_public, *article_segments),
            expects=(
                200,
                404,
            ),
        )
        if article_response.status == 200:
            article_json = await article_response.json()
            if article_json['defined_type'] in settings.FOLDER_TYPES:
                if article_name is None:
                    # Article exists remotely but is not in the listing.
                    raise exceptions.NotFoundError(path)
                # Case of v0 file creation
                if file_id:
                    ids = ('', article_id, '')
                    folder = False
                    path_urn = '/' + article_name + '/' + file_id
                else:
                    ids = ('', article_id)
                    folder = True
                    path_urn = '/' + article_name + '/'
                return FigsharePath(path_urn,
                                    _ids=ids,
                                    folder=folder,
                                    is_public=is_public)
        else:
            await article_response.release()

        if file_id:
            # Catch for if neither file nor article exist
            raise exceptions.NotFoundError(path)

        # Return for v0 folder creation
        return FigsharePath(path, _ids=('', ''), folder=True, is_public=False)
Exemplo n.º 11
0
    async def validate_v1_path(self, path, **kwargs):
        """Take a string path from the url and attempt to map it to an entity within this project.
        If the entity is found, returns a FigsharePath object with the entity identifiers included.
        Otherwise throws a 404 Not Found. Will also assert that the entity type inferred from the
        path matches the type of the entity at that url.

        A non-root path has the form ``/<article_id>[/<file_id>]``. The article must appear
        in ``_get_all_articles()``; its listing entry supplies the title used in the
        returned path and decides whether public or private urls are built. File paths
        must not end with ``/``; folder paths must end with ``/`` and the article must be
        one of the configured folder types.

        :param str path: entity path from the v1 API
        :rtype FigsharePath:
        :raises exceptions.InvalidPathError: if the path does not split into 2 or 3 parts
        :raises exceptions.NotFoundError: if the article is not in the listing, if the
            trailing slash contradicts the entity type, or if the article is not a folder
            type
        """
        if path == '/':
            return FigsharePath('/', _ids=('', ), folder=True, is_public=False)

        path_parts = self._path_split(path)
        if len(path_parts) not in (2, 3):
            raise exceptions.InvalidPathError(
                '{} is not a valid Figshare path.'.format(path))
        article_id = path_parts[1]
        file_id = path_parts[2] if len(path_parts) == 3 else None

        articles = await self._get_all_articles()

        # TODO: need better way to get public/private
        # This call's return value is currently busted at figshare for collections. Figshare always
        # returns private-looking urls.
        is_public = False
        article_name = None
        for item in articles:
            # NOTE(review): this is a substring test on the url, so an id that is a prefix
            # of another id may also match -- confirm against the url format.
            if '/articles/' + article_id in item['url']:
                article_name = item['title']
                if settings.PRIVATE_IDENTIFIER not in item['url']:
                    is_public = True

        # Without a listing entry there is no title to build the path from; fail with a
        # clear 404 instead of hitting an unbound ``article_name`` further down.
        if article_name is None:
            raise exceptions.NotFoundError(
                'Path {} with article ID {} not found in the project\'s '
                'article list'.format(path, article_id))

        article_segments = (*self.root_path_parts, 'articles', article_id)
        if file_id:
            file_response = await self.make_request(
                'GET',
                self.build_url(is_public, *article_segments, 'files', file_id),
                expects=(200, ),
            )
            file_json = await file_response.json()
            file_name = file_json['name']
            if path.endswith('/'):
                raise exceptions.NotFoundError(
                    'File paths must not end with "/". '
                    '{} not found.'.format(path))
            return FigsharePath('/' + article_name + '/' + file_name,
                                _ids=(self.container_id, article_id, file_id),
                                folder=False,
                                is_public=is_public)

        article_response = await self.make_request(
            'GET',
            self.build_url(is_public, *article_segments),
            expects=(200, ),
        )
        article_json = await article_response.json()
        if article_json['defined_type'] in settings.FOLDER_TYPES:
            if not path.endswith('/'):
                raise exceptions.NotFoundError(
                    'Folder paths must end with "/".  {} not found.'.format(
                        path))
            return FigsharePath('/' + article_name + '/',
                                _ids=(self.container_id, article_id),
                                folder=True,
                                is_public=is_public)

        raise exceptions.NotFoundError(
            'This article is not configured as a folder defined_type. '
            '{} not found.'.format(path))
Exemplo n.º 12
0
    async def validate_v1_path(self, path: str, **kwargs) -> FigsharePath:
        """Resolve a v1 API path string to a ``FigsharePath`` inside this project.

        A non-root path has the form ``/<article_id>[/<file_id>]``. The first listing
        entry from ``_get_all_articles()`` whose url contains ``/articles/<article_id>``
        supplies the article title, and the article counts as public when its
        ``published_date`` is not ``None``. File paths must not end with ``/``; folder
        paths must end with ``/`` and the article must be one of the configured folder
        types.

        :param str path: entity path from the v1 API
        :rtype FigsharePath:
        :raises exceptions.InvalidPathError: if the path does not split into 2 or 3 parts
        :raises exceptions.NotFoundError: if the article is not listed (or has an empty
            title), if the trailing slash contradicts the entity type, or if the article
            is not a folder type
        """

        if path == '/':
            # The root is always addressed through the private API: the project or
            # collection itself must be owned by the figshare-OSF OAuth user.
            return FigsharePath('/', _ids=('', ), folder=True, is_public=False)

        # Split the raw path into its id components.
        segments = self._path_split(path)
        if len(segments) not in (2, 3):
            raise exceptions.InvalidPathError('{} is not a valid Figshare path.'.format(path))
        article_id = segments[1]
        file_id = segments[2] if len(segments) == 3 else None

        # Find the first listing entry for this article, if any.
        listing = await self._get_all_articles()
        url_marker = '/articles/' + article_id
        entry = next((candidate for candidate in listing if url_marker in candidate['url']), None)

        article_name = None
        is_public = False
        if entry is not None:
            article_name = entry['title']
            is_public = entry['published_date'] is not None

        # Fail here rather than waiting for a 404 from the API. Note that this differs
        # from the v0 behaviour.
        if not article_name:
            raise exceptions.NotFoundError(
                "Path {} with article ID {} not found in the project's "
                "article list".format(path, article_id)
            )
        article_segments = (*self.root_path_parts, 'articles', article_id)

        if file_id:
            # File path: the file must exist, and the url must not look like a folder.
            file_url = self.build_url(is_public, *article_segments, 'files', file_id)
            file_response = await self.make_request('GET', file_url, expects=(200, ))
            file_name = (await file_response.json())['name']
            if path[-1] == '/':
                raise exceptions.NotFoundError(
                    'File paths must not end with "/". {} not found.'.format(path)
                )
            return FigsharePath(
                '/' + article_name + '/' + file_name,
                _ids=(self.container_id, article_id, file_id),
                folder=False,
                is_public=is_public,
            )

        # Folder path: the article must be a folder type and the url must end with "/".
        article_url = self.build_url(is_public, *article_segments)
        article_response = await self.make_request('GET', article_url, expects=(200, ))
        article_json = await article_response.json()
        if article_json['defined_type'] not in pd_settings.FOLDER_TYPES:
            raise exceptions.NotFoundError(
                'This article is not configured as a folder defined_type. '
                '{} not found.'.format(path)
            )
        if path[-1] != '/':
            raise exceptions.NotFoundError(
                'Folder paths must end with "/". {} not found.'.format(path)
            )
        return FigsharePath(
            '/' + article_name + '/',
            _ids=(self.container_id, article_id),
            folder=True,
            is_public=is_public,
        )