Exemplo n.º 1
0
Arquivo: main.py Projeto: ndlib/presqt
    def iter_resources_objects(self, container_resource, resources,
                               container_id):
        """
        Recursive function to add resource data to the resources list.

        Parameters
        ----------
        container_resource : dict
            API response payload whose 'data' list holds the child resources
            to walk (presumably OSF-style JSON:API — TODO confirm).
        resources : list
            Accumulator of resource dictionaries; mutated in place.
        container_id : str
            ID of the container that the resources in 'data' belong to.
        """
        # Folders found at this level; their file urls are fetched in one
        # async batch below instead of one request per folder.
        folder_data = []
        for resource in container_resource['data']:
            kind = resource['attributes']['kind']

            if kind == 'file':
                file = File(resource, self.session)
                file_obj = {
                    'kind': file.kind,
                    'kind_name': file.kind_name,
                    'id': file.id,
                    'container': container_id,
                    'title': file.title
                }
                resources.append(file_obj)

            elif kind == 'folder':
                folder = Folder(resource, self.session)
                folder_obj = {
                    'kind': folder.kind,
                    'kind_name': folder.kind_name,
                    'id': folder.id,
                    'container': container_id,
                    'title': folder.title
                }
                resources.append(folder_obj)

                # Keep track of all folders' file urls that need to be called.
                folder_data.append({
                    'url': folder._files_url,
                    'id': folder.id,
                    'path': folder.materialized_path
                })

        # Asynchronously call all folder file urls to get the folder's top level resources.
        all_folders_resources = run_urls_async_with_pagination(
            self, [folder_dict['url'] for folder_dict in folder_data])

        # For each folder, get it's container_id and resources
        for folder_resources in all_folders_resources:
            if folder_resources['data']:
                resource_attr = folder_resources['data'][0]['attributes']
                # Derive the parent folder's path by stripping this resource's
                # name off its materialized path. Folder paths carry a trailing
                # slash, hence the extra character removed in that branch.
                if resource_attr['kind'] == 'folder':
                    parent_path = resource_attr[
                        'materialized_path'][:-len(resource_attr['name']) - 1]
                else:
                    parent_path = resource_attr[
                        'materialized_path'][:-len(resource_attr['name'])]
                # Find the corresponding parent_path in the folder_data list of dictionaries so we
                # can get the container id for this resource.
                container_id = get_dictionary_from_list(
                    folder_data, 'path', parent_path)['id']

                # Recurse one level deeper with the parent folder's id.
                self.iter_resources_objects(folder_resources, resources,
                                            container_id)
Exemplo n.º 2
0
def create_upload_metadata(instance, file_metadata_list, action_metadata,
                           project_id, resources_ignored, resources_updated):
    """
    Create FTS file metadata for the action's resources.

    Parameters
    ----------
    instance: BaseResource Class Instance
        Class instance for the action
    file_metadata_list: list
        List of file metadata brought back from the upload function
    action_metadata: dict
        Metadata about the action itself
    project_id: str
        ID of the project the resource metadata should be uploaded to
    resources_ignored: list
        List of resource string paths that were ignored during upload
    resources_updated: list
        List of resource string paths that were updated during upload

    Returns
    -------
    Returns the result of schema validation against the final FTS metadata.
    Will be True if valid and an error string if invalid.
    """
    # NOTE: this function mutates instance.action_metadata and
    # instance.new_fts_metadata_files in place.
    instance.action_metadata['destinationUsername'] = action_metadata[
        'destinationUsername']

    # Put the file metadata in the correct file list
    instance.action_metadata['files'] = build_file_dict(
        instance.action_metadata['files']['created'], resources_ignored,
        resources_updated, 'destinationPath')
    for resource in file_metadata_list:
        # Get the resource's metadata dict that has already been created
        # (matched by destination path relative to the data directory).
        fts_metadata_entry = get_dictionary_from_list(
            instance.new_fts_metadata_files, 'destinationPath',
            resource['actionRootPath'][len(instance.data_directory):])
        # Add destination metadata
        fts_metadata_entry['destinationHashes'] = {}
        if resource['destinationHash']:
            fts_metadata_entry['destinationHashes'][
                instance.hash_algorithm] = resource['destinationHash']

        fts_metadata_entry['destinationPath'] = resource['destinationPath']
        # Accumulate fixity failures onto any already recorded for this entry.
        fts_metadata_entry['failedFixityInfo'] += resource[
            'failed_fixity_info']

    # Create FTS metadata object
    # (imported locally, presumably to avoid a circular import — TODO confirm)
    from presqt.api_v1.utilities import create_fts_metadata
    instance.fts_metadata_data = create_fts_metadata(
        instance.all_keywords, instance.action_metadata,
        instance.source_fts_metadata_actions, instance.extra_metadata)
    # Write the metadata file to the destination target and validate the metadata file
    metadata_validation = write_and_validate_metadata(
        instance, project_id, instance.fts_metadata_data)
    return metadata_validation
Exemplo n.º 3
0
def github_download_resource(token, resource_id):
    """
    Fetch the requested resource from GitHub along with its hash information.

    Parameters
    ----------
    token : str
        User's GitHub token
    resource_id : str
        ID of the resource requested

    Returns
    -------
    Dictionary with the keys 'resources' (list of file dictionaries, each
    holding the binary file, hashes, title, path, source_path and
    extra_metadata), 'empty_containers' (list of string paths for empty
    containers that must be written) and 'action_metadata' (dictionary with
    the 'sourceUsername' key).
    """
    # Validate the token up front; translate the failure into a 401 response.
    try:
        header, username = validation_check(token)
    except PresQTResponseException:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    project_url = 'https://api.github.com/repositories/{}'.format(resource_id)
    project_response = requests.get(project_url, headers=header)
    if project_response.status_code != 200:
        raise PresQTResponseException(
            'The resource with id, {}, does not exist for this user.'.format(
                resource_id), status.HTTP_404_NOT_FOUND)

    project_data = project_response.json()
    repo_name = project_data['name']
    # Strip off the unnecessary {+path} that's included in the url
    # Example: https://api.github.com/repos/eggyboi/djangoblog/contents/{+path} becomes
    # https://api.github.com/repos/eggyboi/djangoblog/contents
    contents_url = project_data['contents_url'].partition('/{+path}')[0]

    files, empty_containers, action_metadata = download_content(
        username, contents_url, header, repo_name, [])

    # Download every file's binary content asynchronously on a fresh loop.
    loop = asyncio.new_event_loop()
    download_data = loop.run_until_complete(
        async_main([entry['file'] for entry in files], header))

    # Swap each entry's file url for the downloaded binary content.
    for entry in files:
        entry['file'] = get_dictionary_from_list(
            download_data, 'url', entry['file'])['binary_content']

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata
    }
Exemplo n.º 4
0
def zenodo_download_resource(token, resource_id, process_info_path, action):
    """
    Fetch the requested resource from Zenodo along with its hash information.

    Parameters
    ----------
    token : str
        User's Zenodo token
    resource_id : str
        ID of the resource requested
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources': List of dictionary objects that each hold a file and its information.
                     Dictionary must be in the following format:
                         {
                            'file': binary_file,
                            'hashes': {'hash_algorithm': 'the_hash'},
                            'title': 'file.jpg',
                            'path': '/path/to/file',
                            'metadata': {
                                'sourcePath': '/full/path/at/source.jpg',
                                'title': 'file_title',
                                'sourceHashes': {'hash_algorithm': 'the_hash'},
                                'extra': {'any': 'extra'}
                             }
                         }
        'empty_containers: List of string paths representing empty containers that must be written.
                              Example: ['empty/folder/to/write/', 'another/empty/folder/]
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                              {
                              'sourceUsername': '******',
                              }
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTResponseException:
        raise PresQTResponseException(
            'Token is invalid. Response returned a 401 status code.',
            status.HTTP_401_UNAUTHORIZED)
    files = []
    empty_containers = []
    extra_metadata = {}
    # base_url stays None if the requested file cannot be located; used as the
    # not-found sentinel below.
    base_url = None

    # If the resource_id is longer than 7 characters, the resource is an individual file
    # (NOTE(review): this is an ID-length heuristic — confirm it holds for all
    # Zenodo IDs).
    if len(resource_id) > 7:
        # First we need to check if the file id given belongs to a public published record.
        zenodo_file = requests.get(
            'https://zenodo.org/api/files/{}'.format(resource_id),
            params=auth_parameter)
        if zenodo_file.status_code != 200:
            # If not, we need to loop through their depositions to look for the file.
            zenodo_projects = requests.get(
                'https://zenodo.org/api/deposit/depositions',
                params=auth_parameter).json()
            for entry in zenodo_projects:
                project_files = requests.get(entry['links']['self'],
                                             params=auth_parameter).json()
                for file in project_files['files']:
                    if file['id'] == resource_id:
                        base_url = entry['links']['self']
                        file_url = file['links']['self']
                        is_record = False
                        break
                else:
                    # If the file wasn't found we want to continue the loop.
                    continue
                # File found: the inner break skipped the for-else, so stop
                # scanning depositions too.
                break
        else:
            # The public files endpoint answered 200, so this is a published
            # record file.
            is_record = True
            base_url = 'https://zenodo.org/api/files/{}'.format(resource_id)
            file_url = 'https://zenodo.org/api/files/{}'.format(resource_id)

        if base_url is None:
            raise PresQTResponseException(
                "The resource with id, {}, does not exist for this user.".
                format(resource_id), status.HTTP_404_NOT_FOUND)

        update_process_info_message(process_info_path, action,
                                    'Downloading files from Zenodo...')
        # Add the total number of projects to the process info file.
        # This is necessary to keep track of the progress of the request.
        update_process_info(process_info_path, 1, action, 'download')

        files, action_metadata = zenodo_download_helper(
            is_record, base_url, auth_parameter, files, file_url)

        # Increment the number of files done in the process info file.
        increment_process_info(process_info_path, action, 'download')

    # Otherwise, it's a full project
    else:
        # Try the public records endpoint first; fall back to the user's
        # depositions endpoint if the record isn't public.
        base_url = 'https://zenodo.org/api/records/{}'.format(resource_id)
        zenodo_record = requests.get(base_url, params=auth_parameter)
        is_record = True
        if zenodo_record.status_code != 200:
            base_url = 'https://zenodo.org/api/deposit/depositions/{}'.format(
                resource_id)
            is_record = False
        try:
            files, action_metadata = zenodo_download_helper(
                is_record, base_url, auth_parameter, files)
        except PresQTResponseException:
            raise PresQTResponseException(
                "The resource with id, {}, does not exist for this user.".
                format(resource_id), status.HTTP_404_NOT_FOUND)

        extra_metadata = extra_metadata_helper(base_url, is_record,
                                               auth_parameter)
        file_urls = [file['file'] for file in files]

        update_process_info_message(process_info_path, action,
                                    'Downloading files from Zenodo...')
        # Add the total number of projects to the process info file.
        # This is necessary to keep track of the progress of the request.
        update_process_info(process_info_path, len(file_urls), action,
                            'download')

        # Fetch all file contents asynchronously on a fresh event loop.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        download_data = loop.run_until_complete(
            async_main(file_urls, auth_parameter, process_info_path, action))

        # Go through the file dictionaries and replace the file path with the binary_content
        for file in files:
            file['file'] = get_dictionary_from_list(
                download_data, 'url', file['file'])['binary_content']

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata,
        'extra_metadata': extra_metadata
    }
Exemplo n.º 5
0
def curate_nd_download_resource(token, resource_id):
    """
    Fetch the requested resource from CurateND along with its hash information.

    Parameters
    ----------
    token : str
        User's CurateND token
    resource_id : str
        ID of the resource requested

    Returns
    -------
    Dictionary with the following keys: values
        'resources': List of dictionary objects that each hold a file and its information.
                     Dictionary must be in the following format:
                         {
                            'file': binary_file,
                            'hashes': {'hash_algorithm': 'the_hash'},
                            'title': 'file.jpg',
                            'path': '/path/to/file',
                            'source_path': '/full/path/to/file',
                            'extra_metadata': {'any': 'extra'}
                         }
        'empty_containers': List of string paths representing empty containers that must be
                            written. Example: ['empty/folder/to/write/', 'another/empty/folder/']
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                              {
                              'sourceUsername': '******',
                              }
    """
    try:
        curate_instance = CurateND(token)
    except PresQTInvalidTokenError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # Get the resource
    resource = get_curate_nd_resource(resource_id, curate_instance)
    action_metadata = {"sourceUsername": resource.extra['depositor']}

    # Get all the files for the provided resources.
    files = []
    empty_containers = []
    if resource.kind_name == 'file':
        # Get the title of the Project to add to sourcePath
        project_title = requests.get(resource.extra['isPartOf'],
                                     headers={
                                         'X-Api-Token': '{}'.format(token)
                                     }).json()['title']

        # This is so we aren't missing the few extra keys that are pulled out for the PresQT payload
        resource.extra.update({
            "id": resource.id,
            "date_submitted": resource.date_submitted
        })

        binary_file, curate_hash = resource.download()

        files.append({
            'file': binary_file,
            'hashes': {'md5': curate_hash},
            'title': resource.title,
            # If the file is the only resource we are downloading then we don't need it's full path.
            'path': '/{}'.format(resource.title),
            'source_path': '/{}/{}'.format(project_title, resource.title),
            'extra_metadata': resource.extra
        })

    else:
        if not resource.extra['containedFiles']:
            empty_containers.append('{}'.format(resource.title))
        else:
            title_helper = {}  # download url -> file title
            hash_helper = {}   # download url -> md5 hash
            file_urls = []
            project_title = resource.title
            file_metadata = []
            for contained in resource.extra['containedFiles']:
                download_url = contained['downloadUrl']
                contained_file = get_curate_nd_resource(
                    contained['id'], curate_instance)
                file_metadata.append({
                    "title": contained_file.title,
                    "extra": contained_file.extra
                })

                title_helper[download_url] = contained_file.title
                hash_helper[download_url] = contained_file.md5
                # BUG FIX: previously the url was appended a second time (with
                # the title overwritten by the file's 'label'), so every file
                # was downloaded and returned twice, and the label-based title
                # broke the file_metadata lookup below (which is keyed on
                # contained_file.title). Append each url exactly once.
                file_urls.append(download_url)

            # Download all file contents asynchronously on a fresh event loop.
            loop = asyncio.new_event_loop()
            download_data = loop.run_until_complete(
                async_main(file_urls, token))

            for downloaded in download_data:
                title = title_helper[downloaded['url']]
                file_hash = hash_helper[downloaded['url']]
                files.append({
                    'file': downloaded['binary_content'],
                    'hashes': {'md5': file_hash},
                    'title': title,
                    "source_path": '/{}/{}'.format(project_title, title),
                    'path': '/{}/{}'.format(resource.title, title),
                    'extra_metadata': get_dictionary_from_list(
                        file_metadata, 'title', title)['extra']
                })

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata
    }
Exemplo n.º 6
0
def osf_download_resource(token, resource_id):
    """
    Fetch the requested resource from OSF along with its hash information.

    Parameters
    ----------
    token : str
        User's OSF token

    resource_id : str
        ID of the resource requested

    Returns
    -------
    Dictionary with the following keys: values
        'resources': List of dictionary objects that each hold a file and its information.
                     Dictionary must be in the following format:
                         {
                            'file': binary_file,
                            'hashes': {'hash_algorithm': 'the_hash'},
                            'title': 'file.jpg',
                            'path': '/path/to/file',
                            'source_path': '/full/path/to/file',
                            'extra_metadata': {'any': 'extra'}
                         }
        'empty_containers': List of string paths representing empty containers that must be
                            written. Example: ['empty/folder/to/write/', 'another/empty/folder/']
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                              {
                                'sourceUsername': '******',
                              }
    """
    try:
        osf_instance = OSF(token)
    except PresQTInvalidTokenError:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # Get contributor name
    contributor_name = requests.get(
        'https://api.osf.io/v2/users/me/',
        headers={
            'Authorization': 'Bearer {}'.format(token)
        }).json()['data']['attributes']['full_name']
    action_metadata = {"sourceUsername": contributor_name}
    # Get the resource
    resource = get_osf_resource(resource_id, osf_instance)

    # Get all files for the provided resources.
    # The 'path' value will be the path that the file is eventually saved in. The root of the
    # path should be the resource.
    files = []
    empty_containers = []
    if resource.kind_name == 'file':
        project = osf_instance.project(resource.parent_project_id)
        files.append({
            "file": resource.download(),
            "hashes": resource.hashes,
            "title": resource.title,
            # If the file is the only resource we are downloading then we don't need it's full path
            "path": '/{}'.format(resource.title),
            "source_path": '/{}/{}{}'.format(project.title, resource.provider,
                                             resource.materialized_path),
            "extra_metadata": osf_download_metadata(resource)
        })
    else:
        if resource.kind_name == 'project':
            resource.get_all_files('', files, empty_containers)
            project = resource
        elif resource.kind_name == 'storage':
            resource.get_all_files('/{}'.format(resource.title), files,
                                   empty_containers)
            project = osf_instance.project(resource.node)
        else:
            resource.get_all_files('', files, empty_containers)
            project = osf_instance.project(resource.parent_project_id)
            # File Path needs to start at the folder and strip everything before it.
            # Example: If the resource is 'Docs2' and the starting path is
            # '/Project/Storage/Docs1/Docs2/file.jpeg' then the final path
            # needs to be '/Docs2/file.jpeg'
            # The prefix is identical for every file, so compute it once
            # instead of per iteration (it was previously recomputed inside
            # the loop).
            path_to_strip = resource.materialized_path[:-(
                len(resource.title) + 2)]
            for file in files:
                file['path'] = file['file'].materialized_path[
                    len(path_to_strip):]

        # Asynchronously make all download requests
        file_urls = [file['file'].download_url for file in files]
        loop = asyncio.new_event_loop()
        download_data = loop.run_until_complete(async_main(file_urls, token))

        # Go through the file dictionaries and replace the file class with the binary_content
        for file in files:
            file['source_path'] = '/{}/{}{}'.format(
                project.title, file['file'].provider,
                file['file'].materialized_path)
            file['file'] = get_dictionary_from_list(
                download_data, 'url',
                file['file'].download_url)['binary_content']

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata
    }
Exemplo n.º 7
0
    def handle(self, *args, **kwargs):
        """
        Interactively scaffold a new PresQT target.

        Prompts the user for the target's name, status url, supported
        endpoints, transfer partners, and hash algorithms. Then creates the
        target's package and function-stub files, registers the functions in
        function_router.py, and appends the target entry to
        presqt/specs/targets.json.
        """
        def ask_yes_no(prompt):
            # Re-prompt until the user answers Y/y/N/n; return the answer as a bool.
            while True:
                answer = input(prompt)
                if answer not in ['Y', 'y', 'N', 'n']:
                    print('Must input Y or N')
                else:
                    return answer in ['Y', 'y']

        def ask_partner_list(prompt, valid_partners, unsupported_action):
            # Re-prompt until the user enters a space-free, comma separated
            # list of recognized partner targets; return the lowercase names.
            while True:
                raw_answer = input(prompt)
                if ' ' in raw_answer:
                    print("Input can't contain spaces")
                    continue
                partners = raw_answer.lower().split(',')
                for partner in partners:
                    if partner not in valid_partners:
                        print("{} is not a recognized target, or doesn't support {}.".format(
                            partner, unsupported_action))
                        break
                else:
                    return partners

        targets_json = read_file('presqt/specs/targets.json', True)
        # Existing targets that support transfer in/out are the valid partner
        # choices offered below.
        list_of_partners_in = []
        list_of_partners_out = []
        for target in targets_json:
            if target['supported_actions']['resource_transfer_in'] == True:
                list_of_partners_in.append(target['name'])
            if target['supported_actions']['resource_transfer_out'] == True:
                list_of_partners_out.append(target['name'])

        ##### Get Input From User #####
        while True:
            target_name = input('Enter target name (use underscores not spaces): ').lower()
            if set('[~! @#$%^&*()+{}":;[],.<>`=+-\']+$\\').intersection(target_name):
                print("Target name can't contain special characters or spaces")
            else:
                break
        human_readable_target_name = input('Enter human readable target name (format however): ')

        url_validator = URLValidator()
        while True:
            status_url = input('Enter target status url (include http:// or https://): ')
            try:
                url_validator(status_url)
            except ValidationError:
                print("Target status url must be a valid url")
            else:
                break

        # The six endpoint questions are identical apart from the prompt text,
        # so they share one helper instead of six copied loops.
        resource_collection = ask_yes_no(
            'Does your target support the Resource Collection endpoint? (Y or N): ')
        resource_detail = ask_yes_no(
            'Does your target support the Resource Detail endpoint? (Y or N): ')
        resource_download = ask_yes_no(
            'Does your target support the Resource Download endpoint? (Y or N): ')
        resource_upload = ask_yes_no(
            'Does your target support the Resource Upload endpoint? (Y or N): ')
        resource_transfer_in = ask_yes_no(
            'Does your target support the Resource Transfer In endpoint? (Y or N): ')
        resource_transfer_out = ask_yes_no(
            'Does your target support the Resource Transfer Out endpoint? (Y or N): ')

        # Partners transferring IN to this target must support transfer OUT,
        # and vice versa.
        transfer_in = ask_partner_list(
            "Which PresQT partners are you allowing to transfer into your service? (comma seperated list with no spaces (use underscores))\nOptions are {}: ".format(list_of_partners_out),
            list_of_partners_out, 'resource_transfer_out')
        transfer_out = ask_partner_list(
            "Which PresQT partners are you allowing your service to transfer to? (comma seperated list with no spaces (use underscores))\nOptions are {}: ".format(list_of_partners_in),
            list_of_partners_in, 'resource_transfer_in')

        while True:
            hash_algorithms = input('Enter your supported hash algorithms (comma separated list with no spaces)')
            if ' ' in hash_algorithms:
                print("Input can't contain spaces")
                continue
            hash_algorithms = hash_algorithms.split(',')
            for hash_algorithm in hash_algorithms:
                if hash_algorithm not in hashlib.algorithms_available:
                    print('{} is not supported by the hashlib Python library'.format(hash_algorithm))
                    break
            else:
                break

        ##### Check if target exists in targets.json #####
        if get_dictionary_from_list(targets_json, 'name', target_name):
            print('Error! Target, {}, already exists in targets.json!'.format(target_name))
            return

        ##### Make Target Directory #####
        target_directory = 'presqt/targets/{}/'.format(target_name)
        try:
            os.makedirs(os.path.dirname(target_directory))
            print('Directory created: {}'.format(target_directory))
        except FileExistsError:
            print('Error! Target directory already exists!')
            return
        else:
            # Touch the package __init__ file.
            open('{}{}'.format(target_directory, '__init__.py'), 'a').close()

        ##### Make Target Function Directory #####
        target_function_dir = '{}{}/'.format(target_directory, 'functions')
        os.makedirs(os.path.dirname(target_function_dir))
        print('Directory created: {}'.format(target_function_dir))
        open('{}{}'.format(target_function_dir, '__init__.py'), 'a').close()
        print('File created: {}'.format(target_function_dir))

        ##### Make Target Action Files ####
        # target_functions maps module name -> {router variable: function name}
        # and drives both the function_router assignments and imports below.
        target_functions = {}
        if resource_collection or resource_detail:
            with open('{}fetch.py'.format(target_function_dir), 'w') as file:
                target_functions['fetch'] = {}

                if resource_collection:
                    resource_collection_function = '{}_fetch_resources'.format(target_name)
                    target_functions['fetch']['{}_resource_collection'.format(target_name)] = resource_collection_function

                    file.write('def {}(token, search_parameter):\n\tpass'.format(resource_collection_function))
                    if resource_detail:
                        file.write('\n\n')

                if resource_detail:
                    resource_detail_function = '{}_fetch_resource'.format(target_name)
                    target_functions['fetch']['{}_resource_detail'.format(target_name)] = resource_detail_function
                    file.write('def {}(token, resource_id):\n\tpass'.format(resource_detail_function))

                print('File created: {}fetch.py'.format(target_function_dir))

        if resource_download:
            with open('{}download.py'.format(target_function_dir), 'w') as file:
                resource_download_function = '{}_download_resource'.format(target_name)
                target_functions['download'] = {'{}_resource_download'.format(target_name): resource_download_function}
                file.write('def {}(token, resource_id):\n\tpass'.format(resource_download_function))
                print('File created: {}download.py'.format(target_function_dir))

        if resource_upload:
            with open('{}upload.py'.format(target_function_dir), 'w') as file:
                resource_upload_function = '{}_upload_resource'.format(target_name)
                target_functions['upload'] = {'{}_resource_upload'.format(target_name): resource_upload_function}
                file.write('def {}(token, resource_id, resource_main_dir, hash_algorithm, file_duplicate_action):\n\tpass'.format(resource_upload_function))
                print('File created: {}upload.py'.format(target_function_dir))

        ##### Write to function_router.py #####
        # Append the class-level attribute assignments first...
        with open('presqt/api_v1/utilities/utils/function_router.py', 'a') as file:
            if target_functions:
                file.write('\n')

            for file_name, file_name_dict in target_functions.items():
                for variable_name, function_name in file_name_dict.items():
                    file.write('    {} = {}\n'.format(variable_name, function_name))

        # ...then prepend the matching import lines to the top of the module.
        with open('presqt/api_v1/utilities/utils/function_router.py', 'r+') as file:
            content = file.read()
            file.seek(0, 0)

            new_imports = ''
            for file_name, file_name_dict in target_functions.items():
                new_imports += 'from presqt.targets.{}.functions.{} import {}\n'.format(target_name, file_name, ', '.join(file_name_dict.values()))

            file.write(new_imports + content)
        print('File updated: presqt/api_v1/utilities/utils/function_router.py')

        ##### Write to targets.json #####
        target_dict = {
            "name": target_name,
            "readable_name": human_readable_target_name,
            "status_url": status_url,
            "supported_actions": {
                "resource_collection": resource_collection,
                "resource_detail": resource_detail,
                "resource_download": resource_download,
                "resource_upload": resource_upload,
                "resource_transfer_in": resource_transfer_in,
                "resource_transfer_out": resource_transfer_out
            },
            "supported_transfer_partners": {
                "transfer_in": transfer_in,
                "transfer_out": transfer_out
            },
            "supported_hash_algorithms": hash_algorithms
        }

        data = read_file('presqt/specs/targets.json', True)
        data.append(target_dict)
        write_file('presqt/specs/targets.json', data, True)
        print('File updated: presqt/specs/targets.json')
Exemplo n.º 8
0
def figshare_download_resource(token, resource_id, process_info_path, action):
    """
    Fetch the requested resource from FigShare along with its hash information.

    Parameters
    ----------
    token : str
        User's FigShare token
    resource_id : str
        ID of the resource requested. Colon separated as 'project',
        'project:article', or 'project:article:file'.
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources': List of dictionary objects that each hold a file and its information.
                     Dictionary must be in the following format:
                         {
                            'file': binary_file,
                            'hashes': {'hash_algorithm': 'the_hash'},
                            'title': 'file.jpg',
                            'path': '/path/to/file',
                            'source_path': '/full/path/to/file',
                            'extra_metadata': {'any': 'extra'}
                         }
        'empty_containers': List of string paths representing empty containers that must
                            be written. Example: ['empty/folder/to/write/', 'another/empty/folder/']
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                               {'sourceUsername': 'some_username'}
        'extra_metadata': Dictionary of extra metadata gathered for the resource
                          (only populated for full-project downloads).

    Raises
    ------
    PresQTResponseException
        401 if the token is invalid, 404 if the resource can't be found.
    """
    try:
        headers, username = validation_check(token)
    except PresQTResponseException:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    split_id = str(resource_id).split(":")
    extra_metadata = {}

    # BUG FIX: an id with more than two colons previously fell through every
    # branch below and crashed with a NameError on 'empty_containers' at the
    # return statement. Reject the malformed id as "not found" up front.
    if len(split_id) > 3:
        raise PresQTResponseException(
            "The resource could not be found by the requesting user.",
            status.HTTP_404_NOT_FOUND)

    # But first we need to see whether it is a public project, or a private project.
    project_url = "https://api.figshare.com/v2/account/projects/{}".format(
        split_id[0])
    response = requests.get(project_url, headers=headers)
    if response.status_code != 200:
        # Looking for a private project was unsuccessful, try a public project.
        project_url = "https://api.figshare.com/v2/projects/{}".format(
            split_id[0])
        response = requests.get(project_url, headers=headers)
        if response.status_code != 200:
            # Project id is invalid
            raise PresQTResponseException(
                "The resource could not be found by the requesting user.",
                status.HTTP_404_NOT_FOUND)
    data = response.json()
    project_name = data['title']

    # Flags to be used for file checks.
    file_urls = None
    files = None

    if len(split_id) == 1:
        # Download the contents of the project and build the list of file urls to download.
        articles_url = project_url + "/articles"
        files, empty_containers, action_metadata = download_project(
            username, articles_url, headers, project_name, [])
        file_urls = [file['file'] for file in files]
        extra_metadata = extra_metadata_helper(project_url, headers)

    else:
        # len(split_id) is 2 or 3 here (guarded above).
        # We have an article or a file so we need to get the article url.
        article_url = "https://api.figshare.com/v2/account/projects/{}/articles/{}".format(
            split_id[0], split_id[1])
        response = requests.get(article_url, headers=headers)

        if response.status_code != 200:
            # Let's see if this is a public article....
            article_url = "https://api.figshare.com/v2/articles/{}".format(
                split_id[1])
            response = requests.get(article_url, headers=headers)

            if response.status_code != 200:
                # We couldn't find the article.
                raise PresQTResponseException(
                    "The resource could not be found by the requesting user.",
                    status.HTTP_404_NOT_FOUND)
        if len(split_id) == 2:
            # Download the contents of the article and build the list of file urls to download.
            files, empty_containers, action_metadata = download_article(
                username, article_url, headers, project_name, [])
            file_urls = [file['file'] for file in files]

        elif len(split_id) == 3:
            update_process_info_message(process_info_path, action,
                                        'Downloading files from FigShare...')
            # Add the total number of articles to the process info file.
            # This is necessary to keep track of the progress of the request.
            update_process_info(process_info_path, 1, action, 'download')

            # Single file download: scan the article's files for the matching id.
            data = response.json()
            for file in data['files']:
                if str(file['id']) == split_id[2]:
                    files = [{
                        "file":
                        requests.get(file['download_url'],
                                     headers=headers).content,
                        "hashes": {
                            "md5": file['computed_md5']
                        },
                        "title":
                        file['name'],
                        "path":
                        "/{}".format(file['name']),
                        "source_path":
                        "/{}/{}/{}".format(project_name, data['title'],
                                           file['name']),
                        "extra_metadata": {
                            "size": file['size']
                        }
                    }]
                    # Increment the number of files done in the process info file.
                    increment_process_info(process_info_path, action,
                                           'download')

                    empty_containers = []
                    action_metadata = {"sourceUsername": username}
            if not files:
                # We could not find the file.
                raise PresQTResponseException(
                    "The resource could not be found by the requesting user.",
                    status.HTTP_404_NOT_FOUND)

    # Project/article downloads gathered urls instead of content; fetch them now.
    if file_urls:
        update_process_info_message(process_info_path, action,
                                    'Downloading files from FigShare...')
        # Add the total number of articles to the process info file.
        # This is necessary to keep track of the progress of the request.
        update_process_info(process_info_path, len(file_urls), action,
                            'download')

        # Start the async calls for project or article downloads
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        download_data = loop.run_until_complete(
            async_main(file_urls, headers, process_info_path, action))

        # Go through the file dictionaries and replace the file path with the binary_content
        for file in files:
            file['file'] = get_dictionary_from_list(
                download_data, 'url', file['file'])['binary_content']

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata,
        'extra_metadata': extra_metadata
    }
Exemplo n.º 9
0
def gitlab_download_resource(token, resource_id, process_info_path, action):
    """
    Fetch the requested resource from GitLab along with its hash information.

    Parameters
    ----------
    token : str
        User's GitLab token
    resource_id : str
        ID of the resource requested. Colon separated as 'project',
        'project:directory+path', or 'project:encoded%2Efile+path'.
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources': List of dictionary objects that each hold a file and its information.
                     Dictionary must be in the following format:
                         {
                            'file': binary_file,
                            'hashes': {'hash_algorithm': 'the_hash'},
                            'title': 'file.jpg',
                            'path': '/path/to/file',
                            'source_path': '/full/path/to/file',
                            'extra_metadata': {'any': 'extra'}
                         }
        'empty_containers': List of string paths representing empty containers that must
                            be written. Example: ['empty/folder/to/write/', 'another/empty/folder/']
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                               {'sourceUsername': 'some_username'}
        'extra_metadata': Dictionary of extra metadata gathered for the resource
                          (only populated for full-project downloads).

    Raises
    ------
    PresQTResponseException
        401 if the token is invalid, 404 if the resource can't be found.
    """
    try:
        header, user_id = validation_check(token)
    except PresQTResponseException:
        raise PresQTResponseException("Token is invalid. Response returned a 401 status code.",
                                      status.HTTP_401_UNAUTHORIZED)

    # Get the user's GitLab username for action metadata
    username = requests.get("https://gitlab.com/api/v4/user", headers=header).json()['username']

    # CONSISTENCY FIX: coerce to str like the FigShare target does, so an
    # integer project id doesn't crash the str-only operations below.
    resource_id = str(resource_id)

    # str.partition leaves the whole string in slot 0 when ':' is absent, so
    # this single expression covers both 'project' and 'project:path' ids.
    partitioned_id = resource_id.partition(':')
    project_id = partitioned_id[0]

    project_url = 'https://gitlab.com/api/v4/projects/{}'.format(project_id)

    response = requests.get(project_url, headers=header)
    if response.status_code != 200:
        raise PresQTResponseException(
            'The resource with id, {}, does not exist for this user.'.format(resource_id),
            status.HTTP_404_NOT_FOUND)

    project_name = response.json()['name']
    extra_metadata = {}
    if ':' not in resource_id:
        # This is for a project
        all_files_url = "https://gitlab.com/api/v4/projects/{}/repository/tree?recursive=1".format(
            resource_id)
        data = gitlab_paginated_data(header, user_id, all_files_url)
        is_project = True
        # Get extra metadata
        extra_metadata = extra_metadata_helper(response.json(), header)

    elif ':' in resource_id and '%2E' not in resource_id:
        # This is for a directory
        all_files_url = "https://gitlab.com/api/v4/projects/{}/repository/tree?path={}&recursive=1".format(
            partitioned_id[0], partitioned_id[2].replace('+', ' '))
        data = gitlab_paginated_data(header, user_id, all_files_url)
        if not data:
            raise PresQTResponseException(
                'The resource with id, {}, does not exist for this user.'.format(resource_id),
                status.HTTP_404_NOT_FOUND)
        is_project = False

    else:
        update_process_info_message(process_info_path, action, 'Downloading files from GitLab...')
        # Add the total number of projects to the process info file.
        # This is necessary to keep track of the progress of the request.
        update_process_info(process_info_path, 1, action, 'download')

        # This is a single file
        data = requests.get('https://gitlab.com/api/v4/projects/{}/repository/files/{}?ref=master'.format(
            project_id, partitioned_id[2].replace('+', ' ')), headers=header).json()
        # GitLab reports errors as a 'message' key in the JSON body.
        if 'message' in data:
            raise PresQTResponseException(
                'The resource with id, {}, does not exist for this user.'.format(resource_id),
                status.HTTP_404_NOT_FOUND)

        # Increment the number of files done in the process info file.
        increment_process_info(process_info_path, action, 'download')
        # File contents arrive base64 encoded from the GitLab files endpoint.
        return {
            'resources': [{
                'file': base64.b64decode(data['content']),
                'hashes': {'sha256': data['content_sha256']},
                'title': data['file_name'],
                'path': '/{}'.format(data['file_name']),
                'source_path': data['file_path'],
                'extra_metadata': {}}],
            'empty_containers': [],
            'action_metadata': {'sourceUsername': username},
            'extra_metadata': extra_metadata
        }

    # Project/directory downloads: build the file list, then fetch the binaries async.
    files, empty_containers, action_metadata = download_content(
        username, project_name, project_id, data, [], is_project)
    file_urls = [file['file'] for file in files]

    update_process_info_message(process_info_path, action, 'Downloading files from GitLab...')
    # Add the total number of projects to the process info file.
    # This is necessary to keep track of the progress of the request.
    update_process_info(process_info_path, len(file_urls), action, 'download')

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    download_data = loop.run_until_complete(
        async_main(file_urls, header, process_info_path, action))

    # Go through the file dictionaries and replace the file path with the binary_content
    # and replace the hashes with the correct file hashes
    for file in files:
        file['hashes'] = get_dictionary_from_list(
            download_data, 'url', file['file'])['hashes']
        file['file'] = get_dictionary_from_list(
            download_data, 'url', file['file'])['binary_content']

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata,
        'extra_metadata': extra_metadata
    }