def get_resources(self, process_info_path, url=None):
    """
    Get all of the user's resources. To batch calls together asynchronously we will group calls
    together by projects, then storages, then each storage's resources.

    Parameters
    ----------
    process_info_path : str
        Path to the process info file used to track this request's progress.
    url : str, optional
        URL forwarded to self.projects(); presumably a pagination/entry-point
        override — TODO confirm with callers.

    Returns
    -------
    list
        Flat list of resource entries: projects/subprojects, storages, and each
        storage's resources.
    """
    resources = []
    all_projects, top_level_projects = self.projects(url)

    # Add all top level projects and subprojects to the resources list
    self.iter_project_hierarchy(all_projects, top_level_projects, resources)

    # Add all storages to the resource list; returns the storage links to fetch next
    user_storages_links = self.iter_project_storages(all_projects, resources)

    # Get initial resources for all storages (batched asynchronously, with pagination)
    all_storages_resources = run_urls_async_with_pagination(self, user_storages_links)

    # Add the total number of storages to the process info file.
    # This is necessary to keep track of the progress of the request.
    update_process_info(process_info_path, len(all_storages_resources),
                        'resource_collection', 'fetch')

    # Loop through the storage resources to either add them to the main resources list or
    # traverse further down the tree to get their children resources.
    for storage_resources in all_storages_resources:
        # Increment the number of storages done in the process info file.
        increment_process_info(process_info_path, 'resource_collection', 'fetch')
        # TODO: first truthiness check guards against private-file errors — look into it
        if storage_resources and storage_resources['data']:
            # Calculate the given resource's container_id ("<project_id>:<provider>")
            parent_project_id = storage_resources['data'][0]['relationships']['node']['data']['id']
            parent_storage = storage_resources['data'][0]['attributes']['provider']
            container_id = '{}:{}'.format(parent_project_id, parent_storage)
            self.iter_resources_objects(storage_resources, resources, container_id)
    return resources
def figshare_upload_resource(token, resource_id, resource_main_dir, hash_algorithm,
                             file_duplicate_action, process_info_path, action):
    """
    Upload the files found in the resource_main_dir to the target.

    Parameters
    ----------
    token : str
        User's token.
    resource_id : str
        ID of the resource requested.
    resource_main_dir : str
        Path to the main directory for the resources to be uploaded.
    hash_algorithm : str
        Hash algorithm we are using to check for fixity.
    file_duplicate_action : str
        The action to take when a duplicate file is found
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources_ignored' : Array of string file paths of files that were ignored when
        uploading the resource.
        'resources_updated' : Array of string file paths of files that were updated when
        uploading the resource.
        'action_metadata': Dictionary containing action metadata:
            {'destinationUsername': '******'}
        'file_metadata_list': List of dictionaries for each file that contains metadata
        and hash info:
            {"actionRootPath": '/path/on/disk',
             "destinationPath": '/path/on/target/destination',
             "title": 'file_title',
             "destinationHash": {'hash_algorithm': 'the_hash'}}
        'project_id': ID of the parent project for this upload. Needed for metadata upload.
        'project_link': The link to either the resource or the home page of the user
        if not available through API

    FigShare's Upload Process
        1. Initiate new file upload (POST) within the article. Send file size, md5, and
           name but no file contents yet.
        2. Send a GET request to the 'Uploader Service' to determine that the status is
           "Pending" and how many parts to split the upload into.
        3. Split the file into the correct number of parts and upload each using a PUT request.
        4. Send a POST request to complete the upload.
    """
    try:
        headers, username = validation_check(token)
    except PresQTResponseException:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    os_path = next(os.walk(resource_main_dir))
    total_files = upload_total_files(resource_main_dir)

    # Update process info file
    update_process_info(process_info_path, total_files, action, 'upload')
    update_process_info_message(process_info_path, action,
                                "Uploading files to FigShare...")

    resources_ignored = []
    resources_updated = []
    file_metadata_list = []
    action_metadata = {'destinationUsername': username}

    # Upload a new project
    if not resource_id:
        project_title = os_path[1][0]
        # Create a new project with the name being the top level directory's name.
        project_name, project_id = create_project(project_title, headers, token)
        # Create article, for now we'll name it the same as the project
        article_id = create_article(project_title, headers, project_id)
    else:
        # Upload to an existing project
        split_id = str(resource_id).split(":")
        project_id = split_id[0]
        try:
            project_title = requests.get(
                "https://api.figshare.com/v2/account/projects/{}".format(project_id),
                headers=headers).json()['title']
        except KeyError:
            raise PresQTResponseException(
                "Project with id, {}, could not be found by the requesting user."
                .format(project_id), status.HTTP_400_BAD_REQUEST)

        if len(split_id) == 1:
            # We only have a project and we need to make a new article id.
            # Check to see if an article with this name already exists.
            articles = requests.get(
                "https://api.figshare.com/v2/account/projects/{}/articles".format(
                    project_id), headers=headers).json()
            article_titles = [article['title'] for article in articles]
            new_title = get_duplicate_title(project_title, article_titles, "(PresQT*)")
            article_id = create_article(new_title, headers, resource_id)
        elif len(split_id) == 2:
            article_id = split_id[1]
        else:
            # Can't upload to file
            raise PresQTResponseException(
                "Can not upload into an existing file.",
                status.HTTP_400_BAD_REQUEST)

    # Get the article title
    try:
        article_title = requests.get(
            "https://api.figshare.com/v2/account/articles/{}".format(article_id),
            headers=headers).json()['title']
    except KeyError:
        raise PresQTResponseException(
            "Article with id, {}, could not be found by the requesting user.".format(
                article_id), status.HTTP_400_BAD_REQUEST)

    # Get md5, size and name of zip file to be uploaded
    for path, subdirs, files in os.walk(resource_main_dir):
        for name in files:
            # BUGFIX: open the file with a context manager so the handle is always
            # closed — previously the handle leaked for every file, and on any
            # exception raised during the upload.
            with open(os.path.join(path, name), 'rb') as file_info:
                zip_hash = hash_generator(file_info.read(), 'md5')
                # NOTE(review): the cursor is at EOF after the read above; presumably
                # figshare_file_upload_process seeks/re-reads — TODO confirm.
                figshare_file_upload_process(file_info, headers, name, article_id,
                                             file_type='zip', path=path)
            file_metadata_list.append({
                'actionRootPath': os.path.join(path, name),
                'destinationPath': '/{}/{}/{}'.format(project_title, article_title, name),
                'title': name,
                'destinationHash': zip_hash})
            increment_process_info(process_info_path, action, 'upload')

    return {
        "resources_ignored": resources_ignored,
        "resources_updated": resources_updated,
        "action_metadata": action_metadata,
        "file_metadata_list": file_metadata_list,
        "project_id": "{}:{}".format(project_id, article_id),
        "project_link": "https://figshare.com/account/home#/projects"
    }
def download_directory(header, path_to_resource, repo_data, process_info_path, action):
    """
    Go through a repo's tree and download all files inside of a given resource directory path.

    Parameters
    ----------
    header: dict
        API header expected by GitHub
    path_to_resource: str
        The path to the requested directory
    repo_data: dict
        Repository data gathered in the repo GET request
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    A list of dictionaries for each file being downloaded
    """
    repo_name = repo_data['name']
    # Strip {/sha} off the end
    trees_url = '{}/master?recursive=1'.format(repo_data['trees_url'][:-6])
    contents = requests.get(trees_url, headers=header).json()
    # FIX: fall back to the 'main' branch when 'master' does not exist. The trees
    # endpoint returns a 'message' key on error; this mirrors the fallback already
    # used by the upload path in this module.
    if 'message' in contents:
        trees_url = '{}/main?recursive=1'.format(repo_data['trees_url'][:-6])
        contents = requests.get(trees_url, headers=header).json()

    number_of_files = len([
        file for file in contents['tree']
        if file['path'].startswith(path_to_resource) and file['type'] == 'blob'
    ])

    # Add the total number of files to the process info file.
    # This is necessary to keep track of the progress of the request.
    update_process_info(process_info_path, number_of_files, action, 'download')
    update_process_info_message(process_info_path, action,
                                'Downloading files from GitHub...')

    files = []
    for resource in contents['tree']:
        if resource['path'].startswith(
                path_to_resource) and resource['type'] == 'blob':
            # Strip the requested directory's parents off the directory path
            path_to_strip = path_to_resource.rpartition('/')[0]
            if path_to_strip:
                directory_path = '{}'.format(
                    resource['path'].partition(path_to_strip)[2])
            else:
                directory_path = '/{}'.format(resource['path'])

            # BUGFIX: pass the auth header on the blob fetch as well. It was
            # previously unauthenticated, which fails for private repositories and
            # is throttled by GitHub's much lower anonymous rate limit.
            file_data = requests.get(resource['url'], headers=header).json()

            files.append({
                'file': base64.b64decode(file_data['content']),
                'hashes': {},
                # NOTE(review): rpartition('/')[0] is the parent directory, not the
                # file name ([2] would be) — behavior kept as-is; confirm intent.
                'title': resource['path'].rpartition('/')[0],
                'path': directory_path,
                'source_path': '/{}/{}'.format(repo_name, resource['path']),
                'extra_metadata': {}
            })
            # Increment the number of files done in the process info file.
            increment_process_info(process_info_path, action, 'download')
    return files
def zenodo_download_resource(token, resource_id, process_info_path, action):
    """
    Fetch the requested resource from Zenodo along with its hash information.

    Parameters
    ----------
    token : str
        User's Zenodo token
    resource_id : str
        ID of the resource requested
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources': List of dictionary objects that each hold a file and its information.
                     Dictionary must be in the following format:
                         {
                            'file': binary_file,
                            'hashes': {'hash_algorithm': 'the_hash'},
                            'title': 'file.jpg',
                            'path': '/path/to/file',
                            'metadata': {
                                'sourcePath': '/full/path/at/source.jpg',
                                'title': 'file_title',
                                'sourceHashes': {'hash_algorithm': 'the_hash'},
                                'extra': {'any': 'extra'}
                            }
                         }
        'empty_containers: List of string paths representing empty containers that must
                           be written.
                           Example: ['empty/folder/to/write/', 'another/empty/folder/']
        'action_metadata': Dictionary containing action metadata. Must be in the format:
                           {'sourceUsername': '******'}
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTResponseException:
        raise PresQTResponseException(
            'Token is invalid. Response returned a 401 status code.',
            status.HTTP_401_UNAUTHORIZED)

    files = []
    empty_containers = []
    extra_metadata = {}
    base_url = None

    # If the resource_id is longer than 7 characters, the resource is an individual file
    # (NOTE(review): length-based heuristic — presumably file ids are UUID-like while
    # record/deposition ids are short integers; confirm against Zenodo id scheme.)
    if len(resource_id) > 7:
        # First we need to check if the file id given belongs to a public published record.
        zenodo_file = requests.get(
            'https://zenodo.org/api/files/{}'.format(resource_id),
            params=auth_parameter)

        if zenodo_file.status_code != 200:
            # If not, we need to loop through their depositions to look for the file.
            zenodo_projects = requests.get(
                'https://zenodo.org/api/deposit/depositions',
                params=auth_parameter).json()
            for entry in zenodo_projects:
                project_files = requests.get(entry['links']['self'],
                                             params=auth_parameter).json()
                for file in project_files['files']:
                    if file['id'] == resource_id:
                        base_url = entry['links']['self']
                        file_url = file['links']['self']
                        is_record = False
                        break
                else:
                    # If the file wasn't found we want to continue the loop.
                    continue
                # Inner loop broke: the file was found, stop scanning depositions.
                break
        else:
            # Public published record: the files endpoint serves the file directly.
            is_record = True
            base_url = 'https://zenodo.org/api/files/{}'.format(resource_id)
            file_url = 'https://zenodo.org/api/files/{}'.format(resource_id)

        # base_url still None means neither the public endpoint nor any deposition
        # contained this file id.
        if base_url is None:
            raise PresQTResponseException(
                "The resource with id, {}, does not exist for this user.".format(
                    resource_id), status.HTTP_404_NOT_FOUND)

        update_process_info_message(process_info_path, action,
                                    'Downloading files from Zenodo...')
        # Add the total number of files (one) to the process info file.
        # This is necessary to keep track of the progress of the request.
        update_process_info(process_info_path, 1, action, 'download')

        files, action_metadata = zenodo_download_helper(
            is_record, base_url, auth_parameter, files, file_url)

        # Increment the number of files done in the process info file.
        increment_process_info(process_info_path, action, 'download')

    # Otherwise, it's a full project
    else:
        # Try the public records endpoint first; fall back to the user's depositions.
        base_url = 'https://zenodo.org/api/records/{}'.format(resource_id)
        zenodo_record = requests.get(base_url, params=auth_parameter)
        is_record = True

        if zenodo_record.status_code != 200:
            base_url = 'https://zenodo.org/api/deposit/depositions/{}'.format(
                resource_id)
            is_record = False
        try:
            files, action_metadata = zenodo_download_helper(
                is_record, base_url, auth_parameter, files)
        except PresQTResponseException:
            raise PresQTResponseException(
                "The resource with id, {}, does not exist for this user.".format(
                    resource_id), status.HTTP_404_NOT_FOUND)

        extra_metadata = extra_metadata_helper(base_url, is_record, auth_parameter)

        # In the project case the helper returns file URLs; fetch them all at once.
        file_urls = [file['file'] for file in files]

        update_process_info_message(process_info_path, action,
                                    'Downloading files from Zenodo...')
        # Add the total number of files to the process info file.
        # This is necessary to keep track of the progress of the request.
        update_process_info(process_info_path, len(file_urls), action, 'download')

        # Download every file asynchronously on a fresh event loop.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        download_data = loop.run_until_complete(
            async_main(file_urls, auth_parameter, process_info_path, action))

        # Go through the file dictionaries and replace the file path with the binary_content
        for file in files:
            file['file'] = get_dictionary_from_list(
                download_data, 'url', file['file'])['binary_content']

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata,
        'extra_metadata': extra_metadata
    }
def zenodo_upload_resource(token, resource_id, resource_main_dir, hash_algorithm,
                           file_duplicate_action, process_info_path, action):
    """
    Upload the files found in the resource_main_dir to Zenodo.

    Parameters
    ----------
    token : str
        User's token.
    resource_id : str
        ID of the resource requested.
    resource_main_dir : str
        Path to the main directory for the resources to be uploaded.
    hash_algorithm : str
        Hash algorithm we are using to check for fixity.
    file_duplicate_action : str
        The action to take when a duplicate file is found
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources_ignored' : Array of string file paths of files ignored during upload.
        'resources_updated' : Array of string file paths of files updated during upload.
        'action_metadata': {'destinationUsername': '******'}
        'file_metadata_list': List of per-file dictionaries:
            {"actionRootPath": '/path/on/disk',
             "destinationPath": '/path/on/target/destination',
             "title": 'file_title',
             "destinationHash": {'hash_algorithm': 'the_hash'}}
        'project_id': ID of the parent project for this upload. Needed for metadata upload.
        'project_link': The link to either the resource or the home page of the user
        if not available through API
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    walk_root = next(os.walk(resource_main_dir))

    # Record the file count up front so the caller can track upload progress.
    update_process_info(process_info_path, upload_total_files(resource_main_dir),
                        action, 'upload')
    update_process_info_message(process_info_path, action,
                                "Uploading files to Zenodo...")

    # Zenodo never reports a destination username for uploads.
    action_metadata = {"destinationUsername": None}

    # Zenodo is a finite-depth target, so path validity was already checked upstream.
    if resource_id:
        # Uploading into an existing deposition — resolve its title for later use.
        deposition = requests.get(
            "https://zenodo.org/api/deposit/depositions/{}".format(resource_id),
            params=auth_parameter).json()
        if 'title' not in deposition:
            raise PresQTResponseException(
                "Can't find the resource with id {}, on Zenodo".format(resource_id),
                status.HTTP_404_NOT_FOUND)
        final_title = deposition['title']
    else:
        # Brand-new project: name it after the top-level directory, de-duplicated
        # against the titles of the user's existing depositions.
        requested_title = walk_root[1][0]
        existing_depositions = requests.get(
            "https://zenodo.org/api/deposit/depositions",
            params=auth_parameter).json()
        existing_titles = [entry['title'] for entry in existing_depositions]
        final_title = get_duplicate_title(requested_title, existing_titles,
                                          ' (PresQT*)')
        resource_id = zenodo_upload_helper(auth_parameter, final_title)

    post_url = "https://zenodo.org/api/deposit/depositions/{}/files".format(resource_id)
    return zenodo_upload_loop(action_metadata, resource_id, resource_main_dir,
                              post_url, auth_parameter, final_title,
                              file_duplicate_action, process_info_path, action)
def github_upload_resource(token, resource_id, resource_main_dir, hash_algorithm,
                           file_duplicate_action, process_info_path, action):
    """
    Upload the files found in the resource_main_dir to the target.

    Parameters
    ----------
    token : str
        User's token.
    resource_id : str
        ID of the resource requested.
    resource_main_dir : str
        Path to the main directory for the resources to be uploaded.
    hash_algorithm : str
        Hash algorithm we are using to check for fixity.
    file_duplicate_action : str
        The action to take when a duplicate file is found
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources_ignored' : Array of string file paths of files ignored during upload.
        'resources_updated' : Array of string file paths of files updated during upload.
        'action_metadata': {'destinationUsername': '******'}
        'file_metadata_list': List of per-file dictionaries:
            {"actionRootPath": '/path/on/disk',
             "destinationPath": '/path/on/target/destination',
             "title": 'file_title',
             "destinationHash": {'hash_algorithm': 'the_hash'}}
        'project_id': ID of the parent project for this upload. Needed for metadata upload.
        'project_link': The link to either the resource or the home page of the user
        if not available through API
    """
    try:
        header, username = validation_check(token)
    except PresQTResponseException:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    os_path = next(os.walk(resource_main_dir))

    # Get total amount of files
    total_files = upload_total_files(resource_main_dir)
    update_process_info(process_info_path, total_files, action, 'upload')
    update_process_info_message(process_info_path, action,
                                "Uploading files to GitHub...")

    # Upload a new repository
    if not resource_id:
        # Create a new repository with the name being the top level directory's name.
        # Note: GitHub doesn't allow spaces, parentheses, or colons in repo names.
        repo_title = os_path[1][0].replace(' ', '_').replace("(", "-").replace(
            ")", "-").replace(":", "-")
        repo_name, repo_id, repo_url = create_repository(repo_title, token)
        resources_ignored = []
        resources_updated = []
        action_metadata = {"destinationUsername": username}
        file_metadata_list = []
        for path, subdirs, files in os.walk(resource_main_dir):
            # A directory with no children is recorded as ignored (nothing to upload).
            if not subdirs and not files:
                resources_ignored.append(path)
            for name in files:
                # Extract and encode the file bytes in the way expected by GitHub.
                file_bytes = open(os.path.join(path, name), 'rb').read()
                encoded_file = base64.b64encode(file_bytes).decode('utf-8')
                # A relative path to the file is what is added to the GitHub PUT address
                path_to_add = os.path.join(path.partition('/data/')[2], name)
                path_to_add_to_url = path_to_add.partition('/')[2].replace(' ', '_')
                finished_path = '/' + repo_name + '/' + path_to_add_to_url
                file_metadata_list.append({
                    "actionRootPath": os.path.join(path, name),
                    "destinationPath": finished_path,
                    "title": name,
                    # GitHub's contents API doesn't return a usable fixity hash here.
                    "destinationHash": None})
                put_url = "https://api.github.com/repos/{}/{}/contents/{}".format(
                    username, repo_name, path_to_add_to_url)
                data = {
                    "message": "PresQT Upload",
                    "committer": {"name": "PresQT", "email": "N/A"},
                    "content": encoded_file}
                file_response = requests.put(put_url, headers=header,
                                             data=json.dumps(data))
                if file_response.status_code != 201:
                    raise PresQTResponseException(
                        "Github returned the following error: '{}'".format(
                            str(file_response.json()['message'])),
                        status.HTTP_400_BAD_REQUEST)
                # Increment the file counter
                increment_process_info(process_info_path, action, 'upload')
    else:
        # Upload to an existing repository
        if ':' not in resource_id:
            repo_id = resource_id
            path_to_upload_to = ''
        # Upload to an existing directory
        else:
            # resource_id is "<repo_id>:<percent-encoded path>"
            partitioned_id = resource_id.partition(':')
            repo_id = partitioned_id[0]
            path_to_upload_to = '/{}'.format(partitioned_id[2]).replace(
                '%2F', '/').replace('%2E', '.')

        # Get initial repo data for the resource requested
        repo_url = 'https://api.github.com/repositories/{}'.format(repo_id)
        response = requests.get(repo_url, headers=header)
        if response.status_code != 200:
            raise PresQTResponseException(
                'The resource with id, {}, does not exist for this user.'.format(
                    resource_id), status.HTTP_404_NOT_FOUND)
        repo_data = response.json()
        repo_name = repo_data['name']
        repo_url = repo_data['svn_url']

        # Get all repo resources so we can check if any files already exist.
        # The trees endpoint returns a 'message' key on error, so fall back from
        # the 'master' branch to 'main'.
        repo_resources = requests.get('{}/master?recursive=1'.format(
            repo_data['trees_url'][:-6]), headers=header).json()
        if 'message' in repo_resources:
            repo_resources = requests.get('{}/main?recursive=1'.format(
                repo_data['trees_url'][:-6]), headers=header).json()
        current_file_paths = []
        for resource in repo_resources['tree']:
            if resource['type'] == 'blob':
                current_file_paths.append('/' + resource['path'])

        # Check if the provided path to upload to is actually a path to an existing file
        if path_to_upload_to in current_file_paths:
            raise PresQTResponseException(
                'The Resource provided, {}, is not a container'.format(resource_id),
                status.HTTP_400_BAD_REQUEST)

        resources_ignored = []
        resources_updated = []
        file_metadata_list = []
        sha = None
        action_metadata = {"destinationUsername": username}

        for path, subdirs, files in os.walk(resource_main_dir):
            # A directory with no children is recorded as ignored (nothing to upload).
            if not subdirs and not files:
                resources_ignored.append(path)
            for name in files:
                path_to_file = os.path.join(
                    '/', path.partition('/data/')[2], name).replace(' ', '_')
                # Check if the file already exists in this repository
                full_file_path = '{}{}'.format(path_to_upload_to, path_to_file)
                if full_file_path in current_file_paths:
                    if file_duplicate_action == 'ignore':
                        resources_ignored.append(os.path.join(path, name))
                        continue
                    else:
                        resources_updated.append(os.path.join(path, name))
                        # Get the sha — required by GitHub to update an existing file.
                        sha_url = 'https://api.github.com/repos/{}/contents{}'.format(
                            repo_data['full_name'], full_file_path)
                        sha_response = requests.get(sha_url, headers=header)
                        sha = sha_response.json()['sha']
                # Extract and encode the file bytes in the way expected by GitHub.
                file_bytes = open(os.path.join(path, name), 'rb').read()
                encoded_file = base64.b64encode(file_bytes).decode('utf-8')
                # A relative path to the file is what is added to the GitHub PUT address
                file_metadata_list.append({
                    "actionRootPath": os.path.join(path, name),
                    "destinationPath": '/{}{}{}'.format(repo_name, path_to_upload_to,
                                                        path_to_file),
                    "title": name,
                    "destinationHash": None})
                put_url = 'https://api.github.com/repos/{}/contents{}{}'.format(
                    repo_data['full_name'], path_to_upload_to, path_to_file)
                data = {
                    "message": "PresQT Upload",
                    "sha": sha,
                    "committer": {"name": "PresQT", "email": "N/A"},
                    "content": encoded_file}
                upload_response = requests.put(put_url, headers=header,
                                               data=json.dumps(data))
                if upload_response.status_code not in [200, 201]:
                    raise PresQTResponseException(
                        'Upload failed with a status code of {}'.format(
                            upload_response.status_code),
                        status.HTTP_400_BAD_REQUEST)
                # Increment the file counter
                increment_process_info(process_info_path, action, 'upload')

    return {
        'resources_ignored': resources_ignored,
        'resources_updated': resources_updated,
        'action_metadata': action_metadata,
        'file_metadata_list': file_metadata_list,
        'project_id': repo_id,
        "project_link": repo_url
    }
def osf_upload_resource(token, resource_id, resource_main_dir, hash_algorithm,
                        file_duplicate_action, process_info_path, action):
    """
    Upload the files found in the resource_main_dir to OSF.

    Parameters
    ----------
    token : str
        User's OSF token.
    resource_id : str
        ID of the resource requested.
    resource_main_dir : str
        Path to the main directory for the resources to be uploaded.
    hash_algorithm : str
        Hash algorithm we are using to check for fixity.
    file_duplicate_action : str
        The action to take when a duplicate file is found
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources_ignored' : Array of string file paths of files ignored during upload.
        'resources_updated' : Array of string file paths of files updated during upload.
        'action_metadata': {'destinationUsername': '******'}
        'file_metadata_list': List of per-file dictionaries:
            {"actionRootPath": '/path/on/disk',
             "destinationPath": '/path/on/target/destination',
             "title": 'file_title',
             "destinationHash": {'hash_algorithm': 'the_hash'}}
        'project_id': ID of the parent project for this upload. Needed for metadata upload.
        'project_link': The link to either the resource or the home page of the user
        if not available through API
    """
    try:
        osf_instance = OSF(token)
    except PresQTInvalidTokenError:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # Get contributor name
    contributor_name = requests.get(
        'https://api.osf.io/v2/users/me/',
        headers={
            'Authorization': 'Bearer {}'.format(token)
        }).json()['data']['attributes']['full_name']
    action_metadata = {"destinationUsername": contributor_name}

    # Accumulators populated in place by the create_directory calls below.
    hashes = {}
    resources_ignored = []
    resources_updated = []
    file_metadata_list = []

    # Get total amount of files
    total_files = upload_total_files(resource_main_dir)
    update_process_info(process_info_path, total_files, action, 'upload')
    update_process_info_message(process_info_path, action, "Uploading files to OSF...")

    # If we are uploading to an existing container
    if resource_id:
        # Get the resource
        resource = get_osf_resource(resource_id, osf_instance)

        # Resource being uploaded to must not be a file
        if resource.kind_name == 'file':
            raise PresQTResponseException(
                "The Resource provided, {}, is not a container".format(resource_id),
                status.HTTP_400_BAD_REQUEST)
        elif resource.kind_name == 'project':
            project = resource
            project_id = project.id
            # Projects always upload into their 'osfstorage' provider.
            resource.storage('osfstorage').create_directory(
                resource_main_dir, file_duplicate_action, hashes, resources_ignored,
                resources_updated, file_metadata_list, process_info_path, action)
        else:  # Folder or Storage
            resource.create_directory(resource_main_dir, file_duplicate_action,
                                      hashes, resources_ignored, resources_updated,
                                      file_metadata_list, process_info_path, action)
            # Get the project class for later metadata work
            if resource.kind_name == 'storage':
                project_id = resource.node
            else:
                project_id = resource.parent_project_id
            project = osf_instance.project(project_id)
    # else we are uploading a new project
    else:
        os_path = next(os.walk(resource_main_dir))

        # Get the actual data we want to upload
        data_to_upload_path = '{}/{}'.format(os_path[0], os_path[1][0])
        # Create a new project with the name being the top level directory's name.
        project = osf_instance.create_project(os_path[1][0])
        project_id = project.id

        # Upload resources into OSFStorage for the new project.
        project.storage('osfstorage').create_directory(
            data_to_upload_path, file_duplicate_action, hashes, resources_ignored,
            resources_updated, file_metadata_list, process_info_path, action)

    for file_metadata in file_metadata_list:
        # Only send forward the hash we need based on the hash_algorithm provided
        file_metadata['destinationHash'] = file_metadata['destinationHash'][
            hash_algorithm]
        # Prepend the project title to each resource's metadata destinationPath
        file_metadata['destinationPath'] = '/{}/{}'.format(
            project.title, file_metadata['destinationPath'])

    return {
        'resources_ignored': resources_ignored,
        'resources_updated': resources_updated,
        'action_metadata': action_metadata,
        'file_metadata_list': file_metadata_list,
        'project_id': project_id,
        "project_link": "https://osf.io/{}".format(project_id)
    }
def osf_download_resource(token, resource_id, process_info_path, action):
    """
    Fetch the requested resource from OSF along with its hash information.

    Parameters
    ----------
    token : str
        User's OSF token
    resource_id : str
        ID of the resource requested
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources': List of dictionary objects that each hold a file and its information.
                     Dictionary must be in the following format:
                         {
                            'file': binary_file,
                            'hashes': {'hash_algorithm': 'the_hash'},
                            'title': 'file.jpg',
                            'path': '/path/to/file',
                            'source_path: '/full/path/to/file',
                            'extra_metadata': {'any': 'extra'}
                         }
        'empty_containers: List of string paths representing empty containers that must
                           be written.
                           Example: ['empty/folder/to/write/', 'another/empty/folder/']
        'action_metadata': Dictionary containing action metadata. Must be in the format:
                           {'sourceUsername': '******'}
    """
    try:
        osf_instance = OSF(token)
    except PresQTInvalidTokenError:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # Get contributor name
    contributor_name = requests.get(
        'https://api.osf.io/v2/users/me/',
        headers={'Authorization': 'Bearer {}'.format(token)}).json()[
        'data']['attributes']['full_name']
    action_metadata = {"sourceUsername": contributor_name}

    # Get the resource
    resource = get_osf_resource(resource_id, osf_instance)

    # Get all files for the provided resources.
    # The 'path' value will be the path that the file is eventually saved in. The root of the
    # path should be the resource.
    files = []
    empty_containers = []
    extra_metadata = {}
    if resource.kind_name == 'file':
        update_process_info_message(process_info_path, action,
                                    'Downloading files from OSF...')
        # Add the total number of files (one) to the process info file.
        # This is necessary to keep track of the progress of the request.
        update_process_info(process_info_path, 1, action, 'download')

        project = osf_instance.project(resource.parent_project_id)
        files.append({
            # Single file: download synchronously right here.
            "file": resource.download(),
            "hashes": resource.hashes,
            "title": resource.title,
            # If the file is the only resource we are downloading then we don't need
            # its full path
            "path": '/{}'.format(resource.title),
            "source_path": '/{}/{}{}'.format(project.title, resource.provider,
                                             resource.materialized_path),
            "extra_metadata": osf_download_metadata(resource)
        })
        # Increment the number of files done in the process info file.
        increment_process_info(process_info_path, action, 'download')
    else:
        if resource.kind_name == 'project':
            extra_metadata = extra_metadata_helper(
                resource_id, {'Authorization': 'Bearer {}'.format(token)})
            resource.get_all_files('', files, empty_containers)
            project = resource
        elif resource.kind_name == 'storage':
            resource.get_all_files('/{}'.format(resource.title), files,
                                   empty_containers)
            project = osf_instance.project(resource.node)
        else:
            # Folder: gather files then rewrite each path relative to the folder.
            resource.get_all_files('', files, empty_containers)
            project = osf_instance.project(resource.parent_project_id)
            for file in files:
                # File Path needs to start at the folder and strip everything before it.
                # Example: If the resource is 'Docs2' and the starting path is
                # '/Project/Storage/Docs1/Docs2/file.jpeg' then the final path
                # needs to be '/Docs2/file.jpeg'
                path_to_strip = resource.materialized_path[:-(len(resource.title) + 2)]
                file['path'] = file['file'].materialized_path[len(path_to_strip):]

        # At this point each entry's 'file' is still a File object; collect its URL.
        file_urls = [file['file'].download_url for file in files]

        update_process_info_message(process_info_path, action,
                                    'Downloading files from OSF...')
        # Add the total number of files to the process info file.
        # This is necessary to keep track of the progress of the request.
        update_process_info(process_info_path, len(file_urls), action, 'download')

        # Asynchronously make all download requests
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        download_data = loop.run_until_complete(
            async_main(file_urls, token, process_info_path, action))

        # Go through the file dictionaries and replace the file class with the binary_content
        for file in files:
            file['source_path'] = '/{}/{}{}'.format(project.title,
                                                    file['file'].provider,
                                                    file['file'].materialized_path)
            file['file'] = get_dictionary_from_list(
                download_data, 'url', file['file'].download_url)['binary_content']

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata,
        'extra_metadata': extra_metadata
    }
def figshare_download_resource(token, resource_id, process_info_path, action):
    """
    Fetch the requested resource from FigShare along with its hash information.

    Parameters
    ----------
    token : str
        User's FigShare token
    resource_id : str
        ID of the resource requested. Colon-delimited: "project",
        "project:article", or "project:article:file".
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources': List of dictionary objects that each hold a file and its information.
                     Dictionary must be in the following format:
                         {
                            'file': binary_file,
                            'hashes': {'hash_algorithm': 'the_hash'},
                            'title': 'file.jpg',
                            'path': '/path/to/file',
                            'source_path: '/full/path/to/file',
                            'extra_metadata': {'any': 'extra'}
                         }
        'empty_containers: List of string paths representing empty containers that must be written.
                              Example: ['empty/folder/to/write/', 'another/empty/folder/]
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                              {
                              'sourceUsername': '******',
                              }

    Raises
    ------
    PresQTResponseException
        401 if the token fails validation; 404 if the project, article, or file
        can't be found as either a private or public resource.
    """
    try:
        headers, username = validation_check(token)
    except PresQTResponseException:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # The id parts determine the resource type: 1 part = project,
    # 2 parts = article, 3 parts = single file.
    split_id = str(resource_id).split(":")
    extra_metadata = {}
    # But first we need to see whether it is a public project, or a private project.
    project_url = "https://api.figshare.com/v2/account/projects/{}".format(
        split_id[0])

    response = requests.get(project_url, headers=headers)
    if response.status_code != 200:
        # Looking for a private project was unsuccessful, try a public project.
        project_url = "https://api.figshare.com/v2/projects/{}".format(
            split_id[0])
        response = requests.get(project_url, headers=headers)

        if response.status_code != 200:
            # Project id is invalid
            raise PresQTResponseException(
                "The resource could not be found by the requesting user.",
                status.HTTP_404_NOT_FOUND)
    data = response.json()
    project_name = data['title']

    # Flags to be used for file checks.
    file_urls = None
    files = None

    if len(split_id) == 1:
        # Download the contents of the project and build the list of file urls to download.
        articles_url = project_url + "/articles"
        files, empty_containers, action_metadata = download_project(
            username, articles_url, headers, project_name, [])
        file_urls = [file['file'] for file in files]
        extra_metadata = extra_metadata_helper(project_url, headers)

    elif len(split_id) == 2 or len(split_id) == 3:
        # We have an article or a file so we need to get the article url
        article_url = "https://api.figshare.com/v2/account/projects/{}/articles/{}".format(
            split_id[0], split_id[1])
        response = requests.get(article_url, headers=headers)
        if response.status_code != 200:
            # Let's see if this is a public article....
            article_url = "https://api.figshare.com/v2/articles/{}".format(
                split_id[1])
            response = requests.get(article_url, headers=headers)
            if response.status_code != 200:
                # We couldn't find the article.
                raise PresQTResponseException(
                    "The resource could not be found by the requesting user.",
                    status.HTTP_404_NOT_FOUND)

        if len(split_id) == 2:
            # Download the contents of the article and build the list of file urls to download.
            files, empty_containers, action_metadata = download_article(
                username, article_url, headers, project_name, [])
            file_urls = [file['file'] for file in files]

        elif len(split_id) == 3:
            update_process_info_message(process_info_path, action,
                                        'Downloading files from FigShare...')
            # Add the total number of articles to the process info file.
            # This is necessary to keep track of the progress of the request.
            update_process_info(process_info_path, 1, action, 'download')

            # Single file download: fetch the file contents synchronously here,
            # so `file_urls` stays None and the async branch below is skipped.
            data = response.json()
            for file in data['files']:
                if str(file['id']) == split_id[2]:
                    files = [{
                        "file": requests.get(file['download_url'], headers=headers).content,
                        "hashes": {
                            "md5": file['computed_md5']
                        },
                        "title": file['name'],
                        "path": "/{}".format(file['name']),
                        "source_path": "/{}/{}/{}".format(project_name, data['title'],
                                                          file['name']),
                        "extra_metadata": {
                            "size": file['size']
                        }
                    }]
                    # Increment the number of files done in the process info file.
                    increment_process_info(process_info_path, action, 'download')
            empty_containers = []
            action_metadata = {"sourceUsername": username}

    if not files:
        # We could not find the file.
        raise PresQTResponseException(
            "The resource could not be found by the requesting user.",
            status.HTTP_404_NOT_FOUND)

    if file_urls:
        update_process_info_message(process_info_path, action,
                                    'Downloading files from FigShare...')
        # Add the total number of articles to the process info file.
        # This is necessary to keep track of the progress of the request.
        update_process_info(process_info_path, len(file_urls), action, 'download')

        # Start the async calls for project or article downloads
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        download_data = loop.run_until_complete(
            async_main(file_urls, headers, process_info_path, action))

        # Go through the file dictionaries and replace the file path with the binary_content
        for file in files:
            file['file'] = get_dictionary_from_list(
                download_data, 'url', file['file'])['binary_content']

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata,
        'extra_metadata': extra_metadata
    }
def curate_nd_download_resource(token, resource_id, process_info_path, action):
    """
    Fetch the requested resource from CurateND along with its hash information.

    Parameters
    ----------
    token : str
        User's CurateND token
    resource_id : str
        ID of the resource requested
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources': List of dictionary objects that each hold a file and its information.
                     Dictionary must be in the following format:
                         {
                            'file': binary_file,
                            'hashes': {'hash_algorithm': 'the_hash'},
                            'title': 'file.jpg',
                            'path': '/path/to/file',
                            'source_path: '/full/path/to/file',
                            'extra_metadata': {'any': 'extra'}
                         }
        'empty_containers: List of string paths representing empty containers that must be written.
                              Example: ['empty/folder/to/write/', 'another/empty/folder/]
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                              {
                              'sourceUsername': '******',
                              }

    Raises
    ------
    PresQTValidationError
        401 if the token fails validation.
    """
    try:
        curate_instance = CurateND(token)
    except PresQTInvalidTokenError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # Get the resource
    resource = get_curate_nd_resource(resource_id, curate_instance)
    action_metadata = {"sourceUsername": resource.extra['depositor']}
    extra_metadata = {}

    # Get all the files for the provided resources.
    files = []
    empty_containers = []
    if resource.kind_name == 'file':
        # 'isPartOf' may be a single URL or a list of URLs; use the first.
        title_url = resource.extra['isPartOf']
        if type(title_url) is list:
            title_url = resource.extra['isPartOf'][0]
        # Get the title of the Project to add to sourcePath
        project_title = requests.get(title_url, headers={
            'X-Api-Token': '{}'.format(token)
        }).json()['title']
        # This is so we aren't missing the few extra keys that are pulled out for the PresQT payload
        resource.extra.update({
            "id": resource.id,
            "date_submitted": resource.date_submitted
        })

        update_process_info_message(process_info_path, action,
                                    'Downloading files from CurateND...')
        # Add the total number of items to the process info file.
        # This is necessary to keep track of the progress of the request.
        update_process_info(process_info_path, 1, action, 'download')

        binary_file, curate_hash = resource.download()

        files.append({
            'file': binary_file,
            'hashes': {
                'md5': curate_hash
            },
            'title': resource.title,
            # If the file is the only resource we are downloading then we don't need it's full path.
            'path': '/{}'.format(resource.title),
            'source_path': '/{}/{}'.format(project_title, resource.title),
            'extra_metadata': resource.extra
        })
        # Increment the number of files done in the process info file.
        increment_process_info(process_info_path, action, 'download')
    else:
        # Container (item/project) download: either record it as empty or
        # download every contained file asynchronously.
        if not resource.extra['containedFiles']:
            empty_containers.append('{}'.format(resource.title))
        else:
            update_process_info_message(process_info_path, action,
                                        'Downloading files from CurateND...')
            # Add the total number of items to the process info file.
            # This is necessary to keep track of the progress of the request.
            update_process_info(process_info_path,
                                len(resource.extra['containedFiles']), action,
                                'download')

            # Lookup tables keyed by download URL so the async results
            # (which come back keyed by 'url') can be matched to titles/hashes.
            title_helper = {}
            hash_helper = {}
            file_urls = []
            project_title = resource.title
            file_metadata = []
            extra_metadata = extra_metadata_helper(resource)
            for file in resource.extra['containedFiles']:
                download_url = file['downloadUrl']
                contained_file = get_curate_nd_resource(
                    file['id'], curate_instance)
                file_metadata_dict = {
                    "title": contained_file.title,
                    "extra": contained_file.extra
                }
                file_metadata.append(file_metadata_dict)
                title_helper[download_url] = contained_file.title
                hash_helper[download_url] = contained_file.md5
                # NOTE(review): this overwrites the title stored two lines
                # above with file['label'] for the same URL — confirm the
                # label (not the resource title) is the intended value.
                title_helper[file['downloadUrl']] = file['label']
                file_urls.append(file['downloadUrl'])

            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            download_data = loop.run_until_complete(
                async_main(file_urls, token, process_info_path, action))

            # Build the payload entries by joining async results back to the
            # per-file metadata collected above.
            for file in download_data:
                title = title_helper[file['url']]
                hash = hash_helper[file['url']]
                files.append({
                    'file': file['binary_content'],
                    'hashes': {
                        'md5': hash
                    },
                    'title': title,
                    "source_path": '/{}/{}'.format(project_title, title),
                    'path': '/{}/{}'.format(resource.title, title),
                    'extra_metadata': get_dictionary_from_list(file_metadata, 'title',
                                                               title)['extra']
                })

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata,
        'extra_metadata': extra_metadata
    }
def gitlab_download_resource(token, resource_id, process_info_path, action):
    """
    Fetch the requested resource from GitLab along with its hash information.

    Parameters
    ----------
    token : str
        User's GitLab token
    resource_id : str
        ID of the resource requested. Either a bare project id, or
        "project_id:encoded_path" for a directory or file within it.
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources': List of dictionary objects that each hold a file and its information.
                     Dictionary must be in the following format:
                         {
                            'file': binary_file,
                            'hashes': {'hash_algorithm': 'the_hash'},
                            'title': 'file.jpg',
                            'path': '/path/to/file',
                            'source_path: '/full/path/to/file',
                            'extra_metadata': {'any': 'extra'}
                         }
        'empty_containers: List of string paths representing empty containers that must be written.
                              Example: ['empty/folder/to/write/', 'another/empty/folder/]
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                              {
                              'sourceUsername': '******',
                              }

    Raises
    ------
    PresQTResponseException
        401 if the token fails validation; 404 if the project, directory,
        or file can't be found for this user.
    """
    try:
        header, user_id = validation_check(token)
    except PresQTResponseException:
        raise PresQTResponseException("Token is invalid. Response returned a 401 status code.",
                                      status.HTTP_401_UNAUTHORIZED)

    # Get the user's GitLab username for action metadata
    username = requests.get("https://gitlab.com/api/v4/user",
                            headers=header).json()['username']

    # The id is "project_id:encoded_path" for sub-resources; bare project id otherwise.
    partitioned_id = resource_id.partition(':')
    if ':' in resource_id:
        project_id = partitioned_id[0]
    else:
        project_id = resource_id

    project_url = 'https://gitlab.com/api/v4/projects/{}'.format(project_id)

    response = requests.get(project_url, headers=header)
    if response.status_code != 200:
        raise PresQTResponseException(
            'The resource with id, {}, does not exist for this user.'.format(resource_id),
            status.HTTP_404_NOT_FOUND)
    project_name = response.json()['name']

    extra_metadata = {}
    if ':' not in resource_id:
        # This is for a project
        all_files_url = "https://gitlab.com/api/v4/projects/{}/repository/tree?recursive=1".format(
            resource_id)
        data = gitlab_paginated_data(header, user_id, all_files_url)
        is_project = True
        # Get extra metadata
        extra_metadata = extra_metadata_helper(response.json(), header)
    elif ':' in resource_id and '%2E' not in resource_id:
        # This is for a directory ('%2E' is an encoded '.', so its absence
        # means the path has no file extension and is treated as a directory)
        all_files_url = "https://gitlab.com/api/v4/projects/{}/repository/tree?path={}&recursive=1".format(
            partitioned_id[0], partitioned_id[2].replace('+', ' '))
        data = gitlab_paginated_data(header, user_id, all_files_url)
        if not data:
            raise PresQTResponseException(
                'The resource with id, {}, does not exist for this user.'.format(resource_id),
                status.HTTP_404_NOT_FOUND)
        is_project = False
    else:
        update_process_info_message(process_info_path, action,
                                    'Downloading files from GitLab...')
        # Add the total number of projects to the process info file.
        # This is necessary to keep track of the progress of the request.
        update_process_info(process_info_path, 1, action, 'download')

        # This is a single file; GitLab returns its content base64-encoded.
        data = requests.get('https://gitlab.com/api/v4/projects/{}/repository/files/{}?ref=master'.format(
            project_id, partitioned_id[2].replace('+', ' ')), headers=header).json()
        if 'message' in data.keys():
            raise PresQTResponseException(
                'The resource with id, {}, does not exist for this user.'.format(resource_id),
                status.HTTP_404_NOT_FOUND)

        # Increment the number of files done in the process info file.
        increment_process_info(process_info_path, action, 'download')

        # Single-file case returns early — no async batch needed.
        return {
            'resources': [{
                'file': base64.b64decode(data['content']),
                'hashes': {'sha256': data['content_sha256']},
                'title': data['file_name'],
                'path': '/{}'.format(data['file_name']),
                'source_path': data['file_path'],
                'extra_metadata': {}}],
            'empty_containers': [],
            'action_metadata': {'sourceUsername': username},
            'extra_metadata': extra_metadata
        }

    files, empty_containers, action_metadata = download_content(
        username, project_name, project_id, data, [], is_project)
    file_urls = [file['file'] for file in files]

    update_process_info_message(process_info_path, action,
                                'Downloading files from GitLab...')
    # Add the total number of projects to the process info file.
    # This is necessary to keep track of the progress of the request.
    update_process_info(process_info_path, len(file_urls), action, 'download')

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    download_data = loop.run_until_complete(
        async_main(file_urls, header, process_info_path, action))

    # Go through the file dictionaries and replace the file path with the binary_content
    # and replace the hashes with the correct file hashes
    for file in files:
        file['hashes'] = get_dictionary_from_list(
            download_data, 'url', file['file'])['hashes']
        file['file'] = get_dictionary_from_list(
            download_data, 'url', file['file'])['binary_content']

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata,
        'extra_metadata': extra_metadata
    }
def gitlab_upload_resource(token, resource_id, resource_main_dir, hash_algorithm,
                           file_duplicate_action, process_info_path, action):
    """
    Upload the files found in the resource_main_dir to the target.

    Parameters
    ----------
    token : str
        User's token.
    resource_id : str
        ID of the resource requested. Empty/None means "create a new project";
        otherwise a project id or "project_id:encoded_path".
    resource_main_dir : str
        Path to the main directory for the resources to be uploaded.
    hash_algorithm : str
        Hash algorithm we are using to check for fixity.
    file_duplicate_action : str
        The action to take when a duplicate file is found
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources_ignored' : Array of string file paths of files that were ignored when
        uploading the resource. Path should have the same base as resource_main_dir.
                                Example:
                                    ['path/to/ignored/file.pg', 'another/ignored/file.jpg']
        'resources_updated' : Array of string file paths of files that were updated when
         uploading the resource. Path should have the same base as resource_main_dir.
                                 Example:
                                    ['path/to/updated/file.jpg']
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                            {
                                'destinationUsername': '******'
                            }
        'file_metadata_list': List of dictionaries for each file that contains metadata
                              and hash info. Must be in the following format:
                                {
                                    "actionRootPath": '/path/on/disk',
                                    "destinationPath": '/path/on/target/destination',
                                    "title": 'file_title',
                                    "destinationHash": {'hash_algorithm': 'the_hash'}}
                                }
        'project_id': ID of the parent project for this upload. Needed for metadata upload.
        'project_link': The link to either the resource or the home page of the user if not
        available through API

    Raises
    ------
    PresQTResponseException
        401 if the token fails validation; 400 if project creation or a file
        upload fails, or the target resource is a file; 404 if the target
        project can't be found.
    """
    base_url = "https://gitlab.com/api/v4/"
    try:
        headers, user_id = validation_check(token)
    except PresQTResponseException:
        raise PresQTResponseException("Token is invalid. Response returned a 401 status code.",
                                      status.HTTP_401_UNAUTHORIZED)
    username = requests.get("https://gitlab.com/api/v4/user",
                            headers=headers).json()['username']
    action_metadata = {"destinationUsername": username}

    os_path = next(os.walk(resource_main_dir))

    # Get total amount of files
    total_files = upload_total_files(resource_main_dir)
    update_process_info(process_info_path, total_files, action, 'upload')
    update_process_info_message(process_info_path, action, "Uploading files to GitLab...")

    resources_ignored = []
    resources_updated = []
    file_metadata_list = []
    #*** CREATE NEW PROJECT ***#
    # Create a new project with the name being the top level directory's name.
    # Check if a project with this name exists for this user
    if not resource_id:
        project_title = os_path[1][0]
        titles = [data['name'] for data in gitlab_paginated_data(headers, user_id)]
        title = get_duplicate_title(project_title, titles,
                                    '-PresQT*-').replace('(', '-').replace(')', '-')
        response = requests.post('{}projects?name={}&visibility=public'.format(
            base_url, title), headers=headers)
        if response.status_code == 201:
            project_id = response.json()['id']
            project_name = response.json()['name']
            web_url = response.json()['web_url']
        else:
            raise PresQTResponseException(
                "Response has status code {} while creating project {}.".format(
                    response.status_code, project_title), status.HTTP_400_BAD_REQUEST)

        #*** UPLOAD FILES ***#
        # Upload files to project's repository
        base_repo_path = "{}projects/{}/repository/files/".format(base_url, project_id)
        for path, subdirs, files in os.walk(resource_main_dir):
            if not subdirs and not files:
                resources_ignored.append(path)
            for name in files:
                # Strip server directories from file path
                relative_file_path = os.path.join(path.partition('/data/{}/'.format(
                    project_title))[2], name)

                # Extract and encode the file bytes in the way expected by GitLab.
                # BUGFIX: use a context manager so the file handle is closed
                # promptly instead of leaking until garbage collection.
                with open(os.path.join(path, name), 'rb') as upload_file:
                    file_bytes = upload_file.read()
                encoded_file = base64.b64encode(file_bytes)

                # A relative path to the file is what is added to the GitLab POST address
                encoded_file_path = relative_file_path.replace('/', '%2F').replace('.', '%2E')

                request_data = {"branch": "master",
                                "commit_message": "PresQT Upload",
                                "encoding": "base64",
                                "content": encoded_file}

                requests.post("{}{}".format(base_repo_path, encoded_file_path),
                              headers=headers, data=request_data)

                # Get the file hash
                file_json = requests.get("{}{}?ref=master".format(base_repo_path,
                                                                  encoded_file_path),
                                         headers=headers)
                # Increment files finished
                increment_process_info(process_info_path, action, 'upload')

                file_metadata_list.append({
                    "actionRootPath": os.path.join(path, name),
                    # This ensures that the title is up to date if there are duplicates
                    "destinationPath": os.path.join(project_name, path.partition(
                        '/data/')[2].partition('/')[2], name),
                    "title": name,
                    "destinationHash": file_json.json()['content_sha256']
                })
    else:
        # Uploading into an existing project (or a directory within it).
        if ':' not in resource_id:
            project_id = resource_id
            base_repo_url = "{}projects/{}/repository/files/".format(base_url, project_id)
            string_path_to_resource = ''
        else:
            partitioned_id = resource_id.partition(':')
            project_id = partitioned_id[0]
            base_repo_url = "{}projects/{}/repository/files/{}".format(
                base_url, project_id, partitioned_id[2])
            string_path_to_resource = partitioned_id[2].replace('%2F', '/').replace('%2E', '.')

        # Check if the resource_id belongs to a file
        tree_url = 'https://gitlab.com/api/v4/projects/{}/repository/tree?recursive=1'.format(
            project_id)
        file_data = gitlab_paginated_data(headers, None, tree_url)
        for data in file_data:
            if data['path'] == string_path_to_resource:
                if data['type'] == 'blob':
                    raise PresQTResponseException("Resource with id, {}, belongs to a file.".format(
                        resource_id), status.HTTP_400_BAD_REQUEST)

        # Get project data
        project = requests.get('{}projects/{}'.format(base_url, project_id), headers=headers)
        if project.status_code != 200:
            raise PresQTResponseException("Project with id, {}, could not be found.".format(
                project_id), status.HTTP_404_NOT_FOUND)
        project_name = project.json()['name']
        web_url = project.json()['web_url']

        for path, subdirs, files in os.walk(resource_main_dir):
            if not subdirs and not files:
                resources_ignored.append(path)
            for name in files:
                # Strip server directories from file path
                relative_file_path = os.path.join(path.partition('/data/')[2], name)

                # A relative path to the file is what is added to the GitLab POST address
                if base_repo_url == "{}projects/{}/repository/files/".format(base_url, project_id):
                    encoded_file_path = relative_file_path.replace('/', '%2F').replace('.', '%2E')
                else:
                    encoded_file_path = '%2F{}'.format(
                        relative_file_path.replace('/', '%2F').replace('.', '%2E'))
                full_encoded_url = '{}{}'.format(base_repo_url, encoded_file_path)

                ignore_file = False
                upload_request = requests.post
                file_bytes = None
                # Check if this file exists already
                for file in file_data:
                    if os.path.join(string_path_to_resource, relative_file_path) == file['path']:
                        if file_duplicate_action == 'ignore':
                            resources_ignored.append(os.path.join(path, name))
                            ignore_file = True
                            break
                        else:
                            # Duplicate found and we want to update: compare
                            # hashes to decide whether an upload is needed.
                            file_url = '{}?ref=master'.format(full_encoded_url)
                            file_response = requests.get(file_url, headers=headers)
                            # BUGFIX: close the file handle via a context manager.
                            with open(os.path.join(path, name), 'rb') as duplicate_file:
                                file_bytes = duplicate_file.read()
                            if hash_generator(file_bytes, 'sha256') == file_response.json()['content_sha256']:
                                resources_ignored.append(os.path.join(path, name))
                                ignore_file = True
                            else:
                                resources_updated.append(os.path.join(path, name))
                                upload_request = requests.put
                            # Break out of this for loop and attempt to upload this duplicate
                            break
                # If we find a file to ignore then move onto the next file in the os.walk
                if ignore_file:
                    continue

                # Extract and encode the file bytes in the way expected by GitLab.
                # BUGFIX: test against None, not falsiness — an empty file read
                # during the duplicate check is b'' (falsy) and was re-read.
                if file_bytes is None:
                    with open(os.path.join(path, name), 'rb') as upload_file:
                        file_bytes = upload_file.read()
                encoded_file = base64.b64encode(file_bytes)

                request_data = {"branch": "master",
                                "commit_message": "PresQT Upload",
                                "encoding": "base64",
                                "content": encoded_file}

                response = upload_request("{}".format(full_encoded_url), headers=headers,
                                          data=request_data)
                if response.status_code not in [201, 200]:
                    raise PresQTResponseException(
                        'Upload failed with a status code of {}'.format(response.status_code),
                        status.HTTP_400_BAD_REQUEST)

                # Get the file hash
                file_json = requests.get("{}?ref=master".format(full_encoded_url),
                                         headers=headers).json()
                # Increment files finished
                increment_process_info(process_info_path, action, 'upload')

                file_metadata_list.append({
                    "actionRootPath": os.path.join(path, name),
                    "destinationPath": os.path.join(project_name,
                                                    path.partition('/data/')[2], name),
                    "title": name,
                    "destinationHash": file_json['content_sha256']
                })

    return {
        'resources_ignored': resources_ignored,
        'resources_updated': resources_updated,
        'action_metadata': action_metadata,
        'file_metadata_list': file_metadata_list,
        'project_id': project_id,
        'project_link': web_url
    }