def update_process_info(process_info_path, total_files, action, function):
    """
    Record the total number of files involved in an action in process_info.json.

    Parameters
    ----------
    process_info_path: str
        Path to the process_info.json file to update
    total_files: int
        Total number of resources involved in the action
    action: str
        The action to update in the process_info.json object
    function: str
        The function being called
    """
    # Map the calling function to the attribute it tracks; anything else
    # falls back to the generic 'total_files' attribute.
    attribute_lookup = {
        'upload': 'upload_total_files',
        'download': 'download_total_files'
    }
    attribute = attribute_lookup.get(function, 'total_files')

    process_info_data = read_file(process_info_path, True)
    process_info_data[action][attribute] = total_files
    write_file(process_info_path, process_info_data, True)
def test_get_success_202_osf(self):
    """
    Return a 202 if the GET was successful and the resource upload is in progress.
    """
    self.url = reverse('resource_collection', kwargs={'target_name': 'osf'})
    self.file = 'presqt/api_v1/tests/resources/upload/ProjectBagItToUpload.zip'
    self.call_upload_resources()

    # Update the fixity_info.json to say the resource hasn't finished processing
    write_file(self.process_info_path, self.initial_process_info, True)

    job_url = reverse('upload_job', kwargs={'ticket_number': self.ticket_number})
    job_response = self.client.get(job_url, **self.headers)

    # Verify the status code and content
    self.assertEqual(job_response.status_code, 202)
    expected_payload = {
        'message': 'Upload is being processed on the server',
        'status_code': None
    }
    self.assertEqual(job_response.data, expected_payload)

    # Delete corresponding folder
    shutil.rmtree('mediafiles/uploads/{}'.format(self.ticket_number))
def increment_process_info(process_info_path, action, function):
    """
    Increment the files-finished counter for an action in the process_info.json file.

    The attribute incremented depends on ``function``: 'upload' increments
    'upload_files_finished', 'download' increments 'download_files_finished',
    and anything else increments the generic 'files_finished' attribute.
    (The previous docstring incorrectly claimed 'download_files_finished' was
    always the incremented attribute.)

    Parameters
    ----------
    process_info_path: str
        Path to the process_info.json file to update
    action: str
        The action to update in the process_info.json object
    function: str
        The function being called
    """
    process_info_data = read_file(process_info_path, True)

    # Get the proper dict key
    if function == 'upload':
        key = 'upload_files_finished'
    elif function == 'download':
        key = 'download_files_finished'
    else:
        key = 'files_finished'

    process_info_data[action][key] += 1

    write_file(process_info_path, process_info_data, True)
def test_success_202(self):
    """
    Return a 202 if the resource has not finished being prepared on the server.
    """
    shared_call_get_resource_zip(self, self.resource_id)

    # Update the fixity_info.json to say the resource hasn't finished processing
    write_file(self.process_info_path, self.initial_process_info, True)

    job_url = reverse('download_job', kwargs={'ticket_number': self.ticket_number})
    job_response = self.client.get(job_url, **self.header)

    # Verify the status code and content
    self.assertEqual(job_response.status_code, 202)
    expected_payload = {
        'message': 'Download is being processed on the server',
        'status_code': None
    }
    self.assertEqual(job_response.data, expected_payload)

    # Verify the status of the process_info file is 'in_progress'
    process_info = read_file(self.process_info_path, True)
    self.assertEqual(process_info['status'], 'in_progress')

    # Delete corresponding folder
    shutil.rmtree('mediafiles/downloads/{}'.format(self.ticket_number))
def update_or_create_process_info(process_obj, action, ticket_number):
    """
    Create or update the process_info.json file for a job.

    Parameters
    ----------
    process_obj: dict
        Process info dictionary to save in the process_info.json file
    action: str
        The current action which the process_obj will be saved to in the process_info.json file
    ticket_number: str
        Ticket number for user's action and also the name of the directory for process_info.json

    Returns
    -------
    Returns the path to the process_info.json file
    """
    info_path = os.path.join('mediafiles', 'jobs', str(ticket_number), 'process_info.json')

    # Start from the existing contents when the file is already there,
    # otherwise from an empty dict; either way the action entry is replaced.
    if os.path.isfile(info_path):
        contents = read_file(info_path, True)
    else:
        contents = {}
    contents[action] = process_obj

    write_file(info_path, contents, True)
    return info_path
def test_files_to_delete(self):
    """
    This test is to ensure that if the expiration listed in process_info is before the current
    date, that data that has been downloaded will be deleted.
    """
    with self.env:
        info_file = '{}process_info.json'.format(self.directory)

        # These steps are required to alter the timestamp inside our process_info.json
        info_data = read_file(info_file, True)
        # Set the expiration date to be yesterday
        info_data['resource_upload']['expiration'] = str(timezone.now() - relativedelta(days=1))
        # Write the data JSON back to the process_info file
        write_file(info_file, info_data, True)

        self.assertEqual(len(glob.glob('mediafiles/jobs/test_command/process_info.json')), 1)

        call_command('delete_outdated_mediafiles')

        # Check that the folder has been deleted
        self.assertEqual(len(glob.glob('mediafiles/jobs/test_command/')), 0)

        # Test that a directory without a process_info.json file gets deleted
        os.makedirs(self.directory)
        self.assertEqual(len(glob.glob('mediafiles/jobs/test_command/')), 1)

        call_command('delete_outdated_mediafiles')

        # Check that the folder has been deleted
        self.assertEqual(len(glob.glob('mediafiles/jobs/test_command/')), 0)
def post(self, request):
    """
    Upload a proposal task to EaaSI.

    Reads the ticket_number from the request body, mints a one-time token,
    stores it in the job's process_info.json, and submits a proposal to the
    EaaSI portal pointing back at our eaasi_download endpoint.

    Returns
    -------
    200: OK
    {
        "id": "19",
        "message": "Proposal task was submitted.",
        "proposal_link": "https://localhost/api_v1/services/eaasi/1/"
    }

    400: Bad Request
    {
        "error": "ticket_number is missing from the request body."
    }
    """
    try:
        ticket_number = request.data['ticket_number']
    except KeyError:
        return Response(
            data={"error": "ticket_number is missing from the request body."},
            status=status.HTTP_400_BAD_REQUEST)

    # Create a one time use token for EaaSI to use.
    eaasi_token = str(uuid4())

    # Persist the token alongside the download job so the eaasi_download
    # endpoint can authenticate EaaSI's later fetch.
    data = get_process_info_data('downloads', ticket_number)
    data['eaasi_token'] = eaasi_token
    write_file('mediafiles/downloads/{}/process_info.json'.format(ticket_number), data, True)

    # Build EaaSI download endpoint url (absolute, with the one-time token attached).
    eaasi_download_reverse = reverse('eaasi_download', kwargs={"ticket_number": ticket_number})
    eaasi_download_url = request.build_absolute_uri(eaasi_download_reverse)
    final_eaasi_download_url = '{}?eaasi_token={}'.format(eaasi_download_url, eaasi_token)

    data = {
        "data_url": final_eaasi_download_url,
        "data_type": "bagit+zip"
    }

    response = requests.post(
        'https://eaasi-portal.emulation.cloud/environment-proposer/api/v1/proposals',
        data=json.dumps(data),
        headers={"Content-Type": "application/json"})

    # EaaSI acknowledges a queued proposal with 202; anything else is relayed
    # to the caller with the upstream status code.
    if response.status_code != 202:
        return Response(
            data={'message': 'Proposal submission returned a status code of {}.'.format(
                response.status_code)},
            status=response.status_code)

    response_json = response.json()

    # Add Proposal link to payload
    reverse_proposal_url = reverse('proposal', kwargs={"proposal_id": response_json['id']})
    response_json['proposal_link'] = request.build_absolute_uri(reverse_proposal_url)

    return Response(data=response_json, status=status.HTTP_200_OK)
def _transfer_resource(self):
    """
    Transfer resources from the source target to the destination target.

    Runs in a spawned process; all outcomes are communicated by writing to
    the process_info.json file rather than by returning HTTP responses.
    """
    # Write the process id to the process_info file
    self.process_info_obj['function_process_id'] = self.function_process.pid
    write_file(self.process_info_path, self.process_info_obj, True)

    ####### DOWNLOAD THE RESOURCES #######
    download_status = self._download_resource()

    # If download failed then don't continue
    if not download_status:
        return

    ####### PREPARE UPLOAD FROM DOWNLOAD BAG #######
    # Validate the 'bag' and check for checksum mismatches
    self.bag = bagit.Bag(self.resource_main_dir)
    try:
        validate_bag(self.bag)
    except PresQTValidationError as e:
        # BUG FIX: this method runs in a spawned process, so returning a DRF
        # Response object here went nowhere and the job was never marked as
        # failed (it hung until the watchdog timed out). Record the failure
        # in process_info.json like every other failure path instead.
        self.process_info_obj['status_code'] = e.status_code
        self.process_info_obj['status'] = 'failed'
        self.process_info_obj['message'] = e.data
        # Shorten the expiration since this is an incomplete/failed directory.
        self.process_info_obj['expiration'] = str(timezone.now() + relativedelta(hours=1))
        write_file(self.process_info_path, self.process_info_obj, True)
        return

    # Create a hash dictionary to compare with the hashes returned from the target after upload
    # If the destination target supports a hash provided by the self. then use those hashes,
    # otherwise create new hashes with a target supported hash.
    self.file_hashes, self.hash_algorithm = get_or_create_hashes_from_bag(self)

    ####### UPLOAD THE RESOURCES #######
    upload_status = self._upload_resource()

    # If upload failed then don't continue
    if not upload_status:
        return

    ####### TRANSFER COMPLETE #######
    # Transfer was a success so update the server metadata file.
    self.process_info_obj['status_code'] = '200'
    self.process_info_obj['status'] = 'finished'
    self.process_info_obj['failed_fixity'] = list(
        set(self.download_failed_fixity + self.upload_failed_fixity))
    transfer_fixity = False if not self.download_fixity or not self.upload_fixity else True
    self.process_info_obj['message'] = get_action_message(
        'Transfer', transfer_fixity, self.metadata_validation, self.action_metadata)

    write_file(self.process_info_path, self.process_info_obj, True)
    return
def update_process_info_message(process_info_path, action, message):
    """
    Update the process_info.json file with a status message for the given action.

    (The previous docstring was copy-pasted from update_process_info and
    incorrectly described total-file counting.)

    Parameters
    ----------
    process_info_path: str
        Path to the process_info.json file to update
    action: str
        The action to update in the process_info.json object
    message: str
        The message to add to the process_info file
    """
    process_info_data = read_file(process_info_path, True)
    process_info_data[action]['message'] = message
    write_file(process_info_path, process_info_data, True)
def process_watchdog(function_process, process_info_path, process_time, action):
    """
    Monitoring function for the file transfer processes spawned off using Multiprocessing.
    It will monitor if the process has either finished or has gone over it's processing time.

    Parameters
    ----------
    function_process : multiprocessing.Process
        Multiprocessing class that we are monitoring
    process_info_path : str
        Path to the process_info.json file for the process running
    process_time : int
        Amount of seconds we want the watchdog to the let the monitored process run
    action : str
        The action key in the process_info.json object whose status is polled
    """
    slept_time = 0
    while slept_time <= process_time:
        sleep(1)

        # Get the contents of process_info.json.
        # While loop is required in case the json file is being written to while being read.
        process_info_data = None
        while process_info_data is None:
            try:
                process_info_data = read_file(process_info_path, True)
            except json.decoder.JSONDecodeError:
                # Pass while the process_info file is being written to
                pass
            # Exception is mostly in place for testing
            except FileNotFoundError:
                return

        # If the monitored process has finished
        if process_info_data[action]['status'] != 'in_progress':
            return
        slept_time += 1

    # If we've reached here then the process reached our time limit and we need to terminate
    # the monitored process and update the process_info.json file.
    function_process.terminate()
    process_info_data[action]['status'] = 'failed'
    process_info_data[action]['message'] = 'The process took too long on the server.'
    process_info_data[action]['status_code'] = 504
    write_file(process_info_path, process_info_data, True)
def update_bagit_with_metadata(instance, zip_title):
    """
    Create a metadata file and resave and validate the bag.

    Parameters
    ----------
    instance : BaseResource class instance
        Class we want to add the attributes to
    zip_title: str
        Title of the zipped resource
    """
    # Rewrite each created file's destination path so it is rooted inside
    # the zip's data directory.
    for file_metadata in instance.action_metadata['files']['created']:
        original_path = file_metadata['destinationPath']
        file_metadata['destinationPath'] = '/{}/data{}'.format(zip_title, original_path)
    instance.action_metadata['destinationTargetName'] = 'Zip File'

    fts_metadata = create_fts_metadata(instance.action_metadata,
                                       instance.source_fts_metadata_actions)
    metadata_path = os.path.join(instance.data_directory, 'PRESQT_FTS_METADATA.json')
    write_file(metadata_path, fts_metadata, True)

    # Update the bag
    instance.bag.save(manifests=True)
def handle(self, *args, **kwargs):
    """
    Interactively scaffold a new PresQT target: prompt for the target's name,
    status url, supported endpoints, transfer partners and hash algorithms,
    then create its package skeleton, stub action files, function_router
    entries, and targets.json record.

    DEFECT FIXED: the six identical Y/N prompt loops and the two partner-list
    prompt loops were duplicated inline; they are now private nested helpers.
    All prompt strings and side effects are unchanged.
    """
    def prompt_yes_no(question):
        # Repeat the prompt until the user answers Y/y/N/n; return a bool.
        while True:
            answer = input(question)
            if answer not in ['Y', 'y', 'N', 'n']:
                print('Must input Y or N')
            else:
                return answer in ['Y', 'y']

    def prompt_partner_list(question, valid_partners, required_action):
        # Repeat the prompt until every comma-separated partner is recognized
        # and supports the required transfer action.
        while True:
            raw_answer = input(question)
            if ' ' in raw_answer:
                print("Input can't contain spaces")
                continue
            partners = raw_answer.lower().split(',')
            for partner in partners:
                if partner not in valid_partners:
                    print("{} is not a recognized target, or doesn't support {}.".format(
                        partner, required_action))
                    break
            else:
                return partners

    targets_json = read_file('presqt/specs/targets.json', True)

    # Build the lists of existing targets that can receive/send transfers.
    list_of_partners_in = []
    list_of_partners_out = []
    for target in targets_json:
        if target['supported_actions']['resource_transfer_in'] == True:
            list_of_partners_in.append(target['name'])
        if target['supported_actions']['resource_transfer_out'] == True:
            list_of_partners_out.append(target['name'])

    ##### Get Input From User #####
    while True:
        target_name = input('Enter target name (use underscores not spaces): ').lower()
        if set('[~! @#$%^&*()+{}":;[],.<>`=+-\']+$\\').intersection(target_name):
            print("Target name can't contain special characters or spaces")
        else:
            break

    human_readable_target_name = input('Enter human readable target name (format however): ')

    url_validator = URLValidator()
    while True:
        status_url = input('Enter target status url (include http:// or https://): ')
        try:
            url_validator(status_url)
        except ValidationError:
            print("Target status url must be a valid url")
        else:
            break

    resource_collection = prompt_yes_no(
        'Does your target support the Resource Collection endpoint? (Y or N): ')
    resource_detail = prompt_yes_no(
        'Does your target support the Resource Detail endpoint? (Y or N): ')
    resource_download = prompt_yes_no(
        'Does your target support the Resource Download endpoint? (Y or N): ')
    resource_upload = prompt_yes_no(
        'Does your target support the Resource Upload endpoint? (Y or N): ')
    resource_transfer_in = prompt_yes_no(
        'Does your target support the Resource Transfer In endpoint? (Y or N): ')
    resource_transfer_out = prompt_yes_no(
        'Does your target support the Resource Transfer Out endpoint? (Y or N): ')

    transfer_in = prompt_partner_list(
        "Which PresQT partners are you allowing to transfer into your service? "
        "(comma seperated list with no spaces (use underscores))\nOptions are {}: ".format(
            list_of_partners_out),
        list_of_partners_out, 'resource_transfer_out')

    transfer_out = prompt_partner_list(
        "Which PresQT partners are you allowing your service to transfer to? "
        "(comma seperated list with no spaces (use underscores))\nOptions are {}: ".format(
            list_of_partners_in),
        list_of_partners_in, 'resource_transfer_in')

    while True:
        hash_algorithms = input(
            'Enter your supported hash algorithms (comma separated list with no spaces)')
        if ' ' in hash_algorithms:
            print("Input can't contain spaces")
            continue
        hash_algorithms = hash_algorithms.split(',')
        for hash_algorithm in hash_algorithms:
            if hash_algorithm not in hashlib.algorithms_available:
                print('{} is not supported by the hashlib Python library'.format(hash_algorithm))
                break
        else:
            break

    ##### Check if target exists in targets.json #####
    if get_dictionary_from_list(targets_json, 'name', target_name):
        print('Error! Target, {}, already exists in targets.json!'.format(target_name))
        return

    ##### Make Target Directory #####
    target_directory = 'presqt/targets/{}/'.format(target_name)
    try:
        os.makedirs(os.path.dirname(target_directory))
        print('Directory created: {}'.format(target_directory))
    except FileExistsError:
        print('Error! Target directory already exists!')
        return
    else:
        open('{}{}'.format(target_directory, '__init__.py'), 'a').close()

    ##### Make Target Function Directory #####
    target_function_dir = '{}{}/'.format(target_directory, 'functions')
    os.makedirs(os.path.dirname(target_function_dir))
    print('Directory created: {}'.format(target_function_dir))
    open('{}{}'.format(target_function_dir, '__init__.py'), 'a').close()
    print('File created: {}'.format(target_function_dir))

    ##### Make Target Action Files ####
    # target_functions maps action file name -> {router variable: function name}
    target_functions = {}
    if resource_collection or resource_detail:
        with open('{}fetch.py'.format(target_function_dir), 'w') as file:
            target_functions['fetch'] = {}
            if resource_collection:
                resource_collection_function = '{}_fetch_resources'.format(target_name)
                target_functions['fetch']['{}_resource_collection'.format(
                    target_name)] = resource_collection_function
                file.write('def {}(token, search_parameter):\n\tpass'.format(
                    resource_collection_function))
                if resource_detail:
                    file.write('\n\n')
            if resource_detail:
                resource_detail_function = '{}_fetch_resource'.format(target_name)
                target_functions['fetch']['{}_resource_detail'.format(
                    target_name)] = resource_detail_function
                file.write('def {}(token, resource_id):\n\tpass'.format(
                    resource_detail_function))
        print('File created: {}fetch.py'.format(target_function_dir))

    if resource_download:
        with open('{}download.py'.format(target_function_dir), 'w') as file:
            resource_download_function = '{}_download_resource'.format(target_name)
            target_functions['download'] = {
                '{}_resource_download'.format(target_name): resource_download_function}
            file.write('def {}(token, resource_id):\n\tpass'.format(
                resource_download_function))
        print('File created: {}download.py'.format(target_function_dir))

    if resource_upload:
        with open('{}upload.py'.format(target_function_dir), 'w') as file:
            resource_upload_function = '{}_upload_resource'.format(target_name)
            target_functions['upload'] = {
                '{}_resource_upload'.format(target_name): resource_upload_function}
            file.write(
                'def {}(token, resource_id, resource_main_dir, hash_algorithm, file_duplicate_action):\n\tpass'.format(
                    resource_upload_function))
        print('File created: {}upload.py'.format(target_function_dir))

    ##### Write to function_router.py #####
    # Append the router attribute lines, then prepend the import lines.
    with open('presqt/api_v1/utilities/utils/function_router.py', 'a') as file:
        if target_functions:
            file.write('\n')
        for file_name, file_name_dict in target_functions.items():
            for variable_name, function_name in file_name_dict.items():
                file.write(' {} = {}\n'.format(variable_name, function_name))

    with open('presqt/api_v1/utilities/utils/function_router.py', 'r+') as file:
        content = file.read()
        file.seek(0, 0)
        new_imports = ''
        for file_name, file_name_dict in target_functions.items():
            new_imports += 'from presqt.targets.{}.functions.{} import {}\n'.format(
                target_name, file_name, ', '.join(file_name_dict.values()))
        file.write(new_imports + content)
    print('File updated: presqt/api_v1/utilities/utils/function_router.py')

    ##### Write to targets.json #####
    target_dict = {
        "name": target_name,
        "readable_name": human_readable_target_name,
        "status_url": status_url,
        "supported_actions": {
            "resource_collection": resource_collection,
            "resource_detail": resource_detail,
            "resource_download": resource_download,
            "resource_upload": resource_upload,
            "resource_transfer_in": resource_transfer_in,
            "resource_transfer_out": resource_transfer_out
        },
        "supported_transfer_partners": {
            "transfer_in": transfer_in,
            "transfer_out": transfer_out
        },
        "supported_hash_algorithms": hash_algorithms
    }

    data = read_file('presqt/specs/targets.json', True)
    data.append(target_dict)
    write_file('presqt/specs/targets.json', data, True)
    print('File updated: presqt/specs/targets.json')
def _download_resource(self):
    """
    Downloads the resources from the target, performs a fixity check,
    zips them up in BagIt format.

    Returns
    -------
    bool
        True on success; False when the target fetch raised, in which case
        the failure has been recorded in process_info.json.
    """
    action = 'resource_download'

    # Write the process id to the process_info file
    self.process_info_obj['function_process_id'] = self.function_process.pid
    write_file(self.process_info_path, self.process_info_obj, True)

    # Fetch the proper function to call
    func = FunctionRouter.get_function(self.source_target_name, action)

    # Fetch the resources. func_dict is in the format:
    #   {
    #       'resources': files,
    #       'empty_containers': empty_containers,
    #       'action_metadata': action_metadata
    #   }
    try:
        func_dict = func(self.source_token, self.source_resource_id)
        # If the resource is being transferred, has only one file, and that file is PresQT
        # metadata then raise an error.
        if self.action == 'resource_transfer_in' \
                and len(func_dict['resources']) == 1 \
                and func_dict['resources'][0]['title'] == 'PRESQT_FTS_METADATA.json':
            # DEFECT FIXED: message previously read "cannot not be transferred"
            # (double negative).
            raise PresQTResponseException(
                'PresQT Error: PresQT FTS metadata cannot be transferred by itself.',
                status.HTTP_400_BAD_REQUEST)
    except PresQTResponseException as e:
        # Catch any errors that happen within the target fetch.
        # Update the server process_info file appropriately.
        self.process_info_obj['status_code'] = e.status_code
        self.process_info_obj['status'] = 'failed'
        if self.action == 'resource_transfer_in':
            self.process_info_obj['download_status'] = 'failed'
        self.process_info_obj['message'] = e.data
        # Update the expiration from 5 days to 1 hour from now. We can delete this faster
        # because it's an incomplete/failed directory.
        self.process_info_obj['expiration'] = str(timezone.now() + relativedelta(hours=1))
        write_file(self.process_info_path, self.process_info_obj, True)
        return False

    # The directory all files should be saved in.
    self.resource_main_dir = os.path.join(self.ticket_path, self.base_directory_name)

    # For each resource, perform fixity check, gather metadata, and save it to disk.
    fixity_info = []
    self.download_fixity = True
    self.source_fts_metadata_actions = []
    self.new_fts_metadata_files = []
    self.download_failed_fixity = []
    for resource in func_dict['resources']:
        # Perform the fixity check and add extra info to the returned fixity object.
        fixity_obj, self.download_fixity = download_fixity_checker.download_fixity_checker(
            resource)
        fixity_info.append(fixity_obj)

        if not fixity_obj['fixity']:
            self.download_failed_fixity.append(resource['path'])

        # Create metadata for this resource. Return True if a valid FTS metadata file is found.
        if create_download_metadata(self, resource, fixity_obj):
            # Don't write valid FTS metadata file.
            continue

        # Save the file to the disk.
        write_file('{}{}'.format(self.resource_main_dir, resource['path']), resource['file'])

    # Create PresQT action metadata
    self.action_metadata = {
        'id': str(uuid4()),
        'actionDateTime': str(timezone.now()),
        'actionType': self.action,
        'sourceTargetName': self.source_target_name,
        'sourceUsername': func_dict['action_metadata']['sourceUsername'],
        'destinationTargetName': 'Local Machine',
        'destinationUsername': None,
        'files': {
            'created': self.new_fts_metadata_files,
            'updated': [],
            'ignored': []
        }
    }

    # Write empty containers to disk
    for container_path in func_dict['empty_containers']:
        # Make sure the container_path has a '/' and the beginning and end
        if container_path[-1] != '/':
            container_path += '/'
        if container_path[0] != '/':
            container_path = '/' + container_path
        os.makedirs(os.path.dirname('{}{}'.format(self.resource_main_dir, container_path)))

    # If we are transferring the downloaded resource then bag it for the resource_upload method
    if self.action == 'resource_transfer_in':
        self.action_metadata['destinationTargetName'] = self.destination_target_name

        # Make a BagIt 'bag' of the resources.
        bagit.make_bag(self.resource_main_dir,
                       checksums=['md5', 'sha1', 'sha256', 'sha512'])
        self.process_info_obj['download_status'] = get_action_message(
            'Download', self.download_fixity, True, self.action_metadata)
        return True
    # If we are only downloading the resource then create metadata, bag, zip,
    # and update the server process file.
    else:
        # Create and write metadata file.
        final_fts_metadata_data = create_fts_metadata(
            self.action_metadata, self.source_fts_metadata_actions)
        write_file(
            os.path.join(self.resource_main_dir, 'PRESQT_FTS_METADATA.json'),
            final_fts_metadata_data, True)

        # Validate the final metadata
        metadata_validation = schema_validator(
            'presqt/json_schemas/metadata_schema.json', final_fts_metadata_data)
        self.process_info_obj['message'] = get_action_message(
            'Download', self.download_fixity, metadata_validation, self.action_metadata)

        # Add the fixity file to the disk directory
        write_file(
            os.path.join(self.resource_main_dir, 'fixity_info.json'), fixity_info, True)

        # Make a BagIt 'bag' of the resources.
        bagit.make_bag(self.resource_main_dir,
                       checksums=['md5', 'sha1', 'sha256', 'sha512'])

        # Zip the BagIt 'bag' to send forward.
        zip_directory(self.resource_main_dir,
                      "{}.zip".format(self.resource_main_dir), self.ticket_path)

        # Everything was a success so update the server metadata file.
        self.process_info_obj['status_code'] = '200'
        self.process_info_obj['status'] = 'finished'
        self.process_info_obj['zip_name'] = '{}.zip'.format(self.base_directory_name)
        self.process_info_obj['failed_fixity'] = self.download_failed_fixity
        write_file(self.process_info_path, self.process_info_obj, True)

        return True
def _upload_resource(self):
    """
    Upload resources to the target and perform a fixity check on the resulting hashes.

    Returns
    -------
    bool
        True on success; False when an upload step raised, in which case the
        failure has been recorded in process_info.json.
    """
    action = 'resource_upload'

    # Write the process id to the process_info file
    self.process_info_obj['function_process_id'] = self.function_process.pid
    write_file(self.process_info_path, self.process_info_obj, True)

    # Data directory in the bag
    self.data_directory = '{}/data'.format(self.resource_main_dir)

    # If we are uploading (not transferring) then create the initial metadata based on the
    # zipped bag provided.
    if self.action == 'resource_upload':
        self.new_fts_metadata_files = []
        for path, subdirs, files in os.walk(self.data_directory):
            for name in files:
                # Paths are stored relative to the bag's data directory.
                self.new_fts_metadata_files.append({
                    'destinationHashes': {},
                    'destinationPath': os.path.join(path, name)[len(self.data_directory):],
                    'failedFixityInfo': [],
                    'title': name,
                    'sourceHashes': {
                        self.hash_algorithm: self.file_hashes[os.path.join(path, name)]
                    },
                    'sourcePath': os.path.join(path, name)[len(self.data_directory):],
                    'extra': {}
                })

        self.action_metadata = {
            'id': str(uuid4()),
            'actionDateTime': str(timezone.now()),
            'actionType': self.action,
            'sourceTargetName': 'Local Machine',
            'sourceUsername': None,
            'destinationTargetName': self.destination_target_name,
            'destinationUsername': None,
            'files': {
                'created': self.new_fts_metadata_files,
                'updated': [],
                'ignored': []
            }
        }

    # If the target destination's storage hierarchy has a finite depth then zip the resources
    # to be uploaded along with their metadata.
    # Also, create metadata files for the new zip file to be uploaded.
    if self.infinite_depth is False:
        try:
            structure_validation(self)
            finite_depth_upload_helper(self)
        except PresQTResponseException as e:
            # Catch any errors that happen within the target fetch.
            # Update the server process_info file appropriately.
            self.process_info_obj['status_code'] = e.status_code
            self.process_info_obj['status'] = 'failed'
            if self.action == 'resource_transfer_in':
                self.process_info_obj['upload_status'] = 'failed'
            self.process_info_obj['message'] = e.data
            # Update the expiration from 5 days to 1 hour from now. We can delete this faster because
            # it's an incomplete/failed directory.
            self.process_info_obj['expiration'] = str(timezone.now() + relativedelta(hours=1))
            write_file(self.process_info_path, self.process_info_obj, True)
            return False

    # Fetch the proper function to call
    func = FunctionRouter.get_function(self.destination_target_name, action)

    # Upload the resources. func_dict has the following format:
    #   {
    #       'resources_ignored': resources_ignored,
    #       'resources_updated': resources_updated,
    #       'action_metadata': action_metadata,
    #       'file_metadata_list': file_metadata_list,
    #       'project_id': title
    #   }
    try:
        structure_validation(self)
        func_dict = func(self.destination_token, self.destination_resource_id,
                         self.data_directory, self.hash_algorithm, self.file_duplicate_action)
    except PresQTResponseException as e:
        # Catch any errors that happen within the target fetch.
        # Update the server process_info file appropriately.
        self.process_info_obj['status_code'] = e.status_code
        self.process_info_obj['status'] = 'failed'
        if self.action == 'resource_transfer_in':
            self.process_info_obj['upload_status'] = 'failed'
        self.process_info_obj['message'] = e.data
        # Update the expiration from 5 days to 1 hour from now. We can delete this faster
        # because it's an incomplete/failed directory.
        self.process_info_obj['expiration'] = str(timezone.now() + relativedelta(hours=1))
        write_file(self.process_info_path, self.process_info_obj, True)
        return False

    # Check if fixity has failed on any files during a transfer. If so, update the
    # process_info_data file.
    self.upload_fixity = True
    self.upload_failed_fixity = []

    for resource in func_dict['file_metadata_list']:
        resource['failed_fixity_info'] = []
        # Ignored resources are skipped: the destination never received them,
        # so a hash mismatch there is expected.
        if resource['destinationHash'] != self.file_hashes[resource['actionRootPath']] \
                and resource['actionRootPath'] not in func_dict['resources_ignored']:
            self.upload_fixity = False
            self.upload_failed_fixity.append(
                resource['actionRootPath'][len(self.data_directory):])
            resource['failed_fixity_info'].append({
                'NewGeneratedHash': self.file_hashes[resource['actionRootPath']],
                'algorithmUsed': self.hash_algorithm,
                'reasonFixityFailed': "Either the destination did not provide a hash "
                                      "or fixity failed during upload."
            })

    # Strip the server created directory prefix of the file paths for ignored and updated files
    resources_ignored = [file[len(self.data_directory):]
                         for file in func_dict['resources_ignored']]
    self.process_info_obj['resources_ignored'] = resources_ignored
    resources_updated = [file[len(self.data_directory):]
                         for file in func_dict['resources_updated']]
    self.process_info_obj['resources_updated'] = resources_updated

    self.metadata_validation = create_upload_metadata(
        self, func_dict['file_metadata_list'], func_dict['action_metadata'],
        func_dict['project_id'], resources_ignored, resources_updated)

    # Validate the final metadata
    upload_message = get_action_message(
        'Upload', self.upload_fixity, self.metadata_validation, self.action_metadata)
    self.process_info_obj['message'] = upload_message

    if self.action == 'resource_upload':
        # Update server process file
        self.process_info_obj['status_code'] = '200'
        self.process_info_obj['status'] = 'finished'
        self.process_info_obj['hash_algorithm'] = self.hash_algorithm
        self.process_info_obj['failed_fixity'] = self.upload_failed_fixity
        write_file(self.process_info_path, self.process_info_obj, True)
    else:
        # During a transfer the caller (_transfer_resource) writes the final
        # process file; only the upload status is recorded here.
        self.process_info_obj['upload_status'] = upload_message

    return True
def transfer_post(self):
    """
    Kick off a transfer of resources from a source target into an existing (or new)
    destination target resource.

    Validates the request headers/body and both targets, writes the initial
    process_info.json for the job, then spawns the actual transfer work in a separate
    process so this request can return immediately with ticket information.

    Returns
    -------
    Response object in JSON format
    """
    # All validation must pass before any work is queued.
    try:
        self.destination_token = get_destination_token(self.request)
        self.source_token = get_source_token(self.request)
        self.file_duplicate_action = file_duplicate_action_validation(self.request)
        self.source_target_name, self.source_resource_id = transfer_post_body_validation(
            self.request)
        target_valid, self.infinite_depth = target_validation(
            self.destination_target_name, self.action)
        target_validation(self.source_target_name, 'resource_transfer_out')
        transfer_target_validation(self.source_target_name, self.destination_target_name)
    except PresQTValidationError as validation_error:
        return Response(data={'error': validation_error.data},
                        status=validation_error.status_code)

    # Every job gets its own ticket directory under mediafiles/transfers.
    ticket_number = uuid4()
    self.ticket_path = os.path.join("mediafiles", "transfers", str(ticket_number))

    # Seed the job's process_info.json so status polling works right away.
    self.process_info_obj = {
        'presqt-source-token': hash_tokens(self.source_token),
        'presqt-destination-token': hash_tokens(self.destination_token),
        'status': 'in_progress',
        'expiration': str(timezone.now() + relativedelta(days=5)),
        'message': 'Transfer is being processed on the server',
        'download_status': None,
        'upload_status': None,
        'status_code': None,
        'function_process_id': None
    }
    self.process_info_path = os.path.join(self.ticket_path, "process_info.json")
    write_file(self.process_info_path, self.process_info_obj, True)

    self.base_directory_name = '{}_{}_transfer_{}'.format(self.source_target_name,
                                                          self.destination_target_name,
                                                          self.source_resource_id)

    # Run the transfer itself outside of the request/response cycle.
    spawn_action_process(self, self._transfer_resource)

    transfer_hyperlink = self.request.build_absolute_uri(
        reverse('transfer_job', kwargs={'ticket_number': ticket_number}))

    response_payload = {
        'ticket_number': ticket_number,
        'message': 'The server is processing the request.',
        'transfer_job': transfer_hyperlink
    }
    return Response(status=status.HTTP_202_ACCEPTED, data=response_payload)
def _download_resource(self):
    """
    Download the requested resources from the source target, fixity-check each file,
    gather/create PresQT metadata, and (for plain downloads) bag and zip the result.

    Returns
    -------
    bool
        True on success. False if the target fetch raised, in which case the
        process_info file has already been updated with the failure details.
    """
    action = 'resource_download'

    # Record the spawned process id so a cancellation request can find and kill it.
    self.process_info_obj['function_process_id'] = self.function_process.pid
    update_or_create_process_info(self.process_info_obj, self.action, self.ticket_number)

    # Fetch the proper function to call for this source target.
    func = FunctionRouter.get_function(self.source_target_name, action)

    # Fetch the resources. func_dict is in the format:
    #   {
    #       'resources': files,
    #       'empty_containers': empty_containers,
    #       'action_metadata': action_metadata
    #   }
    try:
        func_dict = func(self.source_token, self.source_resource_id,
                         self.process_info_path, self.action)

        # If the resource is being transferred, has only one file, and that file is the
        # PresQT metadata then raise an error.
        if self.action == 'resource_transfer_in' and \
                len(func_dict['resources']) == 1 \
                and func_dict['resources'][0]['title'] == 'PRESQT_FTS_METADATA.json':
            raise PresQTResponseException(
                'PresQT Error: PresQT FTS metadata cannot not be transferred by itself.',
                status.HTTP_400_BAD_REQUEST)
    except PresQTResponseException as e:
        # TODO: Functionalize this error section
        # Catch any errors that happen within the target fetch.
        # Update the server process_info file appropriately.
        self.process_info_obj['status_code'] = e.status_code
        self.process_info_obj['status'] = 'failed'
        if self.action == 'resource_transfer_in':
            self.process_info_obj['download_status'] = 'failed'
        self.process_info_obj['message'] = e.data
        # Shorten the expiration (normally 5 days) to 1 hour from now. We can delete
        # this faster because it's an incomplete/failed directory.
        self.process_info_obj['expiration'] = str(timezone.now() + relativedelta(hours=1))
        update_or_create_process_info(self.process_info_obj, self.action, self.ticket_number)
        return False

    # Get the latest contents of the job's process_info.json file (the target function
    # may have updated it while fetching).
    self.process_info_obj = read_file(self.process_info_path, True)[self.action]

    # The directory all files should be saved in.
    self.resource_main_dir = os.path.join(self.ticket_path, self.base_directory_name)
    update_process_info_message(self.process_info_path, self.action,
                                'Performing fixity checks and gathering metadata...')

    # NOTE(review): 'extra_metadata' is not listed in the func_dict format comment above
    # but is read here — presumably every target also returns it; confirm against targets.
    self.extra_metadata = func_dict['extra_metadata']

    # For each resource, perform fixity check, gather metadata, and save it to disk.
    fixity_info = []
    self.download_fixity = True
    self.download_failed_fixity = []
    self.source_fts_metadata_actions = []
    self.new_fts_metadata_files = []
    self.all_keywords = []
    self.initial_keywords = []
    self.manual_keywords = []
    self.enhanced_keywords = []
    for resource in func_dict['resources']:
        # Perform the fixity check and add extra info to the returned fixity object.
        # Note: This method of calling the function needs to stay this way for test Mock
        fixity_obj, self.download_fixity = download_fixity_checker.download_fixity_checker(
            resource)
        fixity_info.append(fixity_obj)

        if not fixity_obj['fixity']:
            self.download_failed_fixity.append(resource['path'])

        # Create metadata for this resource or validate the metadata file
        if resource['title'] == 'PRESQT_FTS_METADATA.json':
            is_valid = validate_metadata(self, resource)
            if not is_valid:
                # Keep the bad metadata file on disk but rename it so it can't be
                # mistaken for valid PresQT metadata downstream.
                resource['path'] = resource['path'].replace('PRESQT_FTS_METADATA.json',
                                                            'INVALID_PRESQT_FTS_METADATA.json')
                create_download_metadata(self, resource, fixity_obj)
                write_file('{}{}'.format(self.resource_main_dir, resource['path']),
                           resource['file'])
        else:
            create_download_metadata(self, resource, fixity_obj)
            write_file('{}{}'.format(self.resource_main_dir, resource['path']),
                       resource['file'])

    # Enhance the source keywords (transfers only; plain downloads keep an empty dict).
    self.keyword_dict = {}
    if self.action == 'resource_transfer_in':
        if self.supports_keywords:
            if self.keyword_action == 'automatic':
                self.keyword_dict = automatic_keywords(self)
            elif self.keyword_action == 'manual':
                self.keyword_dict = manual_keywords(self)
    self.keyword_enhancement_successful = True

    # Create PresQT action metadata
    update_process_info_message(self.process_info_path, self.action,
                                "Creating PRESQT_FTS_METADATA...")
    self.source_username = func_dict['action_metadata']['sourceUsername']
    if self.action == 'resource_transfer_in':
        source_target_data = get_target_data(self.source_target_name)
        destination_target_data = get_target_data(self.destination_target_name)
        self.details = "PresQT Transfer from {} to {}".format(
            source_target_data['readable_name'], destination_target_data['readable_name'])
    else:
        source_target_data = get_target_data(self.source_target_name)
        self.details = "PresQT Download from {}".format(source_target_data['readable_name'])
    self.action_metadata = {
        'id': str(uuid4()),
        'details': self.details,
        'actionDateTime': str(timezone.now()),
        'actionType': self.action,
        'sourceTargetName': self.source_target_name,
        'sourceUsername': self.source_username,
        # 'Local Machine' is the default; overwritten below for transfers.
        'destinationTargetName': 'Local Machine',
        'destinationUsername': None,
        'keywords': self.keyword_dict,
        'files': {
            'created': self.new_fts_metadata_files,
            'updated': [],
            'ignored': []
        }
    }

    # TODO: Move this up to make it occur after we loop through func_dict['resources'] and write
    # resources
    # Write empty containers to disk
    for container_path in func_dict['empty_containers']:
        # Make sure the container_path has a '/' and the beginning and end
        if container_path[-1] != '/':
            container_path += '/'
        if container_path[0] != '/':
            container_path = '/' + container_path
        os.makedirs(os.path.dirname('{}{}'.format(self.resource_main_dir, container_path)))

    # If we are transferring the downloaded resource then bag it for the resource_upload method
    if self.action == 'resource_transfer_in':
        self.action_metadata['destinationTargetName'] = self.destination_target_name

        # Make a BagIt 'bag' of the resources.
        bagit.make_bag(self.resource_main_dir,
                       checksums=['md5', 'sha1', 'sha256', 'sha512'])
        self.process_info_obj['download_status'] = get_action_message(
            self, 'Download', self.download_fixity, True, self.action_metadata)
        return True
    # If we are only downloading the resource then create metadata, bag, zip,
    # and update the server process file.
    else:
        # Create Metadata file
        final_fts_metadata_data = create_fts_metadata(self.all_keywords,
                                                      self.action_metadata,
                                                      self.source_fts_metadata_actions,
                                                      self.extra_metadata)

        # Validate the final metadata
        metadata_validation = schema_validator('presqt/json_schemas/metadata_schema.json',
                                               final_fts_metadata_data)
        self.process_info_obj['message'] = get_action_message(
            self, 'Download', self.download_fixity, metadata_validation, self.action_metadata)

        # Make a BagIt 'bag' of the resources.
        bagit.make_bag(self.resource_main_dir,
                       checksums=['md5', 'sha1', 'sha256', 'sha512'])

        # Write metadata file.
        write_file(os.path.join(self.resource_main_dir, 'PRESQT_FTS_METADATA.json'),
                   final_fts_metadata_data, True)

        # Add the fixity file to the disk directory
        write_file(os.path.join(self.resource_main_dir, 'fixity_info.json'),
                   fixity_info, True)

        # Zip the BagIt 'bag' to send forward.
        zip_directory(self.resource_main_dir, "{}.zip".format(self.resource_main_dir),
                      self.ticket_path)

        # Everything was a success so update the server metadata file.
        self.process_info_obj['status_code'] = '200'
        self.process_info_obj['status'] = 'finished'
        self.process_info_obj['zip_name'] = '{}.zip'.format(self.base_directory_name)
        self.process_info_obj['failed_fixity'] = self.download_failed_fixity
        update_or_create_process_info(self.process_info_obj, self.action, self.ticket_number)

        if self.email:
            # Build link to retrieve the download
            download_reverse = reverse('job_status', kwargs={"action": "download",
                                                             "response_format": "zip"})
            download_url = self.request.build_absolute_uri(download_reverse)
            final_download_url = "{}?ticket_number={}".format(download_url, self.ticket_number)
            context = {
                "download_url": final_download_url,
                "download_message": self.process_info_obj['message'],
                "failed_fixity": self.process_info_obj['failed_fixity']
            }
            email_blaster(self.email, "PresQT Download Complete", context,
                          "emails/download_email.html")
        return True
def post(self, request):
    """
    Upload a proposal task to EaaSI

    Returns
    -------
    200: OK
    {
        "id": "19",
        "message": "Proposal task was submitted."
        "proposal_link": "https://localhost/api_v1/services/eaasi/1/"
    }

    400: Bad Request
    {
        "error": "PresQT Error: 'presqt-source-token' missing in the request headers."
    }
    or
    {
        "error": "PresQT Error: A download does not exist for this user on the server."
    }

    404: Not Found
    {
        "error": "PresQT Error: Invalid ticket number, '1234'."
    }
    or
    {
        "error": "PresQT Error: A resource_download does not exist for this user on the server."
    }

    503: Service Unavailable
    {
        "message": "Proposal submission failed: <reason>"
    }
    """
    # Get the source token from the request, hash it to get the ticket_number, get the
    # process_info.json file connected with the ticket_number.
    try:
        source_token = get_source_token(self.request)
        ticket_number = hash_tokens(source_token)
        process_info_data = get_process_info_data(ticket_number)
        download_data = get_process_info_action(process_info_data, 'resource_download')
    except PresQTValidationError as e:
        return Response(data={'error': e.data}, status=e.status_code)

    # Create a one time use token for EaaSI to use.
    eaasi_token = str(uuid4())
    download_data['eaasi_token'] = eaasi_token
    write_file('mediafiles/jobs/{}/process_info.json'.format(ticket_number),
               process_info_data, True)

    # Build EaaSI download endpoint url
    eaasi_download_reverse = reverse('eaasi_download', kwargs={"ticket_number": ticket_number})
    eaasi_download_url = request.build_absolute_uri(eaasi_download_reverse)
    final_eaasi_download_url = '{}?eaasi_token={}'.format(eaasi_download_url, eaasi_token)

    data = {"data_url": final_eaasi_download_url, "data_type": "bagit+zip"}

    try:
        # Bug fix: always set a timeout on outbound requests. Without one, a stalled
        # EaaSI portal would hang this request thread indefinitely.
        response = requests.post(
            'https://eaasi-portal.emulation.cloud/environment-proposer/api/v1/proposals',
            data=json.dumps(data),
            headers={"Content-Type": "application/json"},
            timeout=30)
    except requests.exceptions.RequestException as error:
        # Network failure or timeout: report it rather than raising an unhandled 500.
        return Response(data={'message': 'Proposal submission failed: {}'.format(error)},
                        status=status.HTTP_503_SERVICE_UNAVAILABLE)

    if response.status_code != 202:
        return Response(
            data={'message': 'Proposal submission returned a status code of {}.'.format(
                response.status_code)},
            status=response.status_code)

    response_json = response.json()

    # Add Proposal link to payload
    reverse_proposal_url = reverse('proposal', kwargs={"proposal_id": response_json['id']})
    response_json['proposal_link'] = request.build_absolute_uri(reverse_proposal_url)

    return Response(data=response_json, status=status.HTTP_200_OK)
def upload_post(self):
    """
    Upload resources to a specific existing target resource or create a new target resource.

    Validates the request, extracts and BagIt-validates the uploaded zip (retrying up to
    three times), writes the job's process_info.json, and spawns the actual upload work
    in a separate process so this request can return immediately.

    Returns
    -------
    Response object in JSON format
    """
    # Perform token, header, target, action, and resource validation
    try:
        self.destination_token = get_destination_token(self.request)
        self.file_duplicate_action = file_duplicate_action_validation(self.request)
        target_valid, self.infinite_depth = target_validation(
            self.destination_target_name, self.action)
        resource = file_validation(self.request)
    except PresQTValidationError as e:
        return Response(data={'error': e.data}, status=e.status_code)

    # Save files to disk and check their fixity integrity. If BagIt validation fails, attempt
    # to save files to disk again. If BagIt validation fails after 3 attempts return an error.
    for index in range(3):
        # Generate ticket number
        ticket_number = uuid4()
        self.ticket_path = os.path.join("mediafiles", "uploads", str(ticket_number))

        # Extract each file in the zip file to disk
        with zipfile.ZipFile(resource) as myzip:
            myzip.extractall(self.ticket_path)
        self.base_directory_name = next(os.walk(self.ticket_path))[1][0]
        self.resource_main_dir = os.path.join(self.ticket_path, self.base_directory_name)

        # Validate the 'bag' and check for checksum mismatches
        try:
            self.bag = bagit.Bag(self.resource_main_dir)
            validate_bag(self.bag)
        except PresQTValidationError as e:
            shutil.rmtree(self.ticket_path)
            # If we've reached the maximum number of attempts then return an error response
            if index == 2:
                return Response(data={'error': 'PresQT Error: {}'.format(e.data)},
                                status=e.status_code)
        except bagit.BagError as e:
            # Bug fix: remove the extracted ticket directory here too. Previously only
            # the PresQTValidationError branch cleaned up, so every BagError attempt
            # leaked its extracted upload on disk.
            shutil.rmtree(self.ticket_path)
            # If we've reached the maximum number of attempts then return an error response
            if index == 2:
                return Response(data={'error': 'PresQT Error: {}'.format(e.args[0])},
                                status=status.HTTP_400_BAD_REQUEST)
        else:
            # Collect and remove any existing source metadata
            get_upload_source_metadata(self, self.bag)
            # If the bag validated successfully then break from the loop
            break

    # Write process_info.json file
    self.process_info_obj = {
        'presqt-destination-token': hash_tokens(self.destination_token),
        'status': 'in_progress',
        'expiration': str(timezone.now() + relativedelta(days=5)),
        'message': 'Upload is being processed on the server',
        'status_code': None,
        'function_process_id': None
    }
    self.process_info_path = os.path.join(self.ticket_path, 'process_info.json')
    write_file(self.process_info_path, self.process_info_obj, True)

    # Create a hash dictionary to compare with the hashes returned from the target after upload.
    # If the destination target supports a hash provided by the bag then use those hashes
    # otherwise create new hashes with a target supported hash.
    self.file_hashes, self.hash_algorithm = get_or_create_hashes_from_bag(self)

    # Spawn the upload_resource method separate from the request server by using multiprocess.
    spawn_action_process(self, self._upload_resource)

    reversed_url = reverse('upload_job', kwargs={'ticket_number': ticket_number})
    upload_hyperlink = self.request.build_absolute_uri(reversed_url)

    return Response(status=status.HTTP_202_ACCEPTED,
                    data={'ticket_number': ticket_number,
                          'message': 'The server is processing the request.',
                          'upload_job': upload_hyperlink})
def patch(self, request, ticket_number):
    """
    Cancel the resource upload process on the server. Update the process_info.json file
    appropriately.

    Parameters
    ----------
    ticket_number : str
        The ticket number of the upload being prepared.

    Returns
    -------
    200: OK
    {
        "status_code": "499",
        "message": "Upload was cancelled by the user"
    }
    400: Bad Request
    {
        "error": "'presqt-destination-token' missing in the request headers."
    }
    401: Unauthorized
    {
        "error": "Header 'presqt-destination-token' does not match the
        'presqt-destination-token' for this server process."
    }
    404: Not Found
    {
        "error": "Invalid ticket number, '1234'."
    }
    406: Not Acceptable
    {
        "status_code": "200",
        "message": "Upload Successful"
    }
    """
    import time

    # Perform token validation. Read data from the process_info file.
    try:
        token = get_destination_token(request)
        data = get_process_info_data('uploads', ticket_number)
        process_token_validation(hash_tokens(token), data, 'presqt-destination-token')
    except PresQTValidationError as e:
        return Response(data={'error': e.data}, status=e.status_code)

    # Wait until the spawned off process has started to cancel the upload
    while data['function_process_id'] is None:
        try:
            data = get_process_info_data('uploads', ticket_number)
        except json.decoder.JSONDecodeError:
            # Pass while the process_info file is being written to
            pass
        # Bug fix: sleep briefly between polls. The original loop busy-spun at full
        # CPU, re-reading the process_info file as fast as possible.
        time.sleep(0.1)

    # If upload is still in progress then cancel the subprocess
    if data['status'] == 'in_progress':
        for process in multiprocessing.active_children():
            if process.pid == data['function_process_id']:
                process.kill()
                process.join()
        data['status'] = 'failed'
        data['message'] = 'Upload was cancelled by the user'
        data['status_code'] = '499'
        # Failed/cancelled directories can be cleaned up sooner than finished ones.
        data['expiration'] = str(timezone.now() + relativedelta(hours=1))
        process_info_path = 'mediafiles/uploads/{}/process_info.json'.format(ticket_number)
        write_file(process_info_path, data, True)
        return Response(data={'status_code': data['status_code'],
                              'message': data['message']},
                        status=status.HTTP_200_OK)
    # If upload is finished then don't attempt to cancel subprocess
    else:
        return Response(data={'status_code': data['status_code'],
                              'message': data['message']},
                        status=status.HTTP_406_NOT_ACCEPTABLE)
def get_zip_format(self):
    """
    Prepare a zipped download of the resource with the given resource ID.

    Validation runs synchronously; the actual downloading and zip-file preparation
    happen in a process spawned separately from the request server, so this call
    returns immediately with ticket information.

    Returns
    -------
    Response object with ticket information.
    """
    # Token and target checks must pass before any work is queued.
    try:
        self.source_token = get_source_token(self.request)
        target_validation(self.source_target_name, self.action)
    except PresQTValidationError as validation_error:
        return Response(data={'error': validation_error.data},
                        status=validation_error.status_code)

    # Every download job gets its own ticket directory under mediafiles/downloads.
    ticket_number = uuid4()
    self.ticket_path = os.path.join('mediafiles', 'downloads', str(ticket_number))

    # Seed the job's process_info.json so status polling works right away.
    self.process_info_obj = {
        'presqt-source-token': hash_tokens(self.source_token),
        'status': 'in_progress',
        'expiration': str(timezone.now() + relativedelta(days=5)),
        'message': 'Download is being processed on the server',
        'status_code': None,
        'function_process_id': None
    }
    self.process_info_path = os.path.join(str(self.ticket_path), 'process_info.json')
    write_file(self.process_info_path, self.process_info_obj, True)

    self.base_directory_name = '{}_download_{}'.format(self.source_target_name,
                                                       self.source_resource_id)

    # Spawn the _download_resource method separate from the request server by using
    # multiprocess.
    spawn_action_process(self, self._download_resource)

    # Build polling hyperlinks for both response formats of the download job.
    zip_hyperlink = self.request.build_absolute_uri(
        reverse('download_job',
                kwargs={'ticket_number': ticket_number, 'response_format': 'zip'}))
    json_hyperlink = self.request.build_absolute_uri(
        reverse('download_job',
                kwargs={'ticket_number': ticket_number, 'response_format': 'json'}))

    response_payload = {
        'ticket_number': ticket_number,
        'message': 'The server is processing the request.',
        'download_job_zip': zip_hyperlink,
        'download_job_json': json_hyperlink
    }
    return Response(status=status.HTTP_202_ACCEPTED, data=response_payload)