def test_success_item_multiple_files(self):
    """
    Return a 200 along with a zip file of the item and associated files requested.

    Downloads resource 'dj52w379504', fetches the finished job's zip, and checks:
    status code, content-disposition/content-type headers, member count,
    the fixity_info.json entries, and an independent re-run of the fixity
    checker against the first two downloaded files.
    """
    resource_id = 'dj52w379504'
    shared_call_get_resource_zip(self, resource_id)

    url = reverse('download_job', kwargs={'ticket_number': self.ticket_number})
    response = self.client.get(url, **self.header)

    # Verify the status code
    self.assertEqual(response.status_code, 200)

    zip_file = zipfile.ZipFile(io.BytesIO(response.content))

    # Verify the name of the zip file.
    # NOTE: `assertEquals` was replaced with `assertEqual` — the former is a
    # deprecated alias removed in Python 3.12.
    self.assertEqual(
        response._headers['content-disposition'][1],
        'attachment; filename={}_download_{}.zip'.format(self.target_name, resource_id))
    # Verify content type
    self.assertEqual(response._headers['content-type'][1], 'application/zip')
    # Verify the number of resources in the zip is correct
    self.assertEqual(len(zip_file.namelist()), 14)

    # Verify the custom hash_file information is correct
    with zip_file.open('{}_download_{}/data/fixity_info.json'.format(
            self.target_name, resource_id)) as fixityfile:
        zip_json = json.load(fixityfile)
        for file_fixity in zip_json:
            self.assertEqual(file_fixity['fixity'], True)
            self.assertEqual(file_fixity['fixity_details'],
                             'Source Hash and PresQT Calculated hash matched.')
            self.assertEqual(file_fixity['hash_algorithm'], 'md5')
            self.assertEqual(file_fixity['presqt_hash'], file_fixity['source_hash'])

    # Run the files through the fixity checker again to make sure they downloaded
    # correctly. (De-duplicated: the original repeated this block verbatim for
    # zip_json[0] and zip_json[1].)
    for file_entry in zip_json[:2]:
        file_path = '{}_download_{}/data{}'.format(
            self.target_name, resource_id, file_entry['path'])
        with zip_file.open(file_path) as myfile:
            temp_file = myfile.read()
            resource_dict = {
                "file": temp_file,
                "hashes": {'md5': file_entry['presqt_hash']},
                "title": 'f',
                "path": file_path,
                "metadata": {}
            }
            fixity, fixity_match = download_fixity_checker(resource_dict)
            self.assertEqual(fixity['fixity'], True)

    # Delete corresponding folder
    shutil.rmtree('mediafiles/downloads/{}'.format(self.ticket_number))
def test_success_200_zip(self):
    """
    Return a 200 along with a zip file of the resource requested.

    Downloads `self.resource_id`, fetches the finished job's zip, and checks:
    status code, content-disposition/content-type headers, member count,
    the first fixity_info.json entry, an independent re-run of the fixity
    checker on the downloaded image, and that no notification email was sent.
    """
    shared_call_get_resource_zip(self, self.resource_id)

    url = reverse('job_status', kwargs={'action': 'download', 'response_format': 'zip'})
    response = self.client.get(url, **self.header)

    # Verify the status code
    self.assertEqual(response.status_code, 200)

    zip_file = zipfile.ZipFile(io.BytesIO(response.content))

    # Verify the name of the zip file.
    # NOTE: `assertEquals` was replaced with `assertEqual` — the former is a
    # deprecated alias removed in Python 3.12.
    self.assertEqual(
        response._headers['content-disposition'][1],
        'attachment; filename=osf_download_{}.zip'.format(self.resource_id))
    # Verify content type
    self.assertEqual(response._headers['content-type'][1], 'application/zip')
    # Verify the number of resources in the zip is correct
    self.assertEqual(len(zip_file.namelist()), 13)

    # Verify the custom hash_file information is correct
    with zip_file.open('osf_download_{}/fixity_info.json'.format(self.resource_id)) as fixityfile:
        zip_json = json.load(fixityfile)[0]
        self.assertEqual(zip_json['fixity'], True)
        self.assertEqual(zip_json['fixity_details'],
                         'Source Hash and PresQT Calculated hash matched.')
        self.assertIn(zip_json['hash_algorithm'], ['sha256', 'md5'])
        self.assertEqual(zip_json['presqt_hash'], self.hashes[zip_json['hash_algorithm']])

    # Run the file through the fixity checker again to make sure it downloaded correctly
    with zip_file.open('osf_download_{}/data/22776439564_7edbed7e10_o.jpg'.format(
            self.resource_id)) as myfile:
        temp_file = myfile.read()
        resource_dict = {
            "file": temp_file,
            "hashes": self.hashes,
            "title": '22776439564_7edbed7e10_o.jpg',
            "path": 'osf_download_{}/data/22776439564_7edbed7e10_o.jpg'.format(self.resource_id),
            "metadata": {}
        }
        fixity, fixity_match = download_fixity_checker(resource_dict)
        self.assertEqual(fixity['fixity'], True)

    # Delete corresponding folder
    shutil.rmtree('mediafiles/jobs/{}'.format(self.ticket_number))

    # Ensure no email was sent for this request as no email was provided.
    self.assertEqual(len(mail.outbox), 0)
def _download_resource(self):
    """
    Downloads the resources from the target, performs a fixity check,
    zips them up in BagIt format.

    Returns
    -------
    bool
        False when the target fetch raised a PresQTResponseException (the
        process_info file is updated with the failure and a shortened
        expiration); True otherwise.

    Side effects: writes downloaded files, PRESQT_FTS_METADATA.json and
    fixity_info.json under self.resource_main_dir, bags (and, for plain
    downloads, zips) the directory, and repeatedly updates the job's
    process_info file. Optionally sends a completion email.
    """
    action = 'resource_download'

    # Write the process id to the process_info file
    self.process_info_obj['function_process_id'] = self.function_process.pid
    update_or_create_process_info(self.process_info_obj, self.action, self.ticket_number)

    # Fetch the proper function to call
    func = FunctionRouter.get_function(self.source_target_name, action)

    # Fetch the resources. func_dict is in the format:
    #     {
    #         'resources': files,
    #         'empty_containers': empty_containers,
    #         'action_metadata': action_metadata
    #     }
    try:
        func_dict = func(self.source_token, self.source_resource_id,
                         self.process_info_path, self.action)

        # If the resource is being transferred, has only one file, and that file is the
        # PresQT metadata then raise an error.
        if self.action == 'resource_transfer_in' and \
                len(func_dict['resources']) == 1 \
                and func_dict['resources'][0]['title'] == 'PRESQT_FTS_METADATA.json':
            raise PresQTResponseException(
                'PresQT Error: PresQT FTS metadata cannot not be transferred by itself.',
                status.HTTP_400_BAD_REQUEST)
    except PresQTResponseException as e:
        # TODO: Functionalize this error section
        # Catch any errors that happen within the target fetch.
        # Update the server process_info file appropriately.
        self.process_info_obj['status_code'] = e.status_code
        self.process_info_obj['status'] = 'failed'
        if self.action == 'resource_transfer_in':
            self.process_info_obj['download_status'] = 'failed'
        self.process_info_obj['message'] = e.data
        # Update the expiration from 5 hours to 1 hour from now. We can delete this faster because
        # it's an incomplete/failed directory.
        self.process_info_obj['expiration'] = str(timezone.now() + relativedelta(hours=1))
        update_or_create_process_info(self.process_info_obj, self.action, self.ticket_number)
        return False

    # Get the latest contents of the job's process_info.json file
    self.process_info_obj = read_file(self.process_info_path, True)[self.action]

    # The directory all files should be saved in.
    self.resource_main_dir = os.path.join(self.ticket_path, self.base_directory_name)
    update_process_info_message(self.process_info_path, self.action,
                                'Performing fixity checks and gathering metadata...')
    self.extra_metadata = func_dict['extra_metadata']

    # For each resource, perform fixity check, gather metadata, and save it to disk.
    fixity_info = []
    self.download_fixity = True
    self.download_failed_fixity = []
    self.source_fts_metadata_actions = []
    self.new_fts_metadata_files = []
    self.all_keywords = []
    self.initial_keywords = []
    self.manual_keywords = []
    self.enhanced_keywords = []
    for resource in func_dict['resources']:
        # Perform the fixity check and add extra info to the returned fixity object.
        # Note: This method of calling the function needs to stay this way for test Mock
        fixity_obj, self.download_fixity = download_fixity_checker.download_fixity_checker(
            resource)
        fixity_info.append(fixity_obj)

        # Track every path whose hash did not match so it can be reported later.
        if not fixity_obj['fixity']:
            self.download_failed_fixity.append(resource['path'])

        # Create metadata for this resource or validate the metadata file
        if resource['title'] == 'PRESQT_FTS_METADATA.json':
            is_valid = validate_metadata(self, resource)
            if not is_valid:
                # Invalid FTS metadata is renamed and kept on disk; a valid file
                # is absorbed (not re-written) — see validate_metadata.
                resource['path'] = resource['path'].replace(
                    'PRESQT_FTS_METADATA.json', 'INVALID_PRESQT_FTS_METADATA.json')
                create_download_metadata(self, resource, fixity_obj)
                write_file(
                    '{}{}'.format(self.resource_main_dir, resource['path']),
                    resource['file'])
        else:
            create_download_metadata(self, resource, fixity_obj)
            write_file(
                '{}{}'.format(self.resource_main_dir, resource['path']),
                resource['file'])

    # Enhance the source keywords (only meaningful for transfers whose source
    # target supports keywords).
    self.keyword_dict = {}
    if self.action == 'resource_transfer_in':
        if self.supports_keywords:
            if self.keyword_action == 'automatic':
                self.keyword_dict = automatic_keywords(self)
            elif self.keyword_action == 'manual':
                self.keyword_dict = manual_keywords(self)
    self.keyword_enhancement_successful = True

    # Create PresQT action metadata
    update_process_info_message(self.process_info_path, self.action,
                                "Creating PRESQT_FTS_METADATA...")
    self.source_username = func_dict['action_metadata']['sourceUsername']
    if self.action == 'resource_transfer_in':
        source_target_data = get_target_data(self.source_target_name)
        destination_target_data = get_target_data(self.destination_target_name)
        self.details = "PresQT Transfer from {} to {}".format(
            source_target_data['readable_name'], destination_target_data['readable_name'])
    else:
        source_target_data = get_target_data(self.source_target_name)
        self.details = "PresQT Download from {}".format(
            source_target_data['readable_name'])

    self.action_metadata = {
        'id': str(uuid4()),
        'details': self.details,
        'actionDateTime': str(timezone.now()),
        'actionType': self.action,
        'sourceTargetName': self.source_target_name,
        'sourceUsername': self.source_username,
        'destinationTargetName': 'Local Machine',
        'destinationUsername': None,
        'keywords': self.keyword_dict,
        'files': {
            'created': self.new_fts_metadata_files,
            'updated': [],
            'ignored': []
        }
    }

    # TODO: Move this up to make it occur after we loop through func_dict['resources'] and write
    # resources
    # Write empty containers to disk
    for container_path in func_dict['empty_containers']:
        # Make sure the container_path has a '/' and the beginning and end
        if container_path[-1] != '/':
            container_path += '/'
        if container_path[0] != '/':
            container_path = '/' + container_path
        os.makedirs(os.path.dirname('{}{}'.format(self.resource_main_dir, container_path)))

    # If we are transferring the downloaded resource then bag it for the resource_upload method
    if self.action == 'resource_transfer_in':
        self.action_metadata['destinationTargetName'] = self.destination_target_name

        # Make a BagIt 'bag' of the resources.
        bagit.make_bag(self.resource_main_dir,
                       checksums=['md5', 'sha1', 'sha256', 'sha512'])
        self.process_info_obj['download_status'] = get_action_message(
            self, 'Download', self.download_fixity, True, self.action_metadata)
        return True
    # If we are only downloading the resource then create metadata, bag, zip,
    # and update the server process file.
    else:
        # Create Metadata file
        final_fts_metadata_data = create_fts_metadata(
            self.all_keywords, self.action_metadata,
            self.source_fts_metadata_actions, self.extra_metadata)

        # Validate the final metadata
        metadata_validation = schema_validator(
            'presqt/json_schemas/metadata_schema.json', final_fts_metadata_data)
        self.process_info_obj['message'] = get_action_message(
            self, 'Download', self.download_fixity, metadata_validation, self.action_metadata)

        # Make a BagIt 'bag' of the resources.
        bagit.make_bag(self.resource_main_dir,
                       checksums=['md5', 'sha1', 'sha256', 'sha512'])

        # Write metadata file.
        # NOTE: written after bagging, so the metadata file sits alongside (not
        # inside) the bag's payload manifest.
        write_file(
            os.path.join(self.resource_main_dir, 'PRESQT_FTS_METADATA.json'),
            final_fts_metadata_data, True)

        # Add the fixity file to the disk directory
        write_file(
            os.path.join(self.resource_main_dir, 'fixity_info.json'),
            fixity_info, True)

        # Zip the BagIt 'bag' to send forward.
        zip_directory(self.resource_main_dir,
                      "{}.zip".format(self.resource_main_dir), self.ticket_path)

        # Everything was a success so update the server metadata file.
        self.process_info_obj['status_code'] = '200'
        self.process_info_obj['status'] = 'finished'
        self.process_info_obj['zip_name'] = '{}.zip'.format(self.base_directory_name)
        self.process_info_obj['failed_fixity'] = self.download_failed_fixity
        update_or_create_process_info(self.process_info_obj, self.action, self.ticket_number)

        if self.email:
            # Build link to retrieve the download
            download_reverse = reverse('job_status', kwargs={
                "action": "download",
                "response_format": "zip"
            })
            download_url = self.request.build_absolute_uri(download_reverse)
            final_download_url = "{}?ticket_number={}".format(
                download_url, self.ticket_number)
            context = {
                "download_url": final_download_url,
                "download_message": self.process_info_obj['message'],
                "failed_fixity": self.process_info_obj['failed_fixity']
            }
            email_blaster(self.email, "PresQT Download Complete",
                          context, "emails/download_email.html")

        return True
def _download_resource(self):
    """
    Downloads the resources from the target, performs a fixity check,
    zips them up in BagIt format.

    Returns
    -------
    bool
        False when the target fetch raised a PresQTResponseException (the
        process_info file is updated with the failure and a shortened
        expiration); True otherwise.

    Side effects: writes downloaded files, PRESQT_FTS_METADATA.json and
    fixity_info.json under self.resource_main_dir, bags (and, for plain
    downloads, zips) the directory, and updates the job's process_info file.
    """
    action = 'resource_download'

    # Write the process id to the process_info file
    self.process_info_obj['function_process_id'] = self.function_process.pid
    write_file(self.process_info_path, self.process_info_obj, True)

    # Fetch the proper function to call
    func = FunctionRouter.get_function(self.source_target_name, action)

    # Fetch the resources. func_dict is in the format:
    #     {
    #         'resources': files,
    #         'empty_containers': empty_containers,
    #         'action_metadata': action_metadata
    #     }
    try:
        func_dict = func(self.source_token, self.source_resource_id)

        # If the resource is being transferred, has only one file, and that file is PresQT
        # metadata then raise an error.
        if self.action == 'resource_transfer_in' \
                and len(func_dict['resources']) == 1 \
                and func_dict['resources'][0]['title'] == 'PRESQT_FTS_METADATA.json':
            raise PresQTResponseException(
                'PresQT Error: PresQT FTS metadata cannot not be transferred by itself.',
                status.HTTP_400_BAD_REQUEST)
    except PresQTResponseException as e:
        # Catch any errors that happen within the target fetch.
        # Update the server process_info file appropriately.
        self.process_info_obj['status_code'] = e.status_code
        self.process_info_obj['status'] = 'failed'
        if self.action == 'resource_transfer_in':
            self.process_info_obj['download_status'] = 'failed'
        self.process_info_obj['message'] = e.data
        # Update the expiration from 5 days to 1 hour from now. We can delete this faster because
        # it's an incomplete/failed directory.
        self.process_info_obj['expiration'] = str(timezone.now() + relativedelta(hours=1))
        write_file(self.process_info_path, self.process_info_obj, True)
        return False

    # The directory all files should be saved in.
    self.resource_main_dir = os.path.join(self.ticket_path, self.base_directory_name)

    # For each resource, perform fixity check, gather metadata, and save it to disk.
    fixity_info = []
    self.download_fixity = True
    self.source_fts_metadata_actions = []
    self.new_fts_metadata_files = []
    self.download_failed_fixity = []
    for resource in func_dict['resources']:
        # Perform the fixity check and add extra info to the returned fixity object.
        fixity_obj, self.download_fixity = download_fixity_checker.download_fixity_checker(
            resource)
        fixity_info.append(fixity_obj)

        # Track every path whose hash did not match so it can be reported later.
        if not fixity_obj['fixity']:
            self.download_failed_fixity.append(resource['path'])

        # Create metadata for this resource. Return True if a valid FTS metadata file is found.
        if create_download_metadata(self, resource, fixity_obj):
            # Don't write valid FTS metadata file.
            continue

        # Save the file to the disk.
        write_file('{}{}'.format(self.resource_main_dir, resource['path']), resource['file'])

    # Create PresQT action metadata
    self.action_metadata = {
        'id': str(uuid4()),
        'actionDateTime': str(timezone.now()),
        'actionType': self.action,
        'sourceTargetName': self.source_target_name,
        'sourceUsername': func_dict['action_metadata']['sourceUsername'],
        'destinationTargetName': 'Local Machine',
        'destinationUsername': None,
        'files': {
            'created': self.new_fts_metadata_files,
            'updated': [],
            'ignored': []
        }
    }

    # Write empty containers to disk
    for container_path in func_dict['empty_containers']:
        # Make sure the container_path has a '/' and the beginning and end
        if container_path[-1] != '/':
            container_path += '/'
        if container_path[0] != '/':
            container_path = '/' + container_path
        os.makedirs(os.path.dirname('{}{}'.format(self.resource_main_dir, container_path)))

    # If we are transferring the downloaded resource then bag it for the resource_upload method
    if self.action == 'resource_transfer_in':
        self.action_metadata['destinationTargetName'] = self.destination_target_name

        # Make a BagIt 'bag' of the resources.
        bagit.make_bag(self.resource_main_dir,
                       checksums=['md5', 'sha1', 'sha256', 'sha512'])
        self.process_info_obj['download_status'] = get_action_message(
            'Download', self.download_fixity, True, self.action_metadata)
        return True
    # If we are only downloading the resource then create metadata, bag, zip,
    # and update the server process file.
    else:
        # Create and write metadata file.
        final_fts_metadata_data = create_fts_metadata(
            self.action_metadata, self.source_fts_metadata_actions)
        write_file(
            os.path.join(self.resource_main_dir, 'PRESQT_FTS_METADATA.json'),
            final_fts_metadata_data, True)

        # Validate the final metadata
        metadata_validation = schema_validator(
            'presqt/json_schemas/metadata_schema.json', final_fts_metadata_data)
        self.process_info_obj['message'] = get_action_message(
            'Download', self.download_fixity, metadata_validation, self.action_metadata)

        # Add the fixity file to the disk directory
        write_file(
            os.path.join(self.resource_main_dir, 'fixity_info.json'),
            fixity_info, True)

        # Make a BagIt 'bag' of the resources.
        bagit.make_bag(self.resource_main_dir,
                       checksums=['md5', 'sha1', 'sha256', 'sha512'])

        # Zip the BagIt 'bag' to send forward.
        zip_directory(self.resource_main_dir,
                      "{}.zip".format(self.resource_main_dir), self.ticket_path)

        # Everything was a success so update the server metadata file.
        self.process_info_obj['status_code'] = '200'
        self.process_info_obj['status'] = 'finished'
        self.process_info_obj['zip_name'] = '{}.zip'.format(self.base_directory_name)
        self.process_info_obj['failed_fixity'] = self.download_failed_fixity
        write_file(self.process_info_path, self.process_info_obj, True)

        return True