def test_success(self, mock_getsize):
    """upload_files() should hand both files to the workspace and fill in the models."""
    mock_getsize.side_effect = lambda path: 100

    workspace = storage_test_utils.create_workspace()

    # First file: no media type given, so Scale should auto-detect text/plain
    remote_path_1 = 'my/remote/path/file.txt'
    local_path_1 = 'my/local/path/file.txt'
    file_1 = ScaleFile()
    file_1.set_basic_fields('file.txt', 100, None)
    file_1.file_path = remote_path_1

    # Second file: explicit JSON media type
    remote_path_2 = 'my/remote/path/2/file.json'
    local_path_2 = 'my/local/path/2/file.json'
    file_2 = ScaleFile()
    file_2.set_basic_fields('file.json', 100, 'application/json')
    file_2.file_path = remote_path_2

    workspace.upload_files = MagicMock()

    uploads = [FileUpload(file_1, local_path_1), FileUpload(file_2, local_path_2)]
    models = ScaleFile.objects.upload_files(workspace, uploads)

    workspace.upload_files.assert_called_once_with([FileUpload(file_1, local_path_1),
                                                    FileUpload(file_2, local_path_2)])

    expected = [('file.txt', remote_path_1, 'text/plain'),
                ('file.json', remote_path_2, 'application/json')]
    for model, (name, path, media) in zip(models, expected):
        self.assertEqual(name, model.file_name)
        self.assertEqual(path, model.file_path)
        self.assertEqual(media, model.media_type)
        self.assertEqual(workspace.id, model.workspace_id)
def test_no_tags(self):
    """A ScaleFile with no data type should report an empty tag set."""
    self.assertSetEqual(ScaleFile().get_data_type_tags(), set())
def test_objects(self):
    """update_uuid() should accept mixed argument types deterministically."""
    the_file = ScaleFile()
    the_file.update_uuid('test.txt', 1, True, {'key': 'value'})

    # UUIDs here are MD5-style digests: always 32 hex characters
    self.assertEqual(len(the_file.uuid), 32)
    self.assertEqual(the_file.uuid, 'ee6535359fbe02d50589a823951eb491')
def test_multi_strings(self):
    """update_uuid() should fold multiple strings into one stable UUID."""
    the_file = ScaleFile()
    the_file.update_uuid('test.txt', 'test1', 'test2')

    # Digest length is fixed regardless of input count
    self.assertEqual(len(the_file.uuid), 32)
    self.assertEqual(the_file.uuid, '8ff66acfc019330bba973b408c63ad15')
def test_one_string(self):
    """update_uuid() should work with a single string argument."""
    the_file = ScaleFile()
    the_file.update_uuid('test.txt')

    self.assertEqual(len(the_file.uuid), 32)
    self.assertEqual(the_file.uuid, 'dd18bf3a8e0a2a3e53e2661c7fb53534')
def test_tags(self):
    """get_data_type_tags() should split the comma-delimited data_type field."""
    the_file = ScaleFile(data_type='A,B,c')
    # Case is preserved; tags come back as a set
    self.assertSetEqual(the_file.get_data_type_tags(), {'A', 'B', 'c'})
def test_same_tag(self):
    """Adding the same tag twice should leave only a single copy."""
    the_file = ScaleFile()
    for _ in range(2):
        the_file.add_data_type_tag('Hello-1')
    self.assertSetEqual(the_file.get_data_type_tags(), {'Hello-1'})
def test_valid(self):
    """Valid tags (letters, digits, dashes, underscores) should all be recorded."""
    the_file = ScaleFile()
    for tag in ('Hello-1', 'foo_BAR'):
        the_file.add_data_type_tag(tag)
    self.assertSetEqual(the_file.get_data_type_tags(), {'Hello-1', 'foo_BAR'})
def test_invalid(self):
    """Tests calling add_data_type_tag() with invalid tags"""
    the_file = ScaleFile()
    self.assertRaises(InvalidDataTypeTag, the_file.add_data_type_tag, 'my.invalid.tag')
    # BUG FIX: the original literal 'my\invalid\tag!' silently turned \t into a
    # TAB character (and \i is an invalid escape sequence, a SyntaxWarning on
    # modern Python), so literal backslashes were never actually tested.
    # A raw string preserves the intended characters.
    self.assertRaises(InvalidDataTypeTag, the_file.add_data_type_tag, r'my\invalid\tag!')
def test_partial(self):
    """None arguments should be ignored when computing the UUID."""
    with_nones = ScaleFile()
    with_nones.update_uuid('test.txt', None, 'test', None)

    without_nones = ScaleFile()
    without_nones.update_uuid('test.txt', 'test')

    # Interleaved Nones must not change the digest
    self.assertEqual(without_nones.uuid, with_nones.uuid)
def handle(self, *args, **options):
    """See :meth:`django.core.management.base.BaseCommand.handle`.

    This method starts the file upload process.

    :param options: parsed command-line options; expects 'file_id',
        'local_path', 'remote_path' and 'workspace' keys
    """

    file_id = options.get('file_id')
    # BUG FIX: local_path was referenced below but never assigned, which
    # raised NameError on every run. Read it from the parsed options like
    # the other arguments.
    # TODO(review): confirm the option name matches add_arguments(); also
    # note file_id is currently unused - verify whether it should drive a
    # ScaleFile lookup instead of the file_name lookup below.
    local_path = options.get('local_path')
    remote_path = options.get('remote_path')
    workspace_name = options.get('workspace')

    logger.info('Command starting: scale_upload_file')
    logger.info(' - Workspace: %s', workspace_name)

    # Validate the file paths
    file_name = os.path.basename(local_path)
    if not os.path.exists(local_path):
        # logger.error, not logger.exception: no exception is active here
        logger.error('Local file does not exist: %s', local_path)
        sys.exit(1)

    # Attempt to fetch the workspace model
    try:
        workspace = Workspace.objects.get(name=workspace_name)
    except Workspace.DoesNotExist:
        logger.exception('Workspace does not exist: %s', workspace_name)
        sys.exit(1)

    # Attempt to set up a file model, reusing an existing record if present
    try:
        scale_file = ScaleFile.objects.get(file_name=file_name)
    except ScaleFile.DoesNotExist:
        scale_file = ScaleFile()
        scale_file.update_uuid(file_name)
    scale_file.file_path = remote_path

    try:
        ScaleFile.objects.upload_files(workspace, [FileUpload(scale_file, local_path)])
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt propagate
        logger.exception('Unknown error occurred, exit code 1 returning')
        sys.exit(1)

    logger.info('Command completed: scale_upload_file')
def test_fails(self, mock_makedirs, mock_getsize):
    """Tests calling ScaleFileManager.upload_files() when Workspace.upload_files() fails"""
    mock_getsize.side_effect = lambda path: 100

    upload_dir = os.path.join('upload', 'dir')
    work_dir = os.path.join('work', 'dir')
    workspace = storage_test_utils.create_workspace()

    # File without a media type; Scale should auto-detect text/plain
    file_1 = ScaleFile()
    file_1.media_type = None
    remote_path_1 = 'my/remote/path/file.txt'
    local_path_1 = 'my/local/path/file.txt'

    # File with an explicit media type
    file_2 = ScaleFile()
    file_2.media_type = 'application/json'
    remote_path_2 = 'my/remote/path/2/file.json'
    local_path_2 = 'my/local/path/2/file.json'

    # Force the workspace upload to blow up
    workspace.upload_files = MagicMock()
    workspace.upload_files.side_effect = Exception
    workspace.delete_files = MagicMock()
    # NOTE(review): delete_work_dir is computed but never used by the assertions
    delete_work_dir = os.path.join(work_dir, 'delete', get_valid_filename(workspace.name))

    files = [(file_1, local_path_1, remote_path_1),
             (file_2, local_path_2, remote_path_2)]
    self.assertRaises(Exception, ScaleFile.objects.upload_files,
                      upload_dir, work_dir, workspace, files)
def handle(self, local_path, remote_path, **options):
    """See :meth:`django.core.management.base.BaseCommand.handle`.

    This method starts the file upload process.

    :param local_path: Path of the local file to upload
    :param remote_path: Path within the workspace to store the file at
    :param options: parsed command-line options; expects a 'workspace' key
    """

    workspace_name = options.get('workspace')

    logger.info('Command starting: scale_upload_file')
    logger.info(' - Workspace: %s', workspace_name)

    # Validate the file paths
    file_name = os.path.basename(local_path)
    if not os.path.exists(local_path):
        # BUG FIX: logger.error, not logger.exception - there is no active
        # exception here, so exc_info would be misleading/empty
        logger.error('Local file does not exist: %s', local_path)
        sys.exit(1)

    # Attempt to fetch the workspace model
    try:
        workspace = Workspace.objects.get(name=workspace_name)
    except Workspace.DoesNotExist:
        logger.exception('Workspace does not exist: %s', workspace_name)
        sys.exit(1)

    # Attempt to set up a file model, reusing an existing record if present
    try:
        scale_file = ScaleFile.objects.get(file_name=file_name)
    except ScaleFile.DoesNotExist:
        scale_file = ScaleFile()
        scale_file.update_uuid(file_name)
    scale_file.file_path = remote_path

    try:
        ScaleFile.objects.upload_files(workspace, [FileUpload(scale_file, local_path)])
    except Exception:
        # BUG FIX: narrowed from a bare except so SystemExit and
        # KeyboardInterrupt propagate instead of being swallowed
        logger.exception('Unknown error occurred, exit code 1 returning')
        sys.exit(1)

    logger.info('Command completed: scale_upload_file')
def test_success(self, mock_getsize):
    '''Tests calling ScaleFileManager.upload_files() successfully'''
    mock_getsize.side_effect = lambda path: 100

    upload_dir = os.path.join('upload', 'dir')
    work_dir = os.path.join('work', 'dir')
    workspace = storage_test_utils.create_workspace()

    # File 1 has no media type, so Scale should auto-detect text/plain
    file_1 = ScaleFile()
    file_1.media_type = None
    remote_path_1 = u'my/remote/path/file.txt'
    local_path_1 = u'my/local/path/file.txt'

    # File 2 carries an explicit media type
    file_2 = ScaleFile()
    file_2.media_type = u'application/json'
    remote_path_2 = u'my/remote/path/2/file.json'
    local_path_2 = u'my/local/path/2/file.json'

    workspace.upload_files = MagicMock()
    workspace.delete_files = MagicMock()
    workspace_work_dir = ScaleFile.objects._get_workspace_work_dir(work_dir, workspace)

    files = [(file_1, local_path_1, remote_path_1),
             (file_2, local_path_2, remote_path_2)]
    models = ScaleFile.objects.upload_files(upload_dir, work_dir, workspace, files)

    workspace.upload_files.assert_called_once_with(upload_dir, workspace_work_dir,
                                                   [(local_path_1, remote_path_1),
                                                    (local_path_2, remote_path_2)])
    # No failures occurred, so nothing should have been deleted
    self.assertListEqual(workspace.delete_files.call_args_list, [])

    expected = [(u'file.txt', remote_path_1, u'text/plain'),
                (u'file.json', remote_path_2, u'application/json')]
    for model, (name, path, media) in zip(models, expected):
        self.assertEqual(name, model.file_name)
        self.assertEqual(path, model.file_path)
        self.assertEqual(media, model.media_type)
        self.assertEqual(workspace.id, model.workspace_id)
def test_success(self, mock_getsize):
    '''Tests calling ScaleFileManager.upload_files() successfully'''
    def fake_getsize(path):
        return 100
    mock_getsize.side_effect = fake_getsize

    work_dir = os.path.join('work', 'dir')
    upload_dir = os.path.join('upload', 'dir')
    workspace = storage_test_utils.create_workspace()
    workspace.upload_files = MagicMock()
    workspace.delete_files = MagicMock()

    # No media type on the first file: Scale should auto-detect text/plain
    first_file = ScaleFile()
    first_file.media_type = None
    first_remote = u'my/remote/path/file.txt'
    first_local = u'my/local/path/file.txt'

    second_file = ScaleFile()
    second_file.media_type = u'application/json'
    second_remote = u'my/remote/path/2/file.json'
    second_local = u'my/local/path/2/file.json'

    workspace_work_dir = ScaleFile.objects._get_workspace_work_dir(work_dir, workspace)
    models = ScaleFile.objects.upload_files(
        upload_dir, work_dir, workspace,
        [(first_file, first_local, first_remote),
         (second_file, second_local, second_remote)])

    workspace.upload_files.assert_called_once_with(
        upload_dir, workspace_work_dir,
        [(first_local, first_remote), (second_local, second_remote)])
    # Success path: the cleanup hook must never have fired
    self.assertListEqual(workspace.delete_files.call_args_list, [])

    self.assertEqual(u'file.txt', models[0].file_name)
    self.assertEqual(first_remote, models[0].file_path)
    self.assertEqual(u'text/plain', models[0].media_type)
    self.assertEqual(workspace.id, models[0].workspace_id)

    self.assertEqual(u'file.json', models[1].file_name)
    self.assertEqual(second_remote, models[1].file_path)
    self.assertEqual(u'application/json', models[1].media_type)
    self.assertEqual(workspace.id, models[1].workspace_id)
def move_files(file_ids, new_workspace=None, new_file_path=None):
    """Moves the given files to a different workspace/uri

    :param file_ids: List of ids of ScaleFile objects to move; should all be from the same workspace
    :type file_ids: [int]
    :param new_workspace: New workspace to move files to
    :type new_workspace: `storage.models.Workspace`
    :param new_file_path: New path for files
    :type new_file_path: string
    """

    try:
        if not file_ids:
            # Robustness: an empty id list used to hit IndexError on files[0]
            logger.info('No files to move. Doing nothing')
            return

        messages = []
        files = ScaleFile.objects.all()
        files = files.select_related('workspace')
        files = files.defer('workspace__json_config')
        files = files.filter(id__in=file_ids).only('id', 'file_name', 'file_path', 'workspace')
        old_files = []
        # All files are expected to come from the same workspace
        old_workspace = files[0].workspace

        if new_workspace:
            # We need a local path to copy the file, try to get a direct path from the broker, if that fails we must
            # download the file and copy from there
            # TODO: a future refactor should make the brokers work off of file objects instead of paths so the extra
            # download is not necessary
            # BUG FIX: pass the file models themselves; the original wrapped the
            # queryset in a one-element list ([files]), which the broker cannot use
            paths = old_workspace.get_file_system_paths(files)
            if paths:
                local_paths = paths
            else:
                # No direct paths available, so download each file to /tmp first
                file_downloads = []
                local_paths = []
                for scale_file in files:
                    local_path = os.path.join('/tmp', scale_file.file_name)
                    file_downloads.append(FileDownload(scale_file, local_path, False))
                    local_paths.append(local_path)
                ScaleFile.objects.download_files(file_downloads)

            uploads = []
            for scale_file, path in zip(files, local_paths):
                old_path = scale_file.file_path
                # Remember the old location so it can be deleted after the copy
                old_files.append(ScaleFile(file_name=scale_file.file_name, file_path=scale_file.file_path))
                scale_file.file_path = new_file_path if new_file_path else scale_file.file_path
                logger.info('Copying %s in workspace %s to %s in workspace %s', old_path,
                            scale_file.workspace.name, scale_file.file_path, new_workspace.name)
                uploads.append(FileUpload(scale_file, path))
                messages.append(create_move_file_message(file_id=scale_file.id))
            ScaleFile.objects.upload_files(new_workspace, uploads)
        elif new_file_path:
            moves = []
            for scale_file in files:
                logger.info('Moving %s to %s in workspace %s', scale_file.file_path,
                            new_file_path, scale_file.workspace.name)
                moves.append(FileMove(scale_file, new_file_path))
                messages.append(create_move_file_message(file_id=scale_file.id))
            ScaleFile.objects.move_files(moves)
        else:
            logger.info('No new workspace or file path. Doing nothing')

        CommandMessageManager().send_messages(messages)

        if new_workspace:
            # Copied files to new workspace, so delete file in old workspace (if workspace provides local path to do so)
            old_workspace.delete_files(old_files, update_model=False)
    except ScaleError as err:
        err.log()
        sys.exit(err.exit_code)
    except Exception as ex:
        # Map known exception names to Scale errors; fall back to a general failure
        exit_code = GENERAL_FAIL_EXIT_CODE
        err = get_error_by_exception(ex.__class__.__name__)
        if err:
            err.log()
            exit_code = err.exit_code
        else:
            logger.exception('Error performing move_files steps')
        sys.exit(exit_code)
def test_none(self):
    """update_uuid() with no arguments should raise ValueError."""
    self.assertRaises(ValueError, ScaleFile().update_uuid)
def _generate_input_metadata(self, job_exe):
    """Generate and upload the input metadata file for the job execution

    :param job_exe: The job execution to generate input metadata for
    :type job_exe: :class:`job.models.JobExecution`
    """

    job_interface = job_exe.job_type.get_job_interface()
    if not job_interface.needs_input_metadata():
        return

    # Generate input metadata dict
    input_metadata = {}
    # BUG FIX: call the method - without (), config was a bound method object
    # and config.get_dict() below would raise AttributeError
    config = job_exe.get_execution_configuration()
    if 'input_files' in config.get_dict():
        input_metadata['JOB'] = {}
        input_data = job_exe.job.get_input_data()
        for name in input_data.values.keys():
            value = input_data.values[name]
            if type(value) is JsonValue:
                input_metadata['JOB'][name] = value.value
            elif type(value) is FileValue:
                input_metadata['JOB'][name] = [ScaleFile.objects.get(pk=f)._get_url()
                                               for f in value.file_ids]
    if job_exe.recipe_id and job_exe.recipe.has_input():
        input_metadata['RECIPE'] = {}
        input_data = job_exe.recipe.get_input_data()
        for name in input_data.values.keys():
            value = input_data.values[name]
            if type(value) is JsonValue:
                input_metadata['RECIPE'][name] = value.value
            elif type(value) is FileValue:
                input_metadata['RECIPE'][name] = [ScaleFile.objects.get(pk=f)._get_url()
                                                  for f in value.file_ids]

    workspace_names = config.get_input_workspace_names()
    workspace_models = {w.name: w for w in Workspace.objects.get_workspaces(names=workspace_names)}

    input_metadata_id = None
    if input_metadata:
        # Write the metadata manifest to a local temp file
        file_name = '%d-input_metadata.json' % job_exe.job.id
        local_path = os.path.join(SCALE_JOB_EXE_INPUT_PATH, 'tmp', file_name)
        with open(local_path, 'w') as metadata_file:
            json.dump(input_metadata, metadata_file)

        # Reuse an existing ScaleFile record if one exists for this name
        try:
            scale_file = ScaleFile.objects.get(file_name=file_name)
        except ScaleFile.DoesNotExist:
            scale_file = ScaleFile()
            scale_file.update_uuid(file_name)
        remote_path = self._calculate_remote_path(job_exe)
        scale_file.file_path = remote_path

        # Try each input workspace until one accepts the upload.
        # BUG FIX: iterate the workspace models - iterating the dict directly
        # yielded name strings, which upload_files() cannot use as a workspace
        for workspace in workspace_models.values():
            try:
                if not input_metadata_id:
                    ScaleFile.objects.upload_files(workspace, [FileUpload(scale_file, local_path)])
                    input_metadata_id = ScaleFile.objects.get(file_name=file_name).id
                    data = job_exe.job.get_job_data()
                    data.add_file_input('INPUT_METADATA_MANIFEST', input_metadata_id)
                    job_exe.job.input = data.get_dict()
                    job_exe.job.save()
            except Exception:
                # Best effort: log (the bare except hid all failures) and try
                # the next workspace
                logger.exception('Failed to upload input metadata to a workspace for job_exe %d',
                                 job_exe.job.id)
                continue
        if not input_metadata_id:
            # logger.error, not logger.exception: no active exception here
            logger.error('Error uploading input_metadata manifest for job_exe %d', job_exe.job.id)