def test_process_job_output(self):
    """Tests calling JobManager.process_job_output()"""

    output_1 = JobResults()
    output_1.add_file_parameter('foo', 1)
    output_2 = JobResults()
    output_2.add_file_parameter('foo', 2)

    # These jobs have completed and have their execution results
    job_exe_1 = job_test_utils.create_job_exe(status='COMPLETED', output=output_1)
    job_exe_2 = job_test_utils.create_job_exe(status='COMPLETED', output=output_2)

    # These jobs have their execution results, but have not completed
    job_exe_3 = job_test_utils.create_job_exe(status='RUNNING')
    job_exe_4 = job_test_utils.create_job_exe(status='RUNNING')
    for job_exe in [job_exe_3, job_exe_4]:
        job_exe_output = JobExecutionOutput()
        job_exe_output.job_exe_id = job_exe.id
        job_exe_output.job_id = job_exe.job_id
        job_exe_output.job_type_id = job_exe.job.job_type_id
        job_exe_output.exe_num = job_exe.exe_num
        job_exe_output.output = JobResults().get_dict()
        job_exe_output.save()

    # These jobs have completed, but do not have their execution results
    job_exe_5 = job_test_utils.create_job_exe(status='RUNNING')
    job_exe_6 = job_test_utils.create_job_exe(status='RUNNING')
    for job in [job_exe_5.job, job_exe_6.job]:
        job.status = 'COMPLETED'
        job.save()

    # Test method
    job_ids = [job_exe.job_id for job_exe in [job_exe_1, job_exe_2, job_exe_3, job_exe_4, job_exe_5, job_exe_6]]
    result_ids = Job.objects.process_job_output(job_ids, timezone.now())

    self.assertEqual(set(result_ids), {job_exe_1.job_id, job_exe_2.job_id})
    # Jobs 1 and 2 should have output populated, jobs 3 through 6 should not
    jobs = list(Job.objects.filter(id__in=job_ids).order_by('id'))
    self.assertEqual(len(jobs), 6)
    self.assertTrue(jobs[0].has_output())
    self.assertDictEqual(jobs[0].output, output_1.get_dict())
    self.assertTrue(jobs[1].has_output())
    self.assertDictEqual(jobs[1].output, output_2.get_dict())
    self.assertFalse(jobs[2].has_output())
    self.assertFalse(jobs[3].has_output())
    self.assertFalse(jobs[4].has_output())
    self.assertFalse(jobs[5].has_output())
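# A minimal sketch, not Scale's implementation, of the gating rule the test above
# exercises: process_job_output() only picks up jobs that are both COMPLETED and
# have a saved execution output. The helper name _output_ready is hypothetical and
# exists only to state the predicate explicitly.
def _output_ready(job_status, job_exe_output):
    """Return True when a job's execution output can be copied onto the job model"""
    return job_status == 'COMPLETED' and job_exe_output is not None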
def test_successful_file(self):
    """Tests calling JobResults.add_output_to_data() successfully with a file parameter"""

    output_name = 'foo'
    file_id = 1337
    input_name = 'bar'

    results = JobResults()
    results.add_file_parameter(output_name, file_id)
    job_data = MagicMock()
    results.add_output_to_data(output_name, job_data, input_name)

    job_data.add_file_input.assert_called_with(input_name, file_id)
def store_output_data_files(self, data_files, job_exe):
    """Stores the given data output files

    :param data_files: Dict with each file parameter name mapping to a list of ProductFileMetadata objects
    :type data_files: {string: [`ProductFileMetadata`]}
    :param job_exe: The job execution model (with related job and job_type fields) that is storing the output
        data files
    :type job_exe: :class:`job.models.JobExecution`
    :returns: The job results
    :rtype: :class:`job.configuration.results.job_results.JobResults`
    """

    # Organize the data files
    workspace_files = {}  # Workspace ID -> [(absolute local file path, media type)]
    params_by_file_path = {}  # Absolute local file path -> output parameter name
    output_workspaces = JobData.create_output_workspace_dict(data_files.keys(), self, job_exe)
    for name in data_files:
        workspace_id = output_workspaces[name]
        if workspace_id in workspace_files:
            workspace_file_list = workspace_files[workspace_id]
        else:
            workspace_file_list = []
            workspace_files[workspace_id] = workspace_file_list

        data_file_entry = data_files[name]
        if isinstance(data_file_entry, list):
            for file_entry in data_file_entry:
                file_path = os.path.normpath(file_entry.local_path)
                if not os.path.isfile(file_path):
                    raise Exception('%s is not a valid file' % file_path)
                params_by_file_path[file_path] = name
                # Store the normalized path so the stored file keys match params_by_file_path
                file_entry.local_path = file_path
                workspace_file_list.append(file_entry)
        else:
            file_path = os.path.normpath(data_file_entry.local_path)
            if not os.path.isfile(file_path):
                raise Exception('%s is not a valid file' % file_path)
            params_by_file_path[file_path] = name
            data_file_entry.local_path = file_path
            workspace_file_list.append(data_file_entry)

    data_file_store = DATA_FILE_STORE['DATA_FILE_STORE']
    if not data_file_store:
        raise Exception('No data file store found')
    stored_files = data_file_store.store_files(workspace_files, self.get_input_file_ids(), job_exe)

    # Organize results
    param_file_ids = {}  # Output parameter name -> file ID or [file IDs]
    for file_path in stored_files:
        file_id = stored_files[file_path]
        name = params_by_file_path[file_path]
        if isinstance(data_files[name], list):
            if name in param_file_ids:
                file_id_list = param_file_ids[name]
            else:
                file_id_list = []
                param_file_ids[name] = file_id_list
            file_id_list.append(file_id)
        else:
            param_file_ids[name] = file_id

    # Create job results
    results = JobResults()
    for name in param_file_ids:
        param_entry = param_file_ids[name]
        if isinstance(param_entry, list):
            results.add_file_list_parameter(name, param_entry)
        else:
            results.add_file_parameter(name, param_entry)
    return results
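# A hedged usage sketch for the ProductFileMetadata variant above. The real
# ProductFileMetadata constructor is not shown in this file, so a hypothetical
# stand-in class is used; only the data_files shape (output parameter name mapped
# to a metadata object or a list of them) comes from the method and its docstring.
class _FakeFileMetadata(object):
    """Hypothetical stand-in exposing the attributes the method reads and writes"""

    def __init__(self, local_path, media_type=None):
        self.local_path = local_path
        self.media_type = media_type

data_files = {
    'image_out': _FakeFileMetadata('/tmp/outputs/result.png', 'image/png'),
    'tile_set': [_FakeFileMetadata('/tmp/outputs/tile_0.png', 'image/png'),
                 _FakeFileMetadata('/tmp/outputs/tile_1.png')],
}
# results = job_data.store_output_data_files(data_files, job_exe)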
def store_output_data_files(self, data_files, job_exe):
    """Stores the given data output files

    :param data_files: Dict with each file parameter name mapping to a tuple of absolute local file path and
        media type (media type may be None) for a single file parameter, or a list of such tuples for a
        multiple file parameter
    :type data_files: {string: tuple(string, string)} or [tuple(string, string)]
    :param job_exe: The job execution model (with related job and job_type fields) that is storing the output
        data files
    :type job_exe: :class:`job.models.JobExecution`
    :returns: The job results
    :rtype: :class:`job.configuration.results.job_results.JobResults`
    """

    # Organize the data files
    workspace_files = {}  # Workspace ID -> [(absolute local file path, media type)]
    params_by_file_path = {}  # Absolute local file path -> output parameter name
    for name in data_files:
        file_output = self.data_outputs_by_name[name]
        workspace_id = file_output['workspace_id']
        if workspace_id in workspace_files:
            workspace_file_list = workspace_files[workspace_id]
        else:
            workspace_file_list = []
            workspace_files[workspace_id] = workspace_file_list

        data_file_entry = data_files[name]
        if isinstance(data_file_entry, list):
            for file_tuple in data_file_entry:
                file_path = os.path.normpath(file_tuple[0])
                if not os.path.isfile(file_path):
                    raise Exception('%s is not a valid file' % file_path)
                params_by_file_path[file_path] = name
                # Rebuild the tuple with the normalized path and tag it with its output parameter name
                if len(file_tuple) == 2:
                    new_tuple = (file_path, file_tuple[1], name)
                else:
                    new_tuple = (file_path, file_tuple[1], name, file_tuple[2])
                workspace_file_list.append(new_tuple)
        else:
            file_path = os.path.normpath(data_file_entry[0])
            if not os.path.isfile(file_path):
                raise Exception('%s is not a valid file' % file_path)
            params_by_file_path[file_path] = name
            # Rebuild the tuple with the normalized path and tag it with its output parameter name
            if len(data_file_entry) == 2:
                new_tuple = (file_path, data_file_entry[1], name)
            else:
                new_tuple = (file_path, data_file_entry[1], name, data_file_entry[2])
            workspace_file_list.append(new_tuple)

    data_file_store = DATA_FILE_STORE['DATA_FILE_STORE']
    if not data_file_store:
        raise Exception('No data file store found')
    stored_files = data_file_store.store_files(workspace_files, self.get_input_file_ids(), job_exe)

    # Organize results
    param_file_ids = {}  # Output parameter name -> file ID or [file IDs]
    for file_path in stored_files:
        file_id = stored_files[file_path]
        name = params_by_file_path[file_path]
        if isinstance(data_files[name], list):
            if name in param_file_ids:
                file_id_list = param_file_ids[name]
            else:
                file_id_list = []
                param_file_ids[name] = file_id_list
            file_id_list.append(file_id)
        else:
            param_file_ids[name] = file_id

    # Create job results
    results = JobResults()
    for name in param_file_ids:
        param_entry = param_file_ids[name]
        if isinstance(param_entry, list):
            results.add_file_list_parameter(name, param_entry)
        else:
            results.add_file_parameter(name, param_entry)
    return results
def store_output_data_files(self, data_files, job_exe):
    """Stores the given data output files

    :param data_files: Dict with each file parameter name mapping to a tuple of absolute local file path and
        media type (media type may be None) for a single file parameter, or a list of such tuples for a
        multiple file parameter
    :type data_files: dict of str -> tuple(str, str) or list of tuple(str, str)
    :param job_exe: The job execution model (with related job and job_type fields) that is storing the output
        data files
    :type job_exe: :class:`job.models.JobExecution`
    :returns: The job results
    :rtype: :class:`job.configuration.results.job_results.JobResults`
    """

    # Organize the data files
    workspace_files = {}  # Workspace ID -> list of (absolute local file path, media type)
    params_by_file_path = {}  # Absolute local file path -> output parameter name
    for name in data_files:
        file_output = self.data_outputs_by_name[name]
        workspace_id = file_output['workspace_id']
        if workspace_id in workspace_files:
            workspace_file_list = workspace_files[workspace_id]
        else:
            workspace_file_list = []
            workspace_files[workspace_id] = workspace_file_list

        data_file_entry = data_files[name]
        if isinstance(data_file_entry, list):
            for file_tuple in data_file_entry:
                file_path = os.path.normpath(file_tuple[0])
                if not os.path.isfile(file_path):
                    raise Exception('%s is not a valid file' % file_path)
                params_by_file_path[file_path] = name
                # Rebuild the tuple with the normalized file path
                if len(file_tuple) == 2:
                    new_tuple = (file_path, file_tuple[1])
                else:
                    new_tuple = (file_path, file_tuple[1], file_tuple[2])
                workspace_file_list.append(new_tuple)
        else:
            file_path = os.path.normpath(data_file_entry[0])
            if not os.path.isfile(file_path):
                raise Exception('%s is not a valid file' % file_path)
            params_by_file_path[file_path] = name
            # Rebuild the tuple with the normalized file path
            if len(data_file_entry) == 2:
                new_tuple = (file_path, data_file_entry[1])
            else:
                new_tuple = (file_path, data_file_entry[1], data_file_entry[2])
            workspace_file_list.append(new_tuple)

    data_file_store = DATA_FILE_STORE['DATA_FILE_STORE']
    if not data_file_store:
        raise Exception('No data file store found')
    stored_files = data_file_store.store_files(workspace_files, self.get_input_file_ids(), job_exe)

    # Organize results
    param_file_ids = {}  # Output parameter name -> file ID or list of file IDs
    for file_path in stored_files:
        file_id = stored_files[file_path]
        name = params_by_file_path[file_path]
        if isinstance(data_files[name], list):
            if name in param_file_ids:
                file_id_list = param_file_ids[name]
            else:
                file_id_list = []
                param_file_ids[name] = file_id_list
            file_id_list.append(file_id)
        else:
            param_file_ids[name] = file_id

    # Create job results
    results = JobResults()
    for name in param_file_ids:
        param_entry = param_file_ids[name]
        if isinstance(param_entry, list):
            results.add_file_list_parameter(name, param_entry)
        else:
            results.add_file_parameter(name, param_entry)
    return results
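# A hedged usage sketch for the tuple-based variants above, grounded in their
# docstrings: a single file parameter maps to an (absolute local path, media type)
# tuple where the media type may be None, and a multiple file parameter maps to a
# list of such tuples. The parameter names and paths are illustrative only.
data_files = {
    'image_out': ('/tmp/outputs/result.png', 'image/png'),
    'tile_set': [('/tmp/outputs/tile_0.png', 'image/png'),
                 ('/tmp/outputs/tile_1.png', None)],
}
# results = job_data.store_output_data_files(data_files, job_exe)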
def test_successful_supersede_mixed(self):
    """Tests calling QueueManager.queue_new_recipe() successfully when superseding a recipe where the results
    of a Seed job get passed to the input of a legacy job
    """

    workspace = storage_test_utils.create_workspace()
    source_file = source_test_utils.create_source(workspace=workspace)
    event = trigger_test_utils.create_trigger_event()

    interface_1 = {
        'seedVersion': '1.0.0',
        'job': {
            'name': 'job-type-a',
            'jobVersion': '1.0.0',
            'packageVersion': '1.0.0',
            'title': 'Job Type 1',
            'description': 'This is a description',
            'maintainer': {
                'name': 'John Doe',
                'email': '*****@*****.**'
            },
            'timeout': 10,
            'interface': {
                'command': '',
                'inputs': {
                    'files': [{'name': 'test-input-a'}]
                },
                'outputs': {
                    'files': [{'name': 'test-output-a', 'pattern': '*.png'}]
                }
            }
        }
    }
    job_type_1 = job_test_utils.create_seed_job_type(manifest=interface_1)

    interface_2 = {
        'version': '1.0',
        'command': 'test_command',
        'command_arguments': 'test_arg',
        'input_data': [{
            'name': 'Test Input 2',
            'type': 'file',
            'media_types': ['image/png', 'image/tiff'],
        }],
        'output_data': [{
            'name': 'Test Output 2',
            'type': 'file',
        }]
    }
    job_type_2 = job_test_utils.create_job_type(interface=interface_2)

    definition = {
        'version': '1.0',
        'input_data': [{
            'name': 'Recipe Input',
            'type': 'file',
            'media_types': ['text/plain'],
        }],
        'jobs': [{
            'name': 'Job 1',
            'job_type': {
                'name': job_type_1.name,
                'version': job_type_1.version,
            },
            'recipe_inputs': [{
                'recipe_input': 'Recipe Input',
                'job_input': 'test-input-a',
            }]
        }, {
            'name': 'Job 2',
            'job_type': {
                'name': job_type_2.name,
                'version': job_type_2.version,
            },
            'dependencies': [{
                'name': 'Job 1',
                'connections': [{
                    'output': 'test-output-a',
                    'input': 'Test Input 2',
                }]
            }]
        }]
    }
    recipe_definition = RecipeDefinition(definition)
    recipe_definition.validate_job_interfaces()
    recipe_type = recipe_test_utils.create_recipe_type(definition=definition)

    data = {
        'version': '1.0',
        'input_data': [{
            'name': 'Recipe Input',
            'file_id': source_file.id,
        }],
        'workspace_id': workspace.id,
    }
    data = LegacyRecipeData(data)

    # Queue initial recipe and complete its first job
    handler = Queue.objects.queue_new_recipe(recipe_type, data, event)
    recipe = Recipe.objects.get(id=handler.recipe.id)
    recipe_job_1 = RecipeNode.objects.select_related('job').get(recipe_id=handler.recipe.id, node_name='Job 1')
    Job.objects.update_jobs_to_running([recipe_job_1.job], now())
    results = JobResults()
    results.add_file_parameter('test-output-a', product_test_utils.create_product().id)
    job_test_utils.create_job_exe(job=recipe_job_1.job, status='COMPLETED', output=results)
    Job.objects.update_jobs_to_completed([recipe_job_1.job], now())
    Job.objects.process_job_output([recipe_job_1.job_id], now())

    # Create a new recipe type that has a new version of job 2 (job 1 is identical)
    new_job_type_2 = job_test_utils.create_job_type(name=job_type_2.name, version='New Version',
                                                    interface=job_type_2.manifest)
    new_definition = {
        'version': '1.0',
        'input_data': [{
            'name': 'Recipe Input',
            'type': 'file',
            'media_types': ['text/plain'],
        }],
        'jobs': [{
            'name': 'New Job 1',
            'job_type': {
                'name': job_type_1.name,
                'version': job_type_1.version,
            },
            'recipe_inputs': [{
                'recipe_input': 'Recipe Input',
                'job_input': 'test-input-a',
            }]
        }, {
            'name': 'New Job 2',
            'job_type': {
                'name': new_job_type_2.name,
                'version': new_job_type_2.version,
            },
            'dependencies': [{
                'name': 'New Job 1',
                'connections': [{
                    'output': 'test-output-a',
                    'input': 'Test Input 2',
                }]
            }]
        }]
    }
    new_recipe_type = recipe_test_utils.create_recipe_type(name=recipe_type.name, definition=new_definition)
    event = trigger_test_utils.create_trigger_event()

    recipe_job_1 = RecipeNode.objects.select_related('job').get(recipe_id=handler.recipe.id, node_name='Job 1')
    recipe_job_2 = RecipeNode.objects.select_related('job').get(recipe_id=handler.recipe.id, node_name='Job 2')
    superseded_jobs = {'Job 1': recipe_job_1.job, 'Job 2': recipe_job_2.job}
    graph_a = recipe_type.get_recipe_definition().get_graph()
    graph_b = new_recipe_type.get_recipe_definition().get_graph()
    delta = RecipeGraphDelta(graph_a, graph_b)

    # Queue new recipe that supersedes the old recipe
    new_handler = Queue.objects.queue_new_recipe(new_recipe_type, None, event, superseded_recipe=recipe,
                                                 delta=delta, superseded_jobs=superseded_jobs)

    # Ensure old recipe is superseded
    recipe = Recipe.objects.get(id=handler.recipe.id)
    self.assertTrue(recipe.is_superseded)

    # Ensure new recipe supersedes old recipe
    new_recipe = Recipe.objects.get(id=new_handler.recipe.id)
    self.assertEqual(new_recipe.superseded_recipe_id, handler.recipe.id)

    # Ensure that job 1 is already completed (it was copied from the original recipe) and that job 2 is queued
    new_recipe_job_1 = RecipeNode.objects.select_related('job').get(recipe_id=new_handler.recipe.id,
                                                                    node_name='New Job 1')
    new_recipe_job_2 = RecipeNode.objects.select_related('job').get(recipe_id=new_handler.recipe.id,
                                                                    node_name='New Job 2')
    self.assertEqual(new_recipe_job_1.job.status, 'COMPLETED')
    self.assertFalse(new_recipe_job_1.is_original)
    self.assertEqual(new_recipe_job_2.job.status, 'QUEUED')
    self.assertTrue(new_recipe_job_2.is_original)