def test_capture_output_files_multiple(self, get_files):
    output_files = [SeedOutputFiles(self.test_output_snippet)]
    name = 'OUTPUT_TIFFS'
    get_files.return_value = ['outfile0.tif', 'outfile1.tif']

    outputs = JobResults()._capture_output_files(output_files)

    self.assertIn(name, outputs)
    files = outputs[name]
    self.assertEqual(len(files), 2)
    self.assertEqual(files[0].__dict__,
                     ProductFileMetadata(name, 'outfile0.tif', media_type='image/tiff').__dict__)
    self.assertEqual(files[1].__dict__,
                     ProductFileMetadata(name, 'outfile1.tif', media_type='image/tiff').__dict__)
def setUp(self):
    django.setup()

    def upload_files(file_uploads):
        for file_upload in file_uploads:
            file_upload.file.save()

    def delete_files(files):
        for scale_file in files:
            scale_file.save()

    self.workspace = storage_test_utils.create_workspace()
    self.workspace.upload_files = MagicMock(side_effect=upload_files)
    self.workspace.delete_files = MagicMock(side_effect=delete_files)

    self.source_file = source_test_utils.create_source(file_name='input1.txt', workspace=self.workspace)

    inputs_json = [
        {'name': 'property1', 'type': 'string'},
        {'name': 'property2', 'type': 'string'}
    ]
    manifest = job_test_utils.create_seed_manifest(inputs_json=inputs_json, command='my_command')
    manifest['job']['interface']['inputs']['files'] = []
    job_type = job_test_utils.create_seed_job_type(manifest=manifest)
    self.job_exe = job_test_utils.create_job_exe(job_type=job_type)
    data = self.job_exe.job.get_input_data()
    data.add_value(JsonValue('property1', 'value1'))
    data.add_value(JsonValue('property2', 'value2'))
    self.job_exe.job.input = convert_data_to_v6_json(data).get_dict()
    self.job_exe.job.source_sensor_class = 'classA'
    self.job_exe.job.source_sensor = '1'
    self.job_exe.job.source_collection = '12345'
    self.job_exe.job.source_task = 'my-task'
    self.job_exe.job.save()

    self.job_exe_no = job_test_utils.create_job_exe()

    self.local_path_1 = os.path.join(SCALE_JOB_EXE_OUTPUT_PATH, 'local/1/file.txt')
    self.local_path_2 = os.path.join(SCALE_JOB_EXE_OUTPUT_PATH, 'local/2/file.json')
    self.local_path_3 = os.path.join(SCALE_JOB_EXE_OUTPUT_PATH, 'local/3/file.h5')

    self.files = [
        ProductFileMetadata(output_name='output_name_1', local_path=self.local_path_1,
                            remote_path='remote/1/file.txt'),
        ProductFileMetadata(output_name='output_name_2', local_path=self.local_path_2,
                            media_type='application/x-custom-json', remote_path='remote/2/file.json',
                            source_sensor_class='classB', source_sensor='2',
                            source_collection='12346', source_task='my-task-2'),
    ]
    self.files_no = [
        ProductFileMetadata(output_name='output_name_3', local_path=self.local_path_3,
                            media_type='image/x-hdf5-image', remote_path='remote/3/file.h5')
    ]
def _capture_output_files(self, seed_output_files):
    """Evaluate file patterns and capture any available side-car metadata associated with matched files

    :param seed_output_files: interface definition of Seed output files that should be captured
    :type seed_output_files: [`job.seed.types.SeedOutputFiles`]
    :return: collection of output name keys mapped to a ProductFileMetadata list: { name : [`ProductFileMetadata`] }
    :rtype: dict
    """

    # Dict of detected files and associated metadata
    captured_files = {}

    # Iterate over each files object
    for output_file in seed_output_files:
        # For files objects that are detected, handle results (may be multiple)
        product_files = []
        for matched_file in output_file.get_files():
            product_file_meta = ProductFileMetadata(output_file.name, matched_file, output_file.media_type)

            # Check to see if there is a side-car metadata file ('<matched_file>' + METADATA_SUFFIX)
            metadata_file = matched_file + METADATA_SUFFIX

            # If metadata is found, attempt to grab any Scale relevant data and place in ProductFileMetadata tuple
            if os.path.isfile(metadata_file):
                with open(metadata_file) as metadata_file_handle:
                    metadata = SeedMetadata(json.load(metadata_file_handle))

                    # Create a GeoJSON object, as the present Seed Metadata schema only uses the Geometry fragment
                    # TODO: Update if Seed schema updates. Ref: https://github.com/ngageoint/seed/issues/95
                    product_file_meta.geojson = {
                        'type': 'Feature',
                        'geometry': metadata.get_geometry()
                    }

                    timestamp = metadata.get_time()

                    # Seed Metadata Schema defines start / end as required within the
                    # timestamp, so we only need to check for the timestamp itself here.
                    if timestamp:
                        product_file_meta.data_start = timestamp['start']
                        product_file_meta.data_end = timestamp['end']

            product_files.append(product_file_meta)

        captured_files[output_file.name] = product_files

    return captured_files
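For illustration, here is a side-car file that this older capture path could consume. This is a sketch only: the file name is arbitrary, and the top-level 'geometry' and 'time' keys are assumptions inferred from the get_geometry()/get_time() calls above, not a confirmed Seed schema.

# Hypothetical side-car 'outfile0.tif.metadata.json' for the pre-#1160 capture path.
# The 'geometry'/'time' keys are assumed from get_geometry()/get_time(); verify
# against the actual SeedMetadata implementation before relying on this shape.
example_side_car = {
    'geometry': {
        'type': 'Polygon',
        'coordinates': [[[1.0, 10.0], [2.0, 10.0], [2.0, 20.0], [1.0, 20.0], [1.0, 10.0]]]
    },
    'time': {
        'start': '2015-05-15T10:34:12Z',
        'end': '2015-05-15T10:36:12Z'
    }
}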
def setUp(self):
    django.setup()

    def upload_files(file_uploads):
        for file_upload in file_uploads:
            file_upload.file.save()

    def delete_files(files):
        for scale_file in files:
            scale_file.save()

    self.workspace = storage_test_utils.create_workspace()
    self.workspace.upload_files = MagicMock(side_effect=upload_files)
    self.workspace.delete_files = MagicMock(side_effect=delete_files)

    self.source_file = source_test_utils.create_source(file_name='input1.txt', workspace=self.workspace)

    self.job_exe = job_test_utils.create_job_exe()
    data = self.job_exe.job.get_job_data()
    data.add_property_input('property1', 'value1')
    data.add_property_input('property2', 'value2')
    self.job_exe.job.data = data.get_dict()
    self.job_exe.job.save()

    self.job_exe_no = job_test_utils.create_job_exe()
    with transaction.atomic():
        self.job_exe_no.job.is_operational = False
        self.job_exe_no.job.job_type.is_operational = False
        self.job_exe_no.job.save()
        self.job_exe_no.job.job_type.save()

    self.local_path_1 = os.path.join(SCALE_JOB_EXE_OUTPUT_PATH, 'local/1/file.txt')
    self.local_path_2 = os.path.join(SCALE_JOB_EXE_OUTPUT_PATH, 'local/2/file.json')
    self.local_path_3 = os.path.join(SCALE_JOB_EXE_OUTPUT_PATH, 'local/3/file.h5')

    self.files = [
        ProductFileMetadata('output_name_1', self.local_path_1, remote_path='remote/1/file.txt'),
        ProductFileMetadata('output_name_2', self.local_path_2, 'application/x-custom-json',
                            'remote/2/file.json'),
    ]
    self.files_no = [
        ProductFileMetadata('output_name_3', self.local_path_3, 'image/x-hdf5-image', 'remote/3/file.h5')
    ]
def test_geo_metadata(self, mock_upload_files, mock_create_file_ancestry_links):
    """Tests calling ProductDataFileStore.store_files() successfully with geo metadata"""

    geo_metadata = {
        "data_started": "2015-05-15T10:34:12Z",
        "data_ended": "2015-05-15T10:36:12Z",
        "geo_json": {
            "type": "Polygon",
            "coordinates": [[[1.0, 10.0], [2.0, 10.0], [2.0, 20.0], [1.0, 20.0], [1.0, 10.0]]]
        }
    }
    parent_ids = set([98, 99])

    local_path_1 = os.path.join('my', 'path', 'one', 'my_test.txt')
    full_local_path_1 = os.path.join(SCALE_JOB_EXE_OUTPUT_PATH, local_path_1)
    remote_path_1 = os.path.join(ProductDataFileStore()._calculate_remote_path(self.job_exe, parent_ids),
                                 local_path_1)
    media_type_1 = 'text/plain'
    job_output_1 = 'mock_output_1'

    local_path_2 = os.path.join('my', 'path', 'one', 'my_test.json')
    full_local_path_2 = os.path.join(SCALE_JOB_EXE_OUTPUT_PATH, local_path_2)
    remote_path_2 = os.path.join(ProductDataFileStore()._calculate_remote_path(self.job_exe, parent_ids),
                                 local_path_2)
    media_type_2 = 'application/json'
    job_output_2 = 'mock_output_2'

    metadata_1 = ProductFileMetadata(output_name=job_output_1, local_path=full_local_path_1,
                                     remote_path=remote_path_1, media_type=media_type_1,
                                     geojson=geo_metadata)
    metadata_2 = ProductFileMetadata(output_name=job_output_2, local_path=full_local_path_2,
                                     remote_path=remote_path_2, media_type=media_type_2)
    data_files = {self.workspace_1.id: [metadata_1, metadata_2]}

    ProductDataFileStore().store_files(data_files, parent_ids, self.job_exe)

    files_to_store = [metadata_1, metadata_2]
    mock_upload_files.assert_called_with(files_to_store, parent_ids, self.job_exe, self.workspace_1)
def test_geo_metadata(self):
    """Tests calling ProductFileManager.upload_files() successfully with extra geometry metadata"""

    data_start = '2015-05-15T10:34:12Z'
    data_end = '2015-05-15T10:36:12Z'
    geojson = {
        'type': 'Polygon',
        'coordinates': [
            [[1.0, 10.0], [2.0, 10.0], [2.0, 20.0], [1.0, 20.0], [1.0, 10.0]],
        ]
    }

    files = [ProductFileMetadata('output_1', os.path.join(SCALE_JOB_EXE_OUTPUT_PATH, 'local/1/file.txt'),
                                 'text/plain', 'remote/1/file.txt', data_start, data_end, geojson)]
    products = ProductFile.objects.upload_files(files, [self.source_file.id], self.job_exe, self.workspace)

    self.assertEqual('file.txt', products[0].file_name)
    self.assertEqual('remote/1/file.txt', products[0].file_path)
    self.assertEqual('text/plain', products[0].media_type)
    self.assertEqual(self.workspace.id, products[0].workspace_id)
    self.assertEqual('Polygon', products[0].geometry.geom_type)
    self.assertEqual('Point', products[0].center_point.geom_type)
    self.assertEqual(datetime.datetime(2015, 5, 15, 10, 34, 12, tzinfo=utc), products[0].data_started)
    self.assertEqual(datetime.datetime(2015, 5, 15, 10, 36, 12, tzinfo=utc), products[0].data_ended)
    self.assertIsNotNone(products[0].uuid)
def test_successful(self, mock_file_call, mock_file_list_call, mock_store, mock_isfile):
    """Tests calling JobData.store_output_data_files() successfully"""

    def new_isfile(path):
        return True
    mock_isfile.side_effect = new_isfile

    job_exe = MagicMock()
    job_exe.id = 1
    job_exe.job.get_job_configuration().default_output_workspace = None
    job_exe.job.get_job_configuration().output_workspaces = None

    data = {
        'output_data': [{
            'name': 'Param1',
            'workspace_id': 1
        }, {
            'name': 'Param2',
            'workspace_id': 2
        }]
    }

    file_path_1 = os.path.join('/path', '1', 'my_file.txt')
    file_path_2 = os.path.join('/path', '2', 'my_file_2.txt')
    file_path_3 = os.path.join('/path', '3', 'my_file_3.txt')
    data_files = {
        'Param1': ProductFileMetadata(output_name='Param1', local_path=file_path_1),
        'Param2': [
            ProductFileMetadata(output_name='Param2', local_path=file_path_2, media_type='text/plain'),
            ProductFileMetadata(output_name='Param2', local_path=file_path_3)
        ]
    }

    JobData(data).store_output_data_files(data_files, job_exe)

    mock_file_call.assert_called_once_with('Param1', long(1))
    self.assertEqual('Param2', mock_file_list_call.call_args[0][0])
    self.assertSetEqual(set([long(3), long(2)]), set(mock_file_list_call.call_args[0][1]))
def test_store_output_files(self, dummy_store, isfile):
    workspace = storage_test_utils.create_workspace()
    files = {'OUTPUT_TIFFS': [ProductFileMetadata('OUTPUT_TIFFS', 'outfile0.tif', media_type='image/tiff')]}
    job_data = JobData({})

    job_config = JobConfiguration()
    job_config.add_output_workspace('OUTPUT_TIFFS', workspace.name)
    job_exe = Mock()
    job_exe.job_type.get_job_configuration.return_value = job_config

    results = JobResults()._store_output_data_files(files, job_data, job_exe)
    self.assertEqual({'OUTPUT_TIFFS': [1]}, results.files)
def test_capture_output_files_metadata(self, get_files):
    output_files = [SeedOutputFiles(self.test_output_snippet)]
    name = 'OUTPUT_TIFFS'
    get_files.return_value = ['outfile0.tif']

    metadata = {
        'type': 'Feature',
        'geometry': {
            'type': 'Point',
            'coordinates': [0, 1]
        },
        'properties': {
            'dataStarted': '2018-06-01T00:00:00Z',
            'dataEnded': '2018-06-01T01:00:00Z',
            'sourceStarted': '2018-06-01T00:00:00Z',
            'sourceEnded': '2018-06-01T06:00:00Z',
            'sourceSensorClass': 'Platform',
            'sourceSensor': 'X1',
            'sourceCollection': '12345A',
            'sourceTask': 'Calibration'
        }
    }
    metadata_name = 'outfile0.tif.metadata.json'
    with open(metadata_name, 'w') as metadata_file:
        json.dump(metadata, metadata_file)

    outputs = JobResults()._capture_output_files(output_files)

    os.remove(metadata_name)

    self.assertIn(name, outputs)
    files = outputs[name]
    self.assertEqual(len(files), 1)
    self.assertDictEqual(files[0].__dict__,
                         ProductFileMetadata(output_name=name,
                                             local_path='outfile0.tif',
                                             media_type='image/tiff',
                                             data_start='2018-06-01T00:00:00Z',
                                             data_end='2018-06-01T01:00:00Z',
                                             geojson=metadata,
                                             source_started='2018-06-01T00:00:00Z',
                                             source_ended='2018-06-01T06:00:00Z',
                                             source_sensor_class='Platform',
                                             source_sensor='X1',
                                             source_collection='12345A',
                                             source_task='Calibration').__dict__)
def _capture_output_files(self, seed_output_files):
    """Evaluate file patterns and capture any available side-car metadata associated with matched files

    :param seed_output_files: interface definition of Seed output files that should be captured
    :type seed_output_files: [`job.seed.types.SeedOutputFiles`]
    :return: collection of output name keys mapped to a ProductFileMetadata list: { name : [`ProductFileMetadata`] }
    :rtype: dict
    """

    # Dict of detected files and associated metadata
    captured_files = {}

    # Iterate over each files object
    for output_file in seed_output_files:
        # For files objects that are detected, handle results (may be multiple)
        product_files = []
        for matched_file in output_file.get_files():
            logger.info('File detected for output capture: %s' % matched_file)
            product_file_meta = ProductFileMetadata(output_file.name, matched_file, output_file.media_type)

            # Check to see if there is a side-car metadata file
            metadata_file = matched_file + METADATA_SUFFIX

            # If metadata is found, attempt to grab any Scale relevant data and place in ProductFileMetadata tuple
            if os.path.isfile(metadata_file):
                logger.info('Capturing metadata from detected side-car file: %s' % metadata_file)
                with open(metadata_file) as metadata_file_handle:
                    try:
                        metadata = SeedMetadata.metadata_from_json(json.load(metadata_file_handle))

                        # Property keys per #1160
                        product_file_meta.geojson = metadata.data
                        product_file_meta.data_start = metadata.get_property('dataStarted')
                        product_file_meta.data_end = metadata.get_property('dataEnded')
                        product_file_meta.source_started = metadata.get_property('sourceStarted')
                        product_file_meta.source_ended = metadata.get_property('sourceEnded')
                        product_file_meta.source_sensor_class = metadata.get_property('sourceSensorClass')
                        product_file_meta.source_sensor = metadata.get_property('sourceSensor')
                        product_file_meta.source_collection = metadata.get_property('sourceCollection')
                        product_file_meta.source_task = metadata.get_property('sourceTask')
                    except InvalidSeedMetadataDefinition:
                        logger.exception('Unable to parse side-car metadata file: %s' % metadata_file)

            product_files.append(product_file_meta)

        captured_files[output_file.name] = product_files

    return captured_files
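A minimal sketch of driving this newer capture path by hand, mirroring the side-car layout exercised in test_capture_output_files_metadata above. The 'files_snippet' variable stands in for a Seed interface 'outputs.files' entry and is an assumption here, as is the output file name.

# Hypothetical driver for the post-#1160 capture path; 'files_snippet' is assumed
# to be a valid Seed 'outputs.files' definition matching 'outfile0.tif'.
side_car = {
    'type': 'Feature',
    'geometry': {'type': 'Point', 'coordinates': [0, 1]},
    'properties': {'dataStarted': '2018-06-01T00:00:00Z', 'dataEnded': '2018-06-01T01:00:00Z'}
}
with open('outfile0.tif' + METADATA_SUFFIX, 'w') as side_car_handle:
    json.dump(side_car, side_car_handle)

# Each matched product automatically picks up the adjacent '<name>.metadata.json'.
captured = JobResults()._capture_output_files([SeedOutputFiles(files_snippet)])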
def test_successful(self, mock_upload_files, mock_create_file_ancestry_links):
    """Tests calling ProductDataFileStore.store_files() successfully"""

    local_path_1 = os.path.join('my', 'path', 'one', 'my_test.txt')
    media_type_1 = 'text/plain'
    job_output_1 = 'mock_output_1'
    local_path_2 = os.path.join('my', 'path', 'one', 'my_test.json')
    media_type_2 = 'application/json'
    job_output_2 = 'mock_output_2'
    local_path_3 = os.path.join('my', 'path', 'three', 'my_test.png')
    media_type_3 = 'image/png'
    job_output_3 = 'mock_output_3'
    local_path_4 = os.path.join('my', 'path', 'four', 'my_test.xml')
    media_type_4 = None
    job_output_4 = 'mock_output_4'

    # Set up mocks
    def new_upload_files(file_entries, input_file_ids, job_exe, workspace):
        results = []
        for file_entry in file_entries:
            # Check base remote path for job type name and version
            self.assertTrue(file_entry.remote_path.startswith(self.remote_base_path))
            if file_entry.local_path == local_path_1:
                mock_1 = MagicMock()
                mock_1.id = 1
                results.append(mock_1)
            elif file_entry.local_path == local_path_2:
                mock_2 = MagicMock()
                mock_2.id = 2
                results.append(mock_2)
            elif file_entry.local_path == local_path_3:
                mock_3 = MagicMock()
                mock_3.id = 3
                results.append(mock_3)
            elif file_entry.local_path == local_path_4:
                mock_4 = MagicMock()
                mock_4.id = 4
                results.append(mock_4)
        return results
    mock_upload_files.side_effect = new_upload_files

    data_files = {
        self.workspace_1.id: [
            ProductFileMetadata(output_name=job_output_1, local_path=local_path_1, media_type=media_type_1),
            ProductFileMetadata(output_name=job_output_2, local_path=local_path_2, media_type=media_type_2)
        ],
        self.workspace_2.id: [
            ProductFileMetadata(output_name=job_output_3, local_path=local_path_3, media_type=media_type_3),
            ProductFileMetadata(output_name=job_output_4, local_path=local_path_4, media_type=media_type_4)
        ]
    }

    parent_ids = {98, 99}
    results = ProductDataFileStore().store_files(data_files, parent_ids, self.job_exe)

    self.assertDictEqual(results, {local_path_1: long(1), local_path_2: long(2),
                                   local_path_3: long(3), local_path_4: long(4)})
    mock_create_file_ancestry_links.assert_called_once_with(parent_ids, {1, 2, 3, 4},
                                                            self.job_exe.job, self.job_exe.id)
def test_successful_recipe_path(self, mock_upload_files, mock_create_file_ancestry_links):
    """Tests calling ProductDataFileStore.store_files() successfully with a job that is in a recipe"""

    job_exe_in_recipe = job_utils.create_job_exe(status='RUNNING')
    recipe = recipe_utils.create_recipe()
    _recipe_job = recipe_utils.create_recipe_job(recipe=recipe, job_name='My Job',
                                                 job=job_exe_in_recipe.job)
    remote_base_path_with_recipe = os.path.join(
        'recipes', get_valid_filename(recipe.recipe_type.name),
        get_valid_filename('revision_%i' % recipe.recipe_type.revision_num),
        'jobs', get_valid_filename(job_exe_in_recipe.job.job_type.name),
        get_valid_filename(job_exe_in_recipe.job.job_type.version))

    local_path_1 = os.path.join('my', 'path', 'one', 'my_test.txt')
    media_type_1 = 'text/plain'
    job_output_1 = 'mock_output_1'
    local_path_2 = os.path.join('my', 'path', 'one', 'my_test.json')
    media_type_2 = 'application/json'
    job_output_2 = 'mock_output_2'
    local_path_3 = os.path.join('my', 'path', 'three', 'my_test.png')
    media_type_3 = 'image/png'
    job_output_3 = 'mock_output_3'
    local_path_4 = os.path.join('my', 'path', 'four', 'my_test.xml')
    media_type_4 = None
    job_output_4 = 'mock_output_4'

    # Set up mocks
    def new_upload_files(file_entries, input_file_ids, job_exe, workspace):
        results = []
        for file_entry in file_entries:
            # Check base remote path for recipe type and job type information
            self.assertTrue(file_entry.remote_path.startswith(remote_base_path_with_recipe))
            if file_entry.local_path == local_path_1:
                mock_1 = MagicMock()
                mock_1.id = 1
                results.append(mock_1)
            elif file_entry.local_path == local_path_2:
                mock_2 = MagicMock()
                mock_2.id = 2
                results.append(mock_2)
            elif file_entry.local_path == local_path_3:
                mock_3 = MagicMock()
                mock_3.id = 3
                results.append(mock_3)
            elif file_entry.local_path == local_path_4:
                mock_4 = MagicMock()
                mock_4.id = 4
                results.append(mock_4)
        return results
    mock_upload_files.side_effect = new_upload_files

    data_files = {
        self.workspace_1.id: [
            ProductFileMetadata(output_name=job_output_1, local_path=local_path_1, media_type=media_type_1),
            ProductFileMetadata(output_name=job_output_2, local_path=local_path_2, media_type=media_type_2)
        ],
        self.workspace_2.id: [
            ProductFileMetadata(output_name=job_output_3, local_path=local_path_3, media_type=media_type_3),
            ProductFileMetadata(output_name=job_output_4, local_path=local_path_4, media_type=media_type_4)
        ]
    }

    parent_ids = {98, 99}  # Dummy values

    ProductDataFileStore().store_files(data_files, parent_ids, job_exe_in_recipe)
def perform_post_steps(self, job_exe, job_data, stdoutAndStderr):
    """Stores the files and deletes any working directories

    :param job_exe: The job execution model with related job and job_type fields
    :type job_exe: :class:`job.models.JobExecution`
    :param job_data: The job data
    :type job_data: :class:`job.configuration.data.job_data.JobData`
    :param stdoutAndStderr: the standard out from the job execution
    :type stdoutAndStderr: str
    :return: A tuple of the job results and the results manifest generated by the job execution
    :rtype: (:class:`job.configuration.results.job_results.JobResults`,
             :class:`job.configuration.results.results_manifest.results_manifest.ResultsManifest`)
    """

    manifest_data = {}
    path_to_manifest_file = os.path.join(SCALE_JOB_EXE_OUTPUT_PATH, 'results_manifest.json')
    if os.path.exists(path_to_manifest_file):
        logger.info('Opening results manifest...')
        with open(path_to_manifest_file, 'r') as manifest_file:
            manifest_data = json.loads(manifest_file.read())
            logger.info('Results manifest:')
            logger.info(manifest_data)
    else:
        logger.info('No results manifest found')

    results_manifest = ResultsManifest(manifest_data)
    stdout_files = self._get_artifacts_from_stdout(stdoutAndStderr)
    results_manifest.add_files(stdout_files)
    results_manifest.validate(self._output_file_manifest_dict)

    files_to_store = {}
    for manifest_file_entry in results_manifest.get_files():
        param_name = manifest_file_entry['name']

        media_type = None
        output_data_item = self._get_output_data_item_by_name(param_name)
        if output_data_item:
            media_type = output_data_item.get('media_type')

        msg = 'Output %s has invalid/missing file path "%s"'
        if 'file' in manifest_file_entry:
            file_entry = manifest_file_entry['file']
            if not os.path.isfile(file_entry['path']):
                raise InvalidResultsManifest(msg % (param_name, file_entry['path']))
            if 'geo_metadata' in file_entry:
                geometadata = file_entry['geo_metadata']
                geojson = geometadata['geojson'] if 'geojson' in geometadata else None
                started = geometadata['data_started'] if 'data_started' in geometadata else None
                ended = geometadata['data_ended'] if 'data_ended' in geometadata else None
                files_to_store[param_name] = ProductFileMetadata(output_name=param_name,
                                                                 local_path=file_entry['path'],
                                                                 media_type=media_type, geojson=geojson,
                                                                 data_start=started, data_end=ended)
            else:
                files_to_store[param_name] = ProductFileMetadata(output_name=param_name,
                                                                 local_path=file_entry['path'],
                                                                 media_type=media_type)
        elif 'files' in manifest_file_entry:
            file_tuples = []
            for file_entry in manifest_file_entry['files']:
                if not os.path.isfile(file_entry['path']):
                    raise InvalidResultsManifest(msg % (param_name, file_entry['path']))
                if 'geo_metadata' in file_entry:
                    geometadata = file_entry['geo_metadata']
                    geojson = geometadata['geojson'] if 'geojson' in geometadata else None
                    started = geometadata['data_started'] if 'data_started' in geometadata else None
                    ended = geometadata['data_ended'] if 'data_ended' in geometadata else None
                    file_tuples.append(ProductFileMetadata(output_name=param_name,
                                                           local_path=file_entry['path'],
                                                           media_type=media_type, geojson=geojson,
                                                           data_start=started, data_end=ended))
                else:
                    file_tuples.append(ProductFileMetadata(output_name=param_name,
                                                           local_path=file_entry['path'],
                                                           media_type=media_type))
            files_to_store[param_name] = file_tuples

    job_data_parse_results = {}  # parse results formatted for job_data
    for parse_result in results_manifest.get_parse_results():
        filename = parse_result['filename']
        assert filename not in job_data_parse_results
        geo_metadata = parse_result.get('geo_metadata', {})
        geo_json = geo_metadata.get('geo_json', None)
        data_started = geo_metadata.get('data_started', None)
        data_ended = geo_metadata.get('data_ended', None)
        data_types = parse_result.get('data_types', [])
        new_workspace_path = parse_result.get('new_workspace_path', None)
        if new_workspace_path:
            new_workspace_path = os.path.join(new_workspace_path, filename)
        job_data_parse_results[filename] = (geo_json, data_started, data_ended, data_types,
                                            new_workspace_path)

    job_data.save_parse_results(job_data_parse_results)
    return (job_data.store_output_data_files(files_to_store, job_exe), results_manifest)
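For reference, here is a results manifest consistent with the fields perform_post_steps() reads above. This is a sketch only: the top-level 'files'/'parse_results' key names are assumptions inferred from the ResultsManifest accessors, and all paths and values are placeholders.

# Illustrative results_manifest.json content; 'files'/'parse_results' top-level
# keys are assumed from get_files()/get_parse_results(), not confirmed, and all
# paths/values below are placeholders.
example_manifest = {
    'files': [
        # Single-file output with optional geo metadata
        {'name': 'output_a',
         'file': {'path': '/outputs/a.tif',
                  'geo_metadata': {'data_started': '2015-05-15T10:34:12Z',
                                   'data_ended': '2015-05-15T10:36:12Z'}}},
        # Multi-file output; each entry only requires 'path'
        {'name': 'output_b',
         'files': [{'path': '/outputs/b1.json'}, {'path': '/outputs/b2.json'}]}
    ],
    'parse_results': [
        {'filename': 'input1.txt',
         'data_types': ['type1'],
         'geo_metadata': {'geo_json': {'type': 'Point', 'coordinates': [0, 1]}},
         'new_workspace_path': 'moved/inputs'}
    ]
}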