    def test_model_data_src_is_supplied___symlink_to_output_dir_static_is_created(self):
        with TemporaryDirectory() as output_path, TemporaryDirectory() as input_path:
            Path(os.path.join(input_path, 'linked_file')).touch()

            prepare_model_run_directory(output_path, model_data_src_path=input_path)

            self.assertTrue(os.path.exists(os.path.join(output_path, 'static', 'linked_file')))

    def test_input_directory_is_supplied___input_files_are_copied_to_input_csv(self):
        with TemporaryDirectory() as output_path, TemporaryDirectory() as input_path:
            Path(os.path.join(input_path, 'a_file.csv')).touch()

            prepare_model_run_directory(output_path, oasis_files_src_path=input_path)

            self.assertTrue(os.path.exists(os.path.join(output_path, 'input', 'csv', 'a_file.csv')))

    def test_model_data_src_is_supplied_sym_link_raises___input_is_copied_from_static(self):
        with TemporaryDirectory() as output_path, TemporaryDirectory() as input_path:
            Path(os.path.join(input_path, 'linked_file')).touch()

            # Force os.symlink to fail so the copy fallback is exercised
            with patch('os.symlink', Mock(side_effect=OSError())):
                prepare_model_run_directory(output_path, model_data_src_path=input_path)

            self.assertTrue(os.path.exists(os.path.join(output_path, 'static', 'linked_file')))

    def test_settings_file_is_supplied___settings_file_is_copied_into_run_dir(self):
        with TemporaryDirectory() as output_path, NamedTemporaryFile('w') as input_file:
            input_file.write('conf stuff')
            input_file.flush()

            prepare_model_run_directory(output_path, analysis_settings_json_src_file_path=input_file.name)

            with io.open(os.path.join(output_path, 'analysis_settings.json'), encoding='utf-8') as output_conf:
                self.assertEqual('conf stuff', output_conf.read())

    def test_directory_is_empty___child_directories_are_created(self):
        with TemporaryDirectory() as d:
            prepare_model_run_directory(d)

            # self.assertTrue(os.path.exists(os.path.join(d, 'fifo')))
            self.assertTrue(os.path.exists(os.path.join(d, 'input')))
            self.assertTrue(os.path.exists(os.path.join(d, 'input', 'csv')))
            self.assertTrue(os.path.exists(os.path.join(d, 'output')))
            self.assertTrue(os.path.exists(os.path.join(d, 'static')))
            self.assertTrue(os.path.exists(os.path.join(d, 'work')))

    def test_inputs_archive_is_supplied___archive_is_extracted_into_inputs(self):
        with TemporaryDirectory() as output_path, TemporaryDirectory() as input_path:
            tar_path = os.path.join(input_path, 'archive.tar')
            with tarfile.open(tar_path, 'w', encoding='utf-8') as tar:
                archived_file_path = Path(input_path, 'archived_file')
                archived_file_path.touch()
                tar.add(str(archived_file_path), arcname='archived_file')

            prepare_model_run_directory(output_path, inputs_archive=tar_path)

            self.assertTrue(Path(output_path, 'input', 'archived_file').exists())

    def test_directory_has_some_existing_directories___other_child_directories_are_created(self):
        with TemporaryDirectory() as d:
            os.mkdir(os.path.join(d, 'fifo'))
            os.mkdir(os.path.join(d, 'input'))

            prepare_model_run_directory(d)

            # self.assertTrue(os.path.exists(os.path.join(d, 'fifo')))
            self.assertTrue(os.path.exists(os.path.join(d, 'input')))
            self.assertTrue(os.path.exists(os.path.join(d, 'input', 'csv')))
            self.assertTrue(os.path.exists(os.path.join(d, 'output')))
            self.assertTrue(os.path.exists(os.path.join(d, 'static')))
            self.assertTrue(os.path.exists(os.path.join(d, 'work')))

def start_analysis(analysis_settings, input_location):
    '''
    Run an analysis.

    Args:
        analysis_settings (dict): The analysis settings.
        input_location (string): The name of the inputs archive (without suffix)
            in the worker inputs data directory.

    Returns:
        (string) The location of the outputs.
    '''
    # Check that the input archive exists and is valid
    input_archive = os.path.join(
        settings.get('worker', 'INPUTS_DATA_DIRECTORY'),
        input_location + ARCHIVE_FILE_SUFFIX)

    if not os.path.exists(input_archive):
        raise MissingInputsException(input_archive)
    if not tarfile.is_tarfile(input_archive):
        raise InvalidInputsException(input_archive)

    source_tag = analysis_settings['analysis_settings']['source_tag']
    analysis_tag = analysis_settings['analysis_settings']['analysis_tag']
    logging.info("Source tag = {}; Analysis tag = {}".format(
        source_tag, analysis_tag))

    module_supplier_id = analysis_settings['analysis_settings']['module_supplier_id']
    model_version_id = analysis_settings['analysis_settings']['model_version_id']
    logging.info("Model supplier - version = {} {}".format(
        module_supplier_id, model_version_id))

    # Get the supplier module and call it; fall back to the default runner if
    # no supplier-specific module directory exists
    use_default_model_runner = not Path(
        settings.get('worker', 'SUPPLIER_MODULE_DIRECTORY'),
        module_supplier_id).exists()

    model_data_path = os.path.join(
        settings.get('worker', 'MODEL_DATA_DIRECTORY'),
        module_supplier_id,
        model_version_id)

    if not os.path.exists(model_data_path):
        raise MissingModelDataException(model_data_path)

    logging.info("Setting up analysis working directory")
    directory_name = "{}_{}_{}".format(source_tag, analysis_tag, uuid.uuid4().hex)
    working_directory = os.path.join(
        settings.get('worker', 'WORKING_DIRECTORY'), directory_name)

    ri = analysis_settings['analysis_settings'].get('ri_output', False)

    prepare_model_run_directory(
        working_directory,
        ri=ri,
        model_data_src_path=model_data_path,
        inputs_archive=input_archive)
    prepare_model_run_inputs(analysis_settings['analysis_settings'], working_directory, ri=ri)

    with setcwd(working_directory):
        logging.info("Working directory = {}".format(working_directory))

        # Persist the analysis_settings
        with open("analysis_settings.json", "w") as json_file:
            json.dump(analysis_settings, json_file)

        if use_default_model_runner:
            model_runner_module = runner
        else:
            sys.path.append(settings.get('worker', 'SUPPLIER_MODULE_DIRECTORY'))
            model_runner_module = importlib.import_module(
                '{}.supplier_model_runner'.format(module_supplier_id))

        ##! TODO: add a check that RI directories take the form RI_{ID} and that ID is a monotonic index
        num_reinsurance_iterations = len(glob.glob('RI_[0-9]'))

        model_runner_module.run(
            analysis_settings['analysis_settings'],
            settings.getint('worker', 'KTOOLS_BATCH_COUNT'),
            num_reinsurance_iterations=num_reinsurance_iterations,
            ktools_mem_limit=settings.getboolean('worker', 'KTOOLS_MEMORY_LIMIT'),
            set_alloc_rule=settings.getint('worker', 'KTOOLS_ALLOC_RULE'),
            fifo_tmp_dir=False)

    # Archive the outputs and return the location of the archive
    output_location = uuid.uuid4().hex
    output_filepath = os.path.join(
        settings.get('worker', 'OUTPUTS_DATA_DIRECTORY'),
        output_location + ARCHIVE_FILE_SUFFIX)
    output_directory = os.path.join(working_directory, "output")
    with tarfile.open(output_filepath, "w:gz") as tar:
        tar.add(output_directory, arcname="output")

    if settings.getboolean('worker', 'DO_CLEAR_WORKING'):
        shutil.rmtree(working_directory, ignore_errors=True)

    logging.info("Output location = {}".format(output_location))

    return output_location
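

# --- Illustrative usage sketch (assumption, not part of the worker module) ---
# A minimal sketch of how start_analysis might be invoked directly. The payload
# shape mirrors the keys the function reads above ('analysis_settings' wrapping
# the source/analysis tags, supplier id, model version id and optional
# 'ri_output'); the concrete values and the 'abc123' inputs-archive name are
# hypothetical examples only.
if __name__ == '__main__':
    example_settings = {
        'analysis_settings': {
            'source_tag': 'example_source',
            'analysis_tag': 'example_analysis',
            'module_supplier_id': 'ExampleSupplier',   # hypothetical supplier id
            'model_version_id': 'ExampleModel_v1',     # hypothetical model version
            'ri_output': False,
        }
    }
    # 'abc123' stands in for the hex id of a previously uploaded inputs archive
    # (i.e. 'abc123' + ARCHIVE_FILE_SUFFIX must exist in INPUTS_DATA_DIRECTORY).
    output_location = start_analysis(example_settings, 'abc123')
    print("Outputs archived at: {}".format(output_location))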