def test_run_todo_list_dir_data_source_invalid_fname_v(
    set_clients_mock, test_config
):
    """Run over local files whose StorageName reports itself invalid.

    Two passes are made: with ``log_to_file`` off, neither the failure nor
    the retry file may exist afterwards; with ``log_to_file`` on, both
    must exist. Both passes are expected to return -1.
    """
    set_clients_mock.side_effect = _clients_mock
    test_config.working_directory = TEST_DIR
    test_config.use_local_files = True
    test_config.task_types = [mc.TaskType.INGEST]
    test_config.log_to_file = False
    test_config.features.supports_latest_client = True

    # remove left-overs so the existence checks below are meaningful
    for stale_fqn in (test_config.failure_fqn, test_config.retry_fqn):
        if os.path.exists(stale_fqn):
            os.unlink(stale_fqn)

    local_fqn = f'{TEST_DIR}/abc.fits.gz'
    if not os.path.exists(local_fqn):
        with open(local_fqn, 'w') as f_out:
            f_out.write('abc')

    class TestStorageName(mc.StorageName):
        # a storage name that never validates
        def __init__(self, entry):
            self._obs_id = entry

        def is_valid(self):
            return False

    class TestStorageNameInstanceBuilder(b.StorageNameInstanceBuilder):
        # hands out the always-invalid storage name for every entry
        def __init__(self):
            pass

        def build(self, entry):
            return TestStorageName(entry)

    test_builder = TestStorageNameInstanceBuilder()
    test_chooser = ec.OrganizeChooser()

    # first pass: file logging disabled
    first_result = rc.run_by_todo(
        config=test_config,
        chooser=test_chooser,
        name_builder=test_builder,
        command_name=TEST_COMMAND,
    )
    assert first_result is not None, 'expect a result'
    assert first_result == -1, 'expect failure, because of file naming'
    assert (
        not os.path.exists(test_config.failure_fqn)
    ), 'no logging, no failure file'
    assert (
        not os.path.exists(test_config.retry_fqn)
    ), 'no logging, no retry file'

    # second pass: file logging enabled
    # NOTE(review): this call does not pass name_builder - confirm that
    # is intentional.
    test_config.log_to_file = True
    second_result = rc.run_by_todo(
        config=test_config,
        chooser=test_chooser,
        command_name=TEST_COMMAND,
    )
    assert second_result is not None, 'expect a result'
    assert second_result == -1, 'expect failure, because of file naming'
    assert os.path.exists(test_config.failure_fqn), 'expect failure file'
    assert os.path.exists(test_config.retry_fqn), 'expect retry file'
def test_run_todo_retry(do_one_mock, clients_mock, test_config):
    """Run a todo file with retries enabled and ``do_one`` mocked.

    Expects a -1 result, exactly two ``do_one`` calls, and no use of the
    metadata or data clients.
    """
    test_config.features.supports_latest_client = True
    success_fqn, failure_fqn, retry_fqn = _clean_up_log_files(test_config)
    do_one_mock.side_effect = _mock_do_one

    test_config.work_fqn = f'{tc.TEST_DATA_DIR}/todo.txt'
    test_config.log_to_file = True
    test_config.retry_failures = True
    _write_todo(test_config)

    outcome = rc.run_by_todo(config=test_config, command_name=TEST_COMMAND)

    assert outcome is not None, 'expect a result'
    assert outcome == -1, 'expect failure'
    _check_log_files(test_config, success_fqn, failure_fqn, retry_fqn)
    assert do_one_mock.called, 'expect do_one call'
    assert do_one_mock.call_count == 2, 'wrong number of calls'

    collection = clients_mock.return_value
    assert not (
        collection.metadata_client.read.called
    ), 'do_one is mocked, should be no metadata client call'
    assert not (
        collection.data_client.get_file_info.called
    ), 'do_one is mocked, should be no data client call'
def _run():
    """
    Uses a todo file to identify the work to be done.

    The data source handed to run_by_todo is picked from the configuration;
    it stays None when local files are used without cleanup.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    config, clients, name_builder, metadata_reader = _common()

    # NOTE(review): the `else` is paired with the `use_local_files` check -
    # confirm against the original layout.
    if not config.use_local_files:
        files_source = dsc.TodoFileDataSource(config)
    elif config.cleanup_files_when_storing:
        files_source = data_source.DAOLocalFilesDataSource(
            config, clients.data_client, metadata_reader
        )
    else:
        files_source = None

    return rc.run_by_todo(
        name_builder=name_builder,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        clients=clients,
        config=config,
        source=files_source,
        metadata_reader=metadata_reader,
    )
def test_run_todo_file_data_source(clients_mock, test_config):
    """Run a VISIT task from a todo file and check the success log.

    The metadata client read is mocked to return a SimpleObservation with
    observation id 'def'; the success file must mention 'def def.fits'.
    """
    collection = clients_mock.return_value
    collection.data_client.get_file_info.return_value = None
    collection.metadata_client.read.return_value = SimpleObservation(
        collection=test_config.collection,
        observation_id='def',
        algorithm=Algorithm('test'),
    )

    # a stale success file would make the existence check meaningless
    if os.path.exists(test_config.success_fqn):
        os.unlink(test_config.success_fqn)

    test_config.work_fqn = f'{TEST_DIR}/todo.txt'
    test_config.task_types = [mc.TaskType.VISIT]
    test_config.log_to_file = True

    outcome = rc.run_by_todo(
        config=test_config,
        chooser=ec.OrganizeChooser(),
        command_name=TEST_COMMAND,
    )

    assert outcome is not None, 'expect a result'
    assert outcome == 0, 'expect success'
    assert os.path.exists(test_config.success_fqn), 'expect success file'
    with open(test_config.success_fqn) as f_in:
        logged = f_in.read()
    # the obs id and file name
    assert 'def def.fits' in logged, 'wrong success message'
def test_run_todo_list_dir_data_source_v(
    write_obs_mock,
    read_obs_mock,
    fits2caom2_in_out_mock,
    fits2caom2_mock,
    clients_mock,
    test_config,
):
    """SCRAPE local '.fits' files with ``supports_latest_client`` set.

    Expects success, observation read and write calls, and no metadata or
    data client access.
    """
    read_obs_mock.side_effect = _mock_read
    test_config.working_directory = tc.TEST_DATA_DIR
    test_config.use_local_files = True
    test_config.data_sources = [tc.TEST_FILES_DIR]
    test_config.data_source_extensions = ['.fits']
    test_config.task_types = [mc.TaskType.SCRAPE]
    test_config.features.supports_latest_client = True

    outcome = rc.run_by_todo(config=test_config, command_name=TEST_COMMAND)

    assert outcome is not None, 'expect a result'
    assert outcome == 0, 'expect success'

    # one of the two fits2caom2 entry points has to have been used
    if not fits2caom2_mock.called:
        assert fits2caom2_in_out_mock.called, 'expect fits2caom2 in/out call'
    else:
        fits2caom2_mock.assert_called_with(connected=False)

    assert read_obs_mock.called, 'read_obs not called'
    assert write_obs_mock.called, 'write_obs mock not called'

    collection = clients_mock.return_value
    assert not (
        collection.metadata_client.read.called
    ), 'scrape, should be no client access'
    assert not (
        collection.data_client.get_file.called
    ), 'scrape, should be no client access'
def test_run_todo_list_dir_data_source(
    write_obs_mock,
    read_obs_mock,
    fits2caom2_in_out_mock,
    fits2caom2_mock,
    test_config,
):
    """SCRAPE local '.fits' files with an OrganizeChooser.

    Expects success, a fits2caom2 call through one of the two mocked entry
    points, and an observation write.
    """
    read_obs_mock.side_effect = _mock_read
    test_config.working_directory = tc.TEST_DATA_DIR
    test_config.use_local_files = True
    test_config.task_types = [mc.TaskType.SCRAPE]
    test_config.data_sources = [tc.TEST_FILES_DIR]
    test_config.data_source_extensions = ['.fits']

    outcome = rc.run_by_todo(
        config=test_config,
        chooser=ec.OrganizeChooser(),
        command_name=TEST_COMMAND,
    )

    assert outcome is not None, 'expect a result'
    assert outcome == 0, 'expect success'

    # one of the two fits2caom2 entry points has to have been used
    if not fits2caom2_mock.called:
        assert fits2caom2_in_out_mock.called, 'expect fits2caom2 in/out call'
    else:
        fits2caom2_mock.assert_called_with(connected=False)

    assert write_obs_mock.called, 'expect write call'
def test_run_todo_file_data_source_v(
    repo_read_mock, set_clients_mock, test_config
):
    """Run a VISIT task from a todo file with ``supports_latest_client``.

    The repository read is mocked to return a SimpleObservation with
    observation id 'def'. Expects success, a success file that mentions
    'def def.fits', and a repository read made with no arguments.
    """
    set_clients_mock.side_effect = _clients_mock
    test_config.features.supports_latest_client = True
    test_cert_file = os.path.join(TEST_DIR, 'test_proxy.pem')
    test_config.proxy_fqn = test_cert_file
    repo_read_mock.return_value = SimpleObservation(
        collection=test_config.collection,
        observation_id='def',
        algorithm=Algorithm(str('test')),
    )

    # a stale success file would make the existence check meaningless
    if os.path.exists(test_config.success_fqn):
        os.unlink(test_config.success_fqn)

    test_config.work_fqn = f'{TEST_DIR}/todo.txt'
    test_config.task_types = [mc.TaskType.VISIT]
    test_config.log_to_file = True
    test_chooser = ec.OrganizeChooser()
    test_result = rc.run_by_todo(
        config=test_config, chooser=test_chooser, command_name=TEST_COMMAND
    )

    assert test_result is not None, 'expect a result'
    assert test_result == 0, 'expect success'
    assert os.path.exists(test_config.success_fqn), 'expect success file'
    with open(test_config.success_fqn) as f:
        content = f.read()
        # the obs id and file name
        assert 'def def.fits' in content, 'wrong success message'
    assert repo_read_mock.called, 'expect e call'
    # assert_called_with raises AssertionError with its own message; a
    # trailing ", 'message'" only builds a tuple that is thrown away.
    repo_read_mock.assert_called_with()
def _run():
    """Process the entries of a todo file, with StorageName instances
    supplied by an EntryBuilder for VlassName.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    config = mc.Config()
    config.get_executors()

    # time_bounds_augmentation and quality_augmentation depend on
    # metadata scraped from the NRAO site, but that only changes if a new
    # file is created, a new version of a file is created, or an old
    # version of a file is replaced. If the pipeline isn't STORE'ing
    # information from the source, files aren't changing, and the related
    # metadata isn't changing, so be polite to the NRAO site, and don't
    # scrape if it's not necessary.
    stores_and_ingests = (
        mc.TaskType.STORE in config.task_types
        and mc.TaskType.INGEST in config.task_types
    )
    if stores_and_ingests:
        meta_visitors = META_VISITORS
    else:
        meta_visitors = [cleanup_augmentation]

    name_builder = nbc.EntryBuilder(storage_name.VlassName)
    storage_name.set_use_storage_inventory(
        config.features.supports_latest_client
    )
    http_transfer = tc.HttpTransfer()
    return rc.run_by_todo(
        config=config,
        name_builder=name_builder,
        meta_visitors=meta_visitors,
        data_visitors=DATA_VISITORS,
        store_transfer=http_transfer,
    )
def _run():
    """
    Uses a todo file to identify the work to be done; names are built by a
    FileNameBuilder wrapping get_storage_name.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    return rc.run_by_todo(
        name_builder=nbc.FileNameBuilder(get_storage_name),
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
    )
def _run():
    """
    Uses a todo file to identify the work to be done; names are built by an
    ObsIDBuilder wrapping mc.StorageName.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    obs_id_builder = nbc.ObsIDBuilder(mc.StorageName)
    return rc.run_by_todo(
        config=None,
        name_builder=obs_id_builder,
        command_name='visitCaom2',
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        chooser=None,
    )
def _run():
    """
    Uses a todo file to identify the work to be done; names are built by a
    FileNameBuilder wrapping PHANGSName.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    return rc.run_by_todo(
        config=None,
        name_builder=nbc.FileNameBuilder(PHANGSName),
        command_name=APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        chooser=None,
    )
def _run():
    """
    Uses a todo file to identify the work to be done.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    # set on the class itself, not on an instance
    StorageName.collection = COLLECTION
    run_kwargs = {
        'config': None,
        'name_builder': None,
        'meta_visitors': META_VISITORS,
        'data_visitors': DATA_VISITORS,
        'chooser': None,
    }
    return rc.run_by_todo(**run_kwargs)
def _run():
    """
    Uses a todo file with file names, even though Gemini provides
    information about existing data referenced by observation ID.

    :return whatever rc.run_by_todo returns.
    """
    config = mc.Config()
    config.get_executors()
    external_metadata.init_global(config=config)
    gem_builder = builder.GemObsIDBuilder(config)
    visitors = _define_meta_visitors(config)
    return rc.run_by_todo(
        config,
        gem_builder,
        chooser=None,
        command_name=main_app.APPLICATION,
        meta_visitors=visitors,
    )
def _run():
    """
    Uses a todo file to identify the work to be done; files to store are
    transferred with an FtpTransfer built from config.data_source.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    name_builder = nbc.FileNameBuilder(NEOSSatName)
    config = mc.Config()
    config.get_executors()
    ftp_transfer = tc.FtpTransfer(config.data_source)
    return rc.run_by_todo(
        name_builder=name_builder,
        config=config,
        command_name=APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        store_transfer=ftp_transfer,
    )
def test_run_todo_list_dir_data_source(
    read_obs_mock, fits2caom2_in_out_mock, fits2caom2_mock, test_config
):
    """SCRAPE the local files found in tc.TEST_FILES_DIR.

    Expects success and exactly one of the two mocked fits2caom2 entry
    points to have been called.
    """
    read_obs_mock.side_effect = _mock_read
    test_config.working_directory = tc.TEST_FILES_DIR
    test_config.use_local_files = True
    test_config.task_types = [mc.TaskType.SCRAPE]

    outcome = rc.run_by_todo(
        config=test_config,
        chooser=ec.OrganizeChooser(),
        command_name=TEST_COMMAND,
    )

    assert outcome is not None, 'expect a result'
    assert outcome == 0, 'expect success'
    if not fits2caom2_mock.called:
        assert fits2caom2_in_out_mock.called, 'expect fits2caom2 in/out call'
    else:
        assert not fits2caom2_in_out_mock.called, 'expect no in/out call'
def _run_by_builder():
    """Process the entries of a todo file, with StorageName instances
    supplied by a CFHTBuilder constructed from the configuration.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    config = mc.Config()
    config.get_executors()
    name_builder = cfht_builder.CFHTBuilder(config)
    return rc.run_by_todo(
        config,
        name_builder,
        chooser=None,
        command_name=main_app.APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
    )
def _run():
    """
    Uses a todo file to identify the work to be done; names are built by a
    GemProcBuilder after external_metadata has been initialised.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    config = mc.Config()
    config.get_executors()
    external_metadata.init_global(config=config)
    proc_builder = builder.GemProcBuilder(config)
    return rc.run_by_todo(
        config=config,
        name_builder=proc_builder,
        command_name=main_app.APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
    )
def test_run_todo_list_dir_data_source_exception(
    do_one_mock, clients_mock, test_config
):
    """SCRAPE local files while ``do_one`` raises mc.CadcException.

    Runs once with ``supports_latest_client`` False and once with True.
    Each run expects a -1 result, failure and retry files naming
    'abc.fits', and no metadata or data client access.
    """
    test_config.working_directory = TEST_DIR
    test_config.use_local_files = True
    test_config.task_types = [mc.TaskType.SCRAPE]
    test_config.log_to_file = True

    for latest_client in (False, True):
        test_config.features.supports_latest_client = latest_client
        do_one_mock.side_effect = mc.CadcException

        # remove left-overs so each pass checks its own output
        for stale_fqn in (test_config.failure_fqn, test_config.retry_fqn):
            if os.path.exists(stale_fqn):
                os.unlink(stale_fqn)

        test_chooser = ec.OrganizeChooser()
        list_dir_source = dsc.ListDirDataSource(test_config, test_chooser)
        outcome = rc.run_by_todo(
            config=test_config,
            chooser=test_chooser,
            command_name=TEST_COMMAND,
            source=list_dir_source,
        )

        assert outcome is not None, 'expect a result'
        assert outcome == -1, 'expect failure'
        assert do_one_mock.called, 'expect do_one call'
        assert os.path.exists(test_config.failure_fqn), 'expect failure file'
        assert os.path.exists(test_config.retry_fqn), 'expect retry file'

        with open(test_config.failure_fqn) as f_in:
            failure_text = f_in.read()
        # the obs id and file name
        assert 'abc abc.fits' in failure_text, 'wrong failure message'

        with open(test_config.retry_fqn) as f_in:
            retry_text = f_in.read()
        # retry file names
        assert retry_text == 'abc.fits\n', 'wrong retry content'

        collection = clients_mock.return_value
        assert not (
            collection.metadata_client.read.called
        ), 'scrape, should be no metadata client call'
        assert not (
            collection.data_client.get_file_info.called
        ), 'scrape, should be no data client call'
def _run():
    """Process the entries of a todo file, with StorageName instances
    supplied by a VlassInstanceBuilder. The web log is initialised from
    the state file before the run starts.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    config = mc.Config()
    config.get_executors()
    work.init_web_log(mc.State(config.state_fqn), config)
    vlass_builder = builder.VlassInstanceBuilder(config)
    http_transfer = tc.HttpTransfer()
    return rc.run_by_todo(
        config=config,
        name_builder=vlass_builder,
        command_name=sn.APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        store_transfer=http_transfer,
    )
def _run_remote():
    """
    Identifies the work to be done with a VaultListDirDataSource; one
    VOSpace client is shared by that source and by the VoFitsTransfer
    used for storing.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    config = mc.Config()
    config.get_executors()
    name_builder = nbc.FileNameBuilder(GemProcName)
    vos_client = Client(vospace_certfile=config.proxy_fqn)
    vo_transfer = tc.VoFitsTransfer(vos_client)
    vault_source = dsc.VaultListDirDataSource(vos_client, config)
    return rc.run_by_todo(
        config=config,
        name_builder=name_builder,
        command_name=APPLICATION,
        source=vault_source,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        store_transfer=vo_transfer,
    )
def test_run_todo_list_dir_data_source_v(
    read_obs_mock,
    fits2caom2_in_out_mock,
    fits2caom2_mock,
    set_clients_mock,
    test_config,
):
    """SCRAPE the files in tc.TEST_FILES_DIR with
    ``supports_latest_client`` set.

    Expects success, exactly one of the two mocked fits2caom2 entry points
    to have been called, and an observation read.
    """
    set_clients_mock.side_effect = _clients_mock
    read_obs_mock.side_effect = _mock_read
    test_config.working_directory = tc.TEST_FILES_DIR
    test_config.use_local_files = True
    test_config.task_types = [mc.TaskType.SCRAPE]
    test_config.features.supports_latest_client = True

    outcome = rc.run_by_todo(config=test_config, command_name=TEST_COMMAND)

    assert outcome is not None, 'expect a result'
    assert outcome == 0, 'expect success'
    if not fits2caom2_mock.called:
        assert fits2caom2_in_out_mock.called, 'expect fits2caom2 in/out call'
    else:
        assert not fits2caom2_in_out_mock.called, 'expect no in/out call'
    assert read_obs_mock.called, 'read_obs not called'
def test_run_todo_retry(do_one_mock, test_config):
    """Run a todo file with retries enabled and ``do_one`` mocked.

    Expects a -1 result and exactly two ``do_one`` calls.
    """
    success_fqn, failure_fqn, retry_fqn = _clean_up_log_files(test_config)
    do_one_mock.side_effect = _mock_do_one

    test_config.work_fqn = f'{tc.TEST_DATA_DIR}/todo.txt'
    test_config.log_to_file = True
    test_config.retry_failures = True
    _write_todo(test_config)

    outcome = rc.run_by_todo(config=test_config, command_name=TEST_COMMAND)

    assert outcome is not None, 'expect a result'
    assert outcome == -1, 'expect failure'
    _check_log_files(test_config, success_fqn, failure_fqn, retry_fqn)
    assert do_one_mock.called, 'expect do_one call'
    assert do_one_mock.call_count == 2, 'wrong number of calls'
def test_run_todo_file_data_source(
    caps_mock, ad_mock, data_client_mock, set_clients_mock, test_config
):
    """Run a VISIT task from a todo file and check the success log.

    ``ad_mock`` is set up as a context manager yielding a 200 response and
    ``data_client_mock`` returns a SimpleObservation with observation id
    'def'; the success file must mention 'def def.fits'.
    """
    set_clients_mock.side_effect = _clients_mock
    caps_mock.return_value = 'https://sc2.canfar.net/sc2repo'

    ok_response = Mock()
    ok_response.status_code = 200
    ok_response.iter_content.return_value = [b'fileName\n']
    ad_mock.return_value.__enter__.return_value = ok_response

    data_client_mock.return_value = SimpleObservation(
        collection=test_config.collection,
        observation_id='def',
        algorithm=Algorithm('test'),
    )

    # a stale success file would make the existence check meaningless
    if os.path.exists(test_config.success_fqn):
        os.unlink(test_config.success_fqn)

    test_config.work_fqn = f'{TEST_DIR}/todo.txt'
    test_config.task_types = [mc.TaskType.VISIT]
    test_config.log_to_file = True

    outcome = rc.run_by_todo(
        config=test_config,
        chooser=ec.OrganizeChooser(),
        command_name=TEST_COMMAND,
    )

    assert outcome is not None, 'expect a result'
    assert outcome == 0, 'expect success'
    assert os.path.exists(test_config.success_fqn), 'expect success file'
    with open(test_config.success_fqn) as f_in:
        logged = f_in.read()
    # the obs id and file name
    assert 'def def.fits' in logged, 'wrong success message'
def _run_vo():
    """
    Uses a VOS listdir to identify the work to be done.

    A VOSpace client is created from the configured proxy file, attached to
    the client collection, and shared by the data source and the store
    transfer.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    config, clients, name_builder, metadata_reader = _common()
    vos_client = Client(vospace_certfile=config.proxy_file_name)
    clients.vo_client = vos_client
    vault_source = data_source.DAOVaultDataSource(
        config, clients.vo_client, clients.data_client
    )
    cleanup_transfer = transfer.VoFitsCleanupTransfer(vos_client, config)
    return rc.run_by_todo(
        name_builder=name_builder,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        source=vault_source,
        clients=clients,
        store_transfer=cleanup_transfer,
        metadata_reader=metadata_reader,
    )
def _run():
    """
    Uses a todo file with file names, even though Gemini provides
    information about existing data referenced by observation ID.

    When local files are used, or SCRAPE is among the task types, the work
    comes from a ListDirSeparateDataSource instead of the todo file.

    :return whatever rc.run_by_todo returns.
    """
    (
        clients,
        config,
        metadata_reader,
        meta_visitors,
        name_builder,
    ) = _common_init()

    reads_local = (
        config.use_local_files or mc.TaskType.SCRAPE in config.task_types
    )
    source = (
        dsc.ListDirSeparateDataSource(config)
        if reads_local
        else dsc.TodoFileDataSource(config)
    )

    return rc.run_by_todo(
        config=config,
        name_builder=name_builder,
        meta_visitors=meta_visitors,
        source=source,
        metadata_reader=metadata_reader,
        clients=clients,
    )
def test_run_todo_list_dir_data_source_invalid_fname_v(
    clients_mock, test_config
):
    """INGEST a local file whose StorageName reports itself invalid.

    A scratch directory holding 'abc.fits.gz' is created for the run and
    removed afterwards. Expects a -1 result, failure and retry files, and
    no metadata or data client access.
    """
    test_dir = os.path.join('/test_files', '1')
    test_fqn = os.path.join(test_dir, 'abc.fits.gz')

    test_config.working_directory = tc.TEST_DATA_DIR
    test_config.use_local_files = True
    test_config.data_sources = [test_dir]
    test_config.data_source_extensions = ['.fits', '.fits.gz']
    test_config.task_types = [mc.TaskType.INGEST]
    test_config.log_to_file = False
    test_config.features.supports_latest_client = True

    # remove left-overs so the existence checks below are meaningful
    for stale_fqn in (test_config.failure_fqn, test_config.retry_fqn):
        if os.path.exists(stale_fqn):
            os.unlink(stale_fqn)

    if not os.path.exists(test_dir):
        os.mkdir(test_dir)
    if not os.path.exists(test_fqn):
        with open(test_fqn, 'w') as f_out:
            f_out.write('abc')

    class TestStorageName(mc.StorageName):
        # a storage name that never validates
        def __init__(self, entry):
            self._obs_id = os.path.basename(entry)
            self._source_names = [entry]

        def is_valid(self):
            return False

    class TestStorageNameInstanceBuilder(b.StorageNameInstanceBuilder):
        # hands out the always-invalid storage name for every entry
        def __init__(self):
            pass

        def build(self, entry):
            return TestStorageName(entry)

    try:
        test_builder = TestStorageNameInstanceBuilder()
        test_chooser = ec.OrganizeChooser()
        outcome = rc.run_by_todo(
            config=test_config,
            chooser=test_chooser,
            name_builder=test_builder,
            command_name=TEST_COMMAND,
        )
        assert outcome is not None, 'expect a result'
        assert outcome == -1, 'expect failure, because of file naming'
        assert os.path.exists(test_config.failure_fqn), 'expect failure file'
        assert os.path.exists(test_config.retry_fqn), 'expect retry file'
        # NOTE(review): these read attributes of clients_mock itself, not
        # of clients_mock.return_value - confirm this matches how the
        # fixture is patched.
        assert (
            not clients_mock.metadata_client.read.called
        ), 'repo client read access happens after is_valid call'
        assert not (
            clients_mock.data_client.get_file.called
        ), 'bad file naming, should be no client access'
    finally:
        # always remove the scratch file and directory
        if os.path.exists(test_fqn):
            os.unlink(test_fqn)
        if os.path.exists(test_dir):
            logging.error(os.listdir(test_dir))
            os.rmdir(test_dir)