def single(self, command, args):
    """Run a single command using the batch queue."""
    task_id = uuid.uuid4().hex

    # generate batch script
    extra_args = ' '.join(['--{}={}'.format(k, v) for k, v in args.items()])
    script = self.single_script_template.format(
        workdir=normpath(self.workdir),
        venv=self.venv,
        command=command,
        extra_args=extra_args,
        task_id=task_id)
    batch_script_name = normpath(self.workdir,
                                 'batch_script_{}.sh'.format(task_id))
    with open(batch_script_name, 'w') as batch_script:
        batch_script.write(script)
    self.tmpcontext.add_files(batch_script_name)

    # run batch
    result = subprocess.run([self.batch_submitter, batch_script_name])
    assert result.returncode == 0

    # block until completion
    semaphores = [normpath(self.workdir, '{}.done'.format(task_id))]
    self.tmpcontext.add_files(*semaphores)
    wait_semaphores(semaphores)

    return batch_script_name

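# `wait_semaphores` is not shown in this section. A minimal sketch of the
# expected behavior, assuming it simply polls the filesystem until every
# `.done` semaphore file exists; the `interval` and `timeout` parameters
# are illustrative assumptions, not the pipeline's actual defaults:

import os
import time


def wait_semaphores(semaphores, interval=1.0, timeout=None):
    """Block until all semaphore files exist.

    Parameters
    ----------
    semaphores : list of str
        Paths to `.done` files created by finished batch tasks.
    interval : float
        Polling interval in seconds (assumed value).
    timeout : float, optional
        If given, raise TimeoutError after this many seconds.
    """
    start = time.time()
    remaining = set(semaphores)
    while remaining:
        # keep only the semaphores that have not appeared yet
        remaining = {s for s in remaining if not os.path.exists(s)}
        if not remaining:
            break
        if timeout is not None and time.time() - start > timeout:
            raise TimeoutError(
                'semaphores not released: {}'.format(remaining))
        time.sleep(interval)
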
def _setup_pass(config):
    parameters_file = None
    if config.parameters_file:
        parameters_file = normpath(config.parameters_file)

    params = default_parameters.copy()
    user_params = None
    if parameters_file:
        try:
            # override default parameters with those found in parameters_file
            with open(parameters_file, 'r') as f:
                user_params = json.load(f)
            params = update(params, user_params)
        except Exception as e:
            logger.log(logging.INFO,
                       f'unable to read parameter file : {e}')
            raise

    # set up calibration dir
    if not os.path.exists(config.calibration_dir):
        raise FileNotFoundError(f"Calibration directory does not exist: "
                                f"{config.calibration_dir}")
    params['calibrationDir'] = config.calibration_dir

    context = Context(vars(config), params)

    return context, user_params

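# `update` is imported from elsewhere in the package. Given how it is used
# above (user parameters override the defaults, including nested sections),
# a minimal sketch assuming it is a recursive dict merge:

def update(base, overrides):
    """Recursively merge `overrides` into `base` and return `base`.

    Nested dicts are merged key by key; any other value in `overrides`
    replaces the corresponding value in `base`.
    """
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            update(base[key], value)
        else:
            base[key] = value
    return base
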
def main_method(config):
    """Split the input spectra into bunches for parallel processing.

    Parameters
    ----------
    config : :obj:`Config`
        Configuration object

    Returns
    -------
    int
        0 on success
    """
    # initialize logger
    logger = init_logger("pre_process", config.logdir, config.log_level)
    start_message = "Running pre_process {}".format(VERSION)
    logger.info(start_message)

    spectra_dir = normpath(config.workdir, config.spectra_dir)

    # bunch
    bunch_list = []
    for i, spc_list in enumerate(bunch(config.bunch_size, spectra_dir)):
        spectralist_file = os.path.join(config.output_dir,
                                        'spectralist_B{}.json'.format(i))
        with open(spectralist_file, "w") as ff:
            json.dump(spc_list, ff)
        bunch_list.append(spectralist_file)

    # create json containing the list of bunches
    with open(config.bunch_list, 'w') as f:
        json.dump(bunch_list, f)

    return 0

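# `bunch` is defined elsewhere in the package. The test below (9 input
# files split into bunches of 8 and 1) pins down its behavior; a minimal
# sketch, assuming it yields lists of at most `bunch_size` file names
# found in `spectra_dir`:

import os


def bunch(bunch_size, spectra_dir):
    """Yield successive lists of at most `bunch_size` spectra file names."""
    batch = []
    for entry in sorted(os.listdir(spectra_dir)):
        batch.append(entry)
        if len(batch) == int(bunch_size):
            yield batch
            batch = []
    if batch:
        # final, possibly shorter, bunch
        yield batch
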
def test_main_method():
    """Test the `main_method` function of the `pre_process.py` module."""
    wd = tempfile.TemporaryDirectory()
    sd = tempfile.TemporaryDirectory()
    config = Config(config_defaults)
    config.workdir = wd.name
    config.logdir = wd.name
    config.spectra_dir = sd.name
    config.output_dir = wd.name
    config.bunch_list = os.path.join(wd.name, config.bunch_list)

    list_file = []
    for i in range(9):
        with open(normpath(config.spectra_dir, '{}.file'.format(i)),
                  'w') as ff:
            list_file.append(os.path.basename(ff.name))

    result_run = main_method(config)
    assert result_run == 0

    json_bunch_list = config.bunch_list
    assert os.path.exists(json_bunch_list)

    # the bunch list is a single-line json file
    with open(json_bunch_list, "r") as ff:
        lines = ff.readlines()
    assert len(lines) == 1

    # 9 spectra with a bunch size of 8 yield two bunches of 8 and 1
    with open(json_bunch_list, 'r') as ff:
        data = json.load(ff)
    assert len(data) == 2
    assert os.path.basename(data[0]) == "spectralist_B0.json"
    assert os.path.basename(data[1]) == "spectralist_B1.json"

    total = []
    for e in data:
        with open(e, 'r') as ff:
            datal = json.load(ff)
        total.append(datal)
    assert len(total) == 2
    assert len(total[0]) == 8
    assert len(total[1]) == 1

def test_main_method():
    """Test the `main_method` function of the merge_results module."""
    wd = tempfile.TemporaryDirectory()
    config = Config(config_defaults)
    config.workdir = wd.name
    config.output_dir = wd.name

    # without any bunch results in place, the method must fail
    with pytest.raises(FileNotFoundError):
        main_method(config)

    bd = os.path.join(config.output_dir, "B0")
    config.bunch_listfile = os.path.join(bd, 'reduce.json')
    bdd = os.path.join(bd, 'data')
    os.makedirs(bdd, exist_ok=True)

    list_file = []
    for i in range(2):
        with open(normpath(bdd, '{}.file'.format(i)), 'w') as ff:
            ff.write("\n")
            list_file.append(ff.name)
    with open(config.bunch_listfile, "w") as ff:
        json.dump([bd], ff)

    rstr = "#com\nstr1 str2 1.0 2.0 str3 str4 3.0 str5 4.0 5.0 6.0 7.0 str6"
    rname = os.path.join(bd, "redshift.csv")
    with open(rname, "w") as ff:
        ff.write(rstr)

    result_run = main_method(config)
    assert result_run == 0

    # the per-bunch data files must have been merged into output_dir/data
    data_dir = os.path.join(config.output_dir, 'data')
    assert os.path.exists(data_dir)
    dl = os.listdir(data_dir)
    assert len(dl) == 2
    assert "0.file" in dl
    assert "1.file" in dl

def _output_path(args, *path):
    return normpath(args.workdir, args.output_dir, *path)

def amazed(config):
    """Run the full-featured amazed client.

    Parameters
    ----------
    config : :obj:`Config`
        Configuration object
    """
    logFileHandler = CLogFileHandler(os.path.join(config.logdir,
                                                  'amazed.log'))
    logFileHandler.SetLevelMask(_map_loglevel[config.log_level])

    #
    # Set up param and linecatalog for redshift pass
    #
    context, user_parameters = _setup_pass(config)

    with open(normpath(config.workdir, config.spectra_listfile), 'r') as f:
        spectra_list = json.load(f)

    outdir = normpath(config.workdir, config.output_dir)
    os.makedirs(outdir, exist_ok=True)

    data_dir = os.path.join(outdir, 'data')
    os.makedirs(data_dir, exist_ok=True)

    products = []

    for i, spectrum_path in enumerate(spectra_list):
        spectrum = normpath(config.workdir, config.spectra_dir,
                            spectrum_path["fits"])
        reader = PfsObjectReader(spectrum, context.calibration_library)
        nb_valid_points = reader.get_nb_valid_points()
        if nb_valid_points < 3000:
            logger.log(logging.WARNING,
                       "Invalid spectrum, only {} valid points, "
                       "not processed".format(nb_valid_points))
            to_process = False
        else:
            to_process = True

        proc_id, ext = os.path.splitext(spectrum_path["fits"])
        spc_out_dir = os.path.join(outdir, proc_id)

        processed = False
        if to_process:
            # first step : compute redshift
            if os.path.exists(spc_out_dir):
                if config.continue_:
                    to_process = False
                else:
                    shutil.rmtree(spc_out_dir)
        if to_process:
            try:
                _process_spectrum(data_dir, reader, context, user_parameters)
                processed = True
            except Exception as e:
                logger.log(logging.ERROR,
                           "Could not process spectrum: {}".format(e))

    with TemporaryFilesSet(
            keep_tempfiles=config.log_level <= logging.INFO) as tmpcontext:

        # save amazed version and parameters file to output dir
        version_file = _output_path(config, 'version.json')
        with open(version_file, 'w') as f:
            json.dump({'amazed-version': get_version()}, f)
        parameters_file = os.path.join(
            normpath(config.workdir, config.output_dir), 'parameters.json')
        with open(parameters_file, 'w') as f:
            json.dump(context.parameters, f)
        tmpcontext.add_files(parameters_file)

        # write list of created products
        with open(os.path.join(config.output_dir, "output.json"), 'w') as ff:
            json.dump(products, ff)

def main_method(config):
    """Run the 1D Data Reduction Pipeline.

    Returns
    -------
    int
        0 on success
    """
    # initialize logger
    logger = init_logger('scheduler', config.logdir, config.log_level)
    start_message = "Running drp_1dpipe {}".format(VERSION)
    logger.info(start_message)

    # launch banner
    print(start_message)

    # set workdir environment
    init_environ(config.workdir)

    runner_class = get_runner(config.scheduler)

    notifier = init_notifier(config.notification_url)

    json_bunch_list = normpath(config.output_dir, 'bunchlist.json')

    notifier.update('root', 'RUNNING')
    notifier.update('pre_process', 'RUNNING')

    with TemporaryFilesSet(
            keep_tempfiles=config.log_level <= logging.DEBUG) as tmpcontext:

        runner = runner_class(config, tmpcontext)

        # prepare workdir
        try:
            runner.single('pre_process',
                          args={'workdir': normpath(config.workdir),
                                'logdir': normpath(config.logdir),
                                'bunch_size': config.bunch_size,
                                'spectra_dir': normpath(config.spectra_dir),
                                'bunch_list': json_bunch_list,
                                'output_dir': normpath(config.output_dir)})
        except Exception:
            traceback.print_exc()
            notifier.update('pre_process', 'ERROR')
            return 1
        else:
            notifier.update('pre_process', 'SUCCESS')

        # process spectra
        bunch_list, output_list, logdir_list = map_process_spectra_entries(
            json_bunch_list, config.output_dir, config.logdir)
        try:
            runner.parallel('process_spectra',
                            parallel_args={
                                'spectra_listfile': bunch_list,
                                'output_dir': output_list,
                                'logdir': logdir_list},
                            args={
                                'workdir': normpath(config.workdir),
                                'lineflux': config.lineflux,
                                'spectra_dir': normpath(config.spectra_dir),
                                'parameters_file': config.parameters_file,
                                'linemeas_parameters_file':
                                    config.linemeas_parameters_file,
                                'stellar': config.stellar})
        except Exception:
            traceback.print_exc()
            notifier.update('root', 'ERROR')
        else:
            notifier.update('root', 'SUCCESS')

        # merge results
        json_reduce = normpath(config.output_dir, 'reduce.json')
        reduce_process_spectra_output(json_bunch_list, config.output_dir,
                                      json_reduce)
        try:
            runner.single('merge_results',
                          args={'workdir': normpath(config.workdir),
                                'logdir': normpath(config.logdir),
                                'output_dir': normpath(config.output_dir),
                                'bunch_listfile': json_reduce})
        except Exception:
            traceback.print_exc()
            notifier.update('merge_results', 'ERROR')
            return 1
        else:
            notifier.update('merge_results', 'SUCCESS')

        # keep auxiliary data directories out of the temp-files cleanup
        aux_data_list = list_aux_data(json_bunch_list, config.output_dir)
        for aux_dir in aux_data_list:
            tmpcontext.add_dirs(aux_dir)

    return 0

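# `map_process_spectra_entries` is defined elsewhere in the package. Given
# how its three return values are consumed above (one spectra listfile, one
# output dir and one logdir per bunch) and the `B0`-style bunch directories
# seen in the merge_results test, a minimal sketch under those assumptions
# (the `B{i}` naming is an inference, not confirmed by this section):

import json
import os


def map_process_spectra_entries(json_bunch_list, output_dir, logdir):
    """Return per-bunch (listfile, output_dir, logdir) argument lists."""
    with open(json_bunch_list, 'r') as f:
        bunches = json.load(f)
    bunch_list = list(bunches)
    output_list = [os.path.join(output_dir, 'B{}'.format(i))
                   for i in range(len(bunches))]
    logdir_list = [os.path.join(logdir, 'B{}'.format(i))
                   for i in range(len(bunches))]
    return bunch_list, output_list, logdir_list
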
def test_normpath():
    assert normpath('~/foo//bar/baz/~') == os.path.expanduser(
        '~/foo/bar/baz/~')
    assert normpath('~/foo/.././bar/./baz/') == os.path.expanduser(
        '~/bar/baz')
    assert normpath('////foo/baz////') == '/foo/baz'

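# `normpath` itself is a small helper from the package's utilities, used
# throughout this section with one or several path components. The
# assertions above pin down its behavior; a minimal sketch, assuming it
# joins its arguments and then applies user expansion and normalization:

import os


def normpath(*args):
    """Join path components, expand `~`, and normalize the result."""
    return os.path.normpath(os.path.expanduser(os.path.join(*args)))
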
def amazed(config):
    """Run the full-featured amazed client.

    Parameters
    ----------
    config : :obj:`Config`
        Configuration object
    """
    zlog = CLog()
    logFileHandler = CLogFileHandler(zlog, os.path.join(config.logdir,
                                                        'amazed.log'))
    logFileHandler.SetLevelMask(_map_loglevel[config.log_level])

    #
    # Set up param and linecatalog for redshift pass
    #
    param, line_catalog = _setup_pass(normpath(config.calibration_dir),
                                      normpath(config.parameters_file),
                                      normpath(config.linecatalog))
    medianRemovalMethod = param.Get_String(
        'templateCatalog.continuumRemoval.method', 'IrregularSamplingMedian')
    opt_medianKernelWidth = param.Get_Float64(
        'templateCatalog.continuumRemoval.medianKernelWidth')
    opt_nscales = param.Get_Float64(
        'templateCatalog.continuumRemoval.decompScales', 8.0)
    dfBinPath = param.Get_String('templateCatalog.continuumRemoval.binPath',
                                 'absolute_path_to_df_binaries_here')

    #
    # Set up param and linecatalog for line measurement pass
    #
    linemeas_param, linemeas_line_catalog = _setup_pass(
        normpath(config.calibration_dir),
        normpath(config.linemeas_parameters_file),
        normpath(config.linemeas_linecatalog))

    classif = CClassifierStore()
    if config.zclassifier_dir:
        zclassifier_dir = normpath(config.zclassifier_dir)
        if not os.path.exists(zclassifier_dir):
            raise FileNotFoundError(f"zclassifier directory does not exist: "
                                    f"{zclassifier_dir}")
        classif.Load(zclassifier_dir)

    with open(normpath(config.workdir, config.spectra_listfile), 'r') as f:
        spectra_list = json.load(f)

    template_catalog = CTemplateCatalog(medianRemovalMethod,
                                        opt_medianKernelWidth,
                                        opt_nscales, dfBinPath)
    logger.log(logging.INFO, "Loading %s" % config.template_dir)

    try:
        template_catalog.Load(normpath(config.template_dir))
    except Exception as e:
        logger.log(logging.CRITICAL, "Can't load template : {}".format(e))
        raise

    outdir = normpath(config.workdir, config.output_dir)
    os.makedirs(outdir, exist_ok=True)

    data_dir = os.path.join(outdir, 'data')
    os.makedirs(data_dir, exist_ok=True)

    outdir_linemeas = None
    if config.lineflux in ['only', 'on']:
        outdir_linemeas = '-'.join([outdir, 'lf'])
        os.makedirs(outdir_linemeas, exist_ok=True)

    products = []

    for i, spectrum_path in enumerate(spectra_list):
        spectrum = normpath(config.workdir, config.spectra_dir,
                            spectrum_path)
        proc_id, ext = os.path.splitext(spectrum_path)
        spc_out_dir = os.path.join(outdir, proc_id)
        spc_out_lin_dir = None

        if config.lineflux != 'only':
            # first step : compute redshift
            to_process = True
            if os.path.exists(spc_out_dir):
                if config.continue_:
                    to_process = False
                else:
                    shutil.rmtree(spc_out_dir)
            if to_process:
                _process_spectrum(outdir, i, spectrum, template_catalog,
                                  line_catalog, param, classif, 'all')

        if config.lineflux in ['only', 'on']:
            # second step : compute line fluxes
            to_process_lin = True
            spc_out_lin_dir = os.path.join(outdir_linemeas, proc_id)
            if os.path.exists(spc_out_lin_dir):
                if config.continue_:
                    to_process_lin = False
                else:
                    shutil.rmtree(spc_out_lin_dir)
            if to_process_lin:
                linemeas_param.Set_String(
                    'linemeascatalog', os.path.join(outdir, 'redshift.csv'))
                _process_spectrum(outdir_linemeas, i, spectrum,
                                  template_catalog, linemeas_line_catalog,
                                  linemeas_param, classif, 'linemeas')

        result = SpectrumResults(spectrum, spc_out_dir,
                                 output_lines_dir=spc_out_lin_dir,
                                 stellar=config.stellar)
        products.append(result.write(data_dir))

    with TemporaryFilesSet(
            keep_tempfiles=config.log_level <= logging.INFO) as tmpcontext:

        # save amazed version and parameters file to output dir
        version_file = _output_path(config, 'version.json')
        with open(version_file, 'w') as f:
            json.dump({'amazed-version': get_version()}, f)
        parameters_file = os.path.join(normpath(config.workdir,
                                                config.output_dir),
                                       'parameters.json')
        param.Save(parameters_file)
        tmpcontext.add_files(parameters_file)

        # write list of created products
        with open(os.path.join(config.output_dir, "output.json"), 'w') as ff:
            json.dump(products, ff)

def parallel(self, command, parallel_args=None, args=None):
    """Execute a parallel task using the batch queue.

    Parameters
    ----------
    command : str
        Path to the command to execute
    parallel_args : dict, optional
        command line arguments specific to each parallel task,
        by default None
    args : dict, optional
        command line arguments common to all parallel tasks, by default None
    """
    task_id = uuid.uuid4().hex
    executor_script = normpath(self.workdir,
                               'batch_executor_{}.py'.format(task_id))
    self.tmpcontext.add_files(executor_script)

    # convert dictionary of lists to list of dictionaries
    pll_args = convert_dl_to_ld(parallel_args)

    # set up tasks, one per entry of pll_args
    tasks = []
    extra_args = ['--{}={}'.format(k, v) for k, v in args.items()]
    for i, arg_value in enumerate(pll_args):
        task = [command]
        for k, v in arg_value.items():
            task.append('--{arg_name}={arg_value}'.format(arg_name=k,
                                                          arg_value=v))
        task.extend(extra_args)
        tasks.append(task)

    # generate batch_executor script
    with open(os.path.join(os.path.dirname(__file__), 'resources',
                           'executor.py.in'), 'r') as f:
        batch_executor = f.read().format(tasks=tasks, notification_url='')
    with open(executor_script, 'w') as executor:
        executor.write(batch_executor)

    # generate batch script
    ntasks = len(tasks)
    script = self.parallel_script_template.format(
        jobs=ntasks,
        workdir=normpath(self.workdir),
        venv=self.venv,
        executor_script=executor_script,
        task_id=task_id)
    batch_script_name = normpath(self.workdir, f'batch_script_{task_id}.sh')
    with open(batch_script_name, 'w') as batch_script:
        batch_script.write(script)
    self.tmpcontext.add_files(batch_script_name)

    # run batch
    result = subprocess.run([self.batch_submitter, batch_script_name])
    assert result.returncode == 0

    # wait for all sub-tasks
    semaphores = [
        normpath(self.workdir, f'{task_id}_{i}.done')
        for i in range(1, ntasks + 1)
    ]
    self.tmpcontext.add_files(*semaphores)
    wait_semaphores(semaphores)

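# `convert_dl_to_ld` comes from the package's utilities. Its name and the
# comment above ("dictionary of lists to list of dictionaries") make the
# contract clear; a minimal sketch, assuming all input lists have the same
# length:

def convert_dl_to_ld(dl):
    """Convert a dict of equal-length lists into a list of dicts.

    >>> convert_dl_to_ld({'a': [1, 2], 'b': [3, 4]})
    [{'a': 1, 'b': 3}, {'a': 2, 'b': 4}]
    """
    if not dl:
        return []
    keys = list(dl)
    length = len(dl[keys[0]])
    return [{k: dl[k][i] for k in keys} for i in range(length)]
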