Example #1
    def single(self, command, args):
        """Run a single command using batch queue."""

        task_id = uuid.uuid4().hex

        # generate batch script
        extra_args = ' '.join(
            ['--{}={}'.format(k, v) for k, v in args.items()])

        script = self.single_script_template.format(
            workdir=normpath(self.workdir),
            venv=self.venv,
            command=command,
            extra_args=extra_args,
            task_id=task_id)
        batch_script_name = normpath(self.workdir,
                                     'batch_script_{}.sh'.format(task_id))
        with open(batch_script_name, 'w') as batch_script:
            batch_script.write(script)
        self.tmpcontext.add_files(batch_script_name)

        # submit the batch script to the queue; fail fast if submission is rejected
        result = subprocess.run([self.batch_submitter, batch_script_name])
        assert result.returncode == 0

        # block until completion
        semaphores = [normpath(self.workdir, '{}.done'.format(task_id))]
        self.tmpcontext.add_files(*semaphores)
        wait_semaphores(semaphores)
        return batch_script_name
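The single method above blocks on wait_semaphores, which is not part of these examples. A minimal polling sketch of such a helper, assuming each batch job touches a '<task_id>.done' file in the work directory when it finishes (the poll_interval and timeout parameters are purely illustrative):

import os
import time

def wait_semaphores(semaphores, poll_interval=5, timeout=None):
    """Block until every semaphore file exists (hypothetical sketch)."""
    start = time.time()
    pending = list(semaphores)
    while pending:
        # keep only the semaphore files that have not appeared yet
        pending = [s for s in pending if not os.path.exists(s)]
        if not pending:
            break
        if timeout is not None and time.time() - start > timeout:
            raise TimeoutError('semaphores still missing: {}'.format(pending))
        time.sleep(poll_interval)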
Example #2
def _setup_pass(config):

    parameters_file = None
    if config.parameters_file:
        parameters_file = normpath(config.parameters_file)
    params = default_parameters.copy()
    user_params = None
    if parameters_file:
        try:
            # override default parameters with those found in parameters_file
            with open(parameters_file, 'r') as f:
                user_params = json.load(f)
                params = update(params, user_params)
        except Exception as e:
            logger.log(logging.ERROR,
                       f'unable to read parameter file: {e}')
            raise

    # setup calibration dir
    if not os.path.exists(config.calibration_dir):
        raise FileNotFoundError(f"Calibration directory does not exist: "
                                f"{config.calibration_dir}")
    params['calibrationDir'] = config.calibration_dir

    context = Context(vars(config), params)

    return context, user_params
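_setup_pass merges the user parameters into the defaults through an update helper that is not shown in these examples. A recursive dictionary merge along these lines would be consistent with that call; the name and signature are assumptions:

def update(base, overrides):
    """Recursively merge overrides into base and return it (illustrative sketch)."""
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            # descend into nested parameter sections
            base[key] = update(base[key], value)
        else:
            base[key] = value
    return base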
Example #3
def main_method(config):
    """Run the pre_process step: group the input spectra into bunches.

    Parameters
    ----------
    config : :obj:`Config`
        Configuration object

    Returns
    -------
    int
        0 on success
    """

    # initialize logger
    logger = init_logger("pre_process", config.logdir, config.log_level)
    start_message = "Running pre_process {}".format(VERSION)
    logger.info(start_message)

    spectra_dir = normpath(config.workdir, config.spectra_dir)

    # bunch
    bunch_list = []
    for i, spc_list in enumerate(bunch(config.bunch_size, spectra_dir)):
        spectralist_file = os.path.join(config.output_dir,
                                        'spectralist_B{}.json'.format(i))
        with open(spectralist_file, "w") as ff:
            json.dump(spc_list, ff)
        bunch_list.append(spectralist_file)

    # create json containing list of bunches
    with open(config.bunch_list, 'w') as f:
        json.dump(bunch_list, f)

    return 0
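main_method iterates over a bunch generator that is not shown in these examples. A plausible sketch, assuming it simply scans spectra_dir and yields lists of at most bunch_size file names (an assumption, not the actual implementation):

import os

def bunch(bunch_size, spectra_dir):
    """Yield successive lists of at most bunch_size file names found in
    spectra_dir (hypothetical sketch)."""
    names = sorted(os.listdir(spectra_dir))
    size = int(bunch_size)
    for start in range(0, len(names), size):
        yield names[start:start + size]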
Example #4
def test_main_method():
    """Test the main_method function of the pre_process.py module."""

    wd = tempfile.TemporaryDirectory()
    sd = tempfile.TemporaryDirectory()
    config = Config(config_defaults)
    config.workdir = wd.name
    config.logdir = wd.name
    config.spectra_dir = sd.name
    config.output_dir = wd.name
    config.bunch_list = os.path.join(wd.name, config.bunch_list)

    list_file = []
    for i in range(9):
        with open(normpath(config.spectra_dir, '{}.file'.format(i)),
                  'w') as ff:
            list_file.append(os.path.basename(ff.name))
    result_run = main_method(config)
    assert result_run == 0

    json_bunch_list = config.bunch_list
    assert os.path.exists(json_bunch_list)

    with open(json_bunch_list, "r") as ff:
        lines = ff.readlines()
    assert len(lines) == 1

    with open(json_bunch_list, 'r') as ff:
        data = json.load(ff)
    assert len(data) == 2
    assert os.path.basename(data[0]) == "spectralist_B0.json"
    assert os.path.basename(data[1]) == "spectralist_B1.json"

    total = []
    for e in data:
        with open(e, 'r') as ff:
            datal = json.load(ff)
            total.append(datal)
    assert len(total) == 2
    assert len(total[0]) == 8
    assert len(total[1]) == 1
Example #5
def test_main_method():
    """Test the main_method function, including its error handling."""

    wd = tempfile.TemporaryDirectory()
    config = Config(config_defaults)
    config.workdir = wd.name
    config.output_dir = wd.name

    with pytest.raises(FileNotFoundError):
        main_method(config)

    bd = os.path.join(config.output_dir, "B0")
    config.bunch_listfile = os.path.join(bd, 'reduce.json')
    bdd = os.path.join(bd, 'data')
    os.makedirs(bdd, exist_ok=True)

    list_file = []
    for i in range(2):
        with open(normpath(bdd, '{}.file'.format(i)), 'w') as ff:
            ff.write("/n")
            list_file.append(ff.name)

    with open(config.bunch_listfile, "w") as ff:
        json.dump([bd], ff)

    rstr = "#com\nstr1	str2	1.0	2.0 str3    str4	3.0	str5	4.0	5.0	6.0	7.0	str6"
    rname = os.path.join(bd, "redshift.csv")
    with open(rname, "w") as ff:
        ff.write(rstr)

    result_run = main_method(config)
    assert result_run == 0

    data_dir = os.path.join(config.output_dir, 'data')
    assert os.path.exists(data_dir)

    dl = os.listdir(data_dir)
    assert len(dl) == 2
    assert "0.file" in dl
    assert "1.file" in dl
Example #6
def _output_path(args, *path):
    return normpath(args.workdir, args.output_dir, *path)
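As an illustration only, assuming normpath joins its arguments and then normalises the result (the behaviour exercised by test_normpath further below): with args.workdir set to '/work' and args.output_dir to 'out', a call such as _output_path(args, 'version.json') would resolve to '/work/out/version.json'.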
Example #7
def amazed(config):
    """Run the full-featured amazed client

    Parameters
    ----------
    config : :obj:`Config`
        Configuration object
    """
    logFileHandler = CLogFileHandler(os.path.join(config.logdir, 'amazed.log'))
    logFileHandler.SetLevelMask(_map_loglevel[config.log_level])
    #
    # Set up param and linecatalog for redshift pass
    #
    context, user_parameters = _setup_pass(config)

    with open(normpath(config.workdir, config.spectra_listfile), 'r') as f:
        spectra_list = json.load(f)

    outdir = normpath(config.workdir, config.output_dir)
    os.makedirs(outdir, exist_ok=True)

    data_dir = os.path.join(outdir, 'data')
    os.makedirs(data_dir, exist_ok=True)

    products = []
    for i, spectrum_path in enumerate(spectra_list):
        spectrum = normpath(config.workdir, config.spectra_dir,
                            spectrum_path["fits"])
        reader = PfsObjectReader(spectrum, context.calibration_library)
        nb_valid_points = reader.get_nb_valid_points()
        if nb_valid_points < 3000:
            logger.log(
                logging.WARNING,
                "Invalid spectrum, only {} valid points, not processed".format(
                    nb_valid_points))
            to_process = False
        else:
            to_process = True
        proc_id, ext = os.path.splitext(spectrum_path["fits"])
        spc_out_dir = os.path.join(outdir, proc_id)
        processed = False
        if to_process:
            # first step: compute redshift
            if os.path.exists(spc_out_dir):
                if config.continue_:
                    to_process = False
                else:
                    shutil.rmtree(spc_out_dir)
            if to_process:
                try:
                    _process_spectrum(data_dir, reader, context,
                                      user_parameters)
                    processed = True
                except Exception as e:
                    logger.log(logging.ERROR,
                               "Could not process spectrum: {}".format(e))

    with TemporaryFilesSet(
            keep_tempfiles=config.log_level <= logging.INFO) as tmpcontext:

        # save amazed version and parameters file to output dir
        version_file = _output_path(config, 'version.json')
        with open(version_file, 'w') as f:
            json.dump({'amazed-version': get_version()}, f)
        parameters_file = os.path.join(
            normpath(config.workdir, config.output_dir), 'parameters.json')
        with open(parameters_file, 'w') as f:
            json.dump(context.parameters, f)
        tmpcontext.add_files(parameters_file)

        # write list of created products
        with open(os.path.join(config.output_dir, "output.json"), 'w') as ff:
            json.dump(products, ff)
Example #8
def main_method(config):
    """Run the 1D Data Reduction Pipeline.

    Returns
    -------
    int
        0 on success
    """

    # initialize logger
    logger = init_logger('scheduler', config.logdir, config.log_level)
    start_message = "Running drp_1dpipe {}".format(VERSION)
    logger.info(start_message)

    # Launch banner
    print(start_message)

    # set workdir environment
    init_environ(config.workdir)

    runner_class = get_runner(config.scheduler)
    # if not runner_class:
    #     error_message = "Unknown runner {}".format(config.scheduler)
    #     logger.error(error_message)
    #     raise error_message

    notifier = init_notifier(config.notification_url)

    json_bunch_list = normpath(config.output_dir, 'bunchlist.json')

    notifier.update('root', 'RUNNING')
    notifier.update('pre_process', 'RUNNING')

    with TemporaryFilesSet(keep_tempfiles=config.log_level <= logging.DEBUG) as tmpcontext:

        runner = runner_class(config, tmpcontext)

        # prepare workdir
        try:
            runner.single('pre_process',
                          args={'workdir': normpath(config.workdir),
                                'logdir': normpath(config.logdir),
                                'bunch_size': config.bunch_size,
                                'spectra_dir': normpath(config.spectra_dir),
                                'bunch_list': json_bunch_list,
                                'output_dir': normpath(config.output_dir)})
        except Exception as e:
            traceback.print_exc()
            notifier.update('pre_process', 'ERROR')
            return 1
        else:
            notifier.update('pre_process', 'SUCCESS')
            # tmpcontext.add_files(json_bunch_list)

        # process spectra
        bunch_list, output_list, logdir_list = map_process_spectra_entries(
            json_bunch_list, config.output_dir, config.logdir)
        try:
            # runner.parallel('process_spectra', bunch_list,
            #                 'spectra-listfile', ['output-dir','logdir'],
            runner.parallel('process_spectra',
                            parallel_args={
                                'spectra_listfile': bunch_list,
                                'output_dir': output_list,
                                'logdir': logdir_list
                            },
                            args={
                                'workdir': normpath(config.workdir),
                                'lineflux': config.lineflux,
                                'spectra_dir': normpath(config.spectra_dir),
                                'parameters_file': config.parameters_file,
                                'linemeas_parameters_file': config.linemeas_parameters_file,
                                'stellar': config.stellar
                            })
        except Exception as e:
            traceback.print_exc()
            notifier.update('root', 'ERROR')
        else:
            notifier.update('root', 'SUCCESS')
        
        json_reduce = normpath(config.output_dir, 'reduce.json')
        reduce_process_spectra_output(json_bunch_list, config.output_dir, json_reduce)
        try:
            runner.single('merge_results',
                          args={'workdir': normpath(config.workdir),
                                'logdir': normpath(config.logdir),
                                'output_dir': normpath(config.output_dir),
                                'bunch_listfile': json_reduce})
        except Exception as e:
            traceback.print_exc()
            notifier.update('merge_results', 'ERROR')
            return 1
        else:
            notifier.update('merge_results', 'SUCCESS')

        aux_data_list = list_aux_data(json_bunch_list, config.output_dir)
        for aux_dir in aux_data_list:
            tmpcontext.add_dirs(aux_dir)


    return 0
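map_process_spectra_entries, reduce_process_spectra_output and list_aux_data are not shown in these examples. Purely as a hypothetical sketch of the first one, assuming the bunch-list JSON written by pre_process is a list of per-bunch spectra list files and that each bunch gets its own B<i> output and log subdirectory (the B0 directory used in Example #5 hints at such a layout):

import json
import os

def map_process_spectra_entries(json_bunch_list, output_dir, logdir):
    """Derive per-bunch argument lists from the bunch-list JSON
    (hypothetical sketch; the real helper may differ)."""
    with open(json_bunch_list, 'r') as f:
        bunches = json.load(f)
    output_list = [os.path.join(output_dir, 'B{}'.format(i))
                   for i in range(len(bunches))]
    logdir_list = [os.path.join(logdir, 'B{}'.format(i))
                   for i in range(len(bunches))]
    return bunches, output_list, logdir_list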
Example #9
def test_normpath():
    assert normpath('~/foo//bar/baz/~') == os.path.expanduser(
        '~/foo/bar/baz/~')
    assert normpath('~/foo/.././bar/./baz/') == os.path.expanduser('~/bar/baz')
    assert normpath('////foo/baz////') == os.path.expanduser('/foo/baz')
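Given the behaviour asserted above, and variadic calls such as normpath(self.workdir, 'batch_script_{}.sh'.format(task_id)) in the other examples, the helper plausibly joins its arguments, expands '~' and normalises the result. A sketch consistent with this test (an assumption, not the actual implementation):

import os

def normpath(*parts):
    """Join path fragments, expand '~' and normalise the result
    (sketch consistent with test_normpath)."""
    return os.path.normpath(os.path.expanduser(os.path.join(*parts)))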
Example #10
def amazed(config):
    """Run the full-featured amazed client

    Parameters
    ----------
    config : :obj:`Config`
        Configuration object
    """

    zlog = CLog()
    logFileHandler = CLogFileHandler(zlog, os.path.join(config.logdir,
                                                        'amazed.log'))
    logFileHandler.SetLevelMask(_map_loglevel[config.log_level])

    #
    # Set up param and linecatalog for redshift pass
    #
    param, line_catalog = _setup_pass(normpath(config.calibration_dir),
                                      normpath(config.parameters_file),
                                      normpath(config.linecatalog))
    medianRemovalMethod = param.Get_String('templateCatalog.continuumRemoval.'
                                           'method', 'IrregularSamplingMedian')
    opt_medianKernelWidth = param.Get_Float64('templateCatalog.'
                                              'continuumRemoval.'
                                              'medianKernelWidth')
    opt_nscales = param.Get_Float64('templateCatalog.continuumRemoval.'
                                    'decompScales',
                                    8.0)
    dfBinPath = param.Get_String('templateCatalog.continuumRemoval.binPath',
                                 'absolute_path_to_df_binaries_here')

    #
    # Set up param and linecatalog for line measurement pass
    #
    linemeas_param, linemeas_line_catalog = _setup_pass(normpath(config.calibration_dir),
                                                        normpath(config.linemeas_parameters_file),
                                                        normpath(config.linemeas_linecatalog))

    classif = CClassifierStore()

    if config.zclassifier_dir:
        zclassifier_dir = normpath(config.zclassifier_dir)
        if not os.path.exists(zclassifier_dir):
            raise FileNotFoundError(f"zclassifier directory does not exist: "
                                    f"{zclassifier_dir}")
        classif.Load(zclassifier_dir)

    with open(normpath(config.workdir, config.spectra_listfile), 'r') as f:
        spectra_list = json.load(f)

    template_catalog = CTemplateCatalog(medianRemovalMethod,
                                        opt_medianKernelWidth,
                                        opt_nscales, dfBinPath)
    logger.log(logging.INFO, "Loading %s" % config.template_dir)

    try:
        template_catalog.Load(normpath(config.template_dir))
    except Exception as e:
        logger.log(logging.CRITICAL, "Can't load template : {}".format(e))
        raise

    outdir = normpath(config.workdir, config.output_dir)
    os.makedirs(outdir, exist_ok=True)

    data_dir = os.path.join(outdir, 'data')
    os.makedirs(data_dir, exist_ok=True)

    outdir_linemeas = None
    if config.lineflux in ['only', 'on']:
        outdir_linemeas = '-'.join([outdir, 'lf'])
        os.makedirs(outdir_linemeas, exist_ok=True)

    products = []
    for i, spectrum_path in enumerate(spectra_list):
        spectrum = normpath(config.workdir, config.spectra_dir, spectrum_path)
        proc_id, ext = os.path.splitext(spectrum_path)
        spc_out_dir = os.path.join(outdir, proc_id)
        # line-measurement output dir; stays None unless lineflux is 'only' or 'on'
        spc_out_lin_dir = None

        if config.lineflux != 'only':
            # first step : compute redshift
            to_process = True
            if os.path.exists(spc_out_dir):
                if config.continue_:
                    to_process = False
                else:
                    shutil.rmtree(spc_out_dir)
            if to_process:
                _process_spectrum(outdir, i, spectrum, template_catalog,
                                  line_catalog, param, classif, 'all')

        if config.lineflux in ['only', 'on']:
            # second step : compute line fluxes
            to_process_lin = True
            spc_out_lin_dir = os.path.join(outdir_linemeas, proc_id)
            if os.path.exists(spc_out_lin_dir):
                if config.continue_:
                    to_process_lin = False
                else:
                    shutil.rmtree(spc_out_lin_dir)
            if to_process_lin:
                linemeas_param.Set_String('linemeascatalog',
                                          os.path.join(outdir, 'redshift.csv'))
                _process_spectrum(outdir_linemeas, i, spectrum,
                                  template_catalog,
                                  linemeas_line_catalog, linemeas_param,
                                  classif, 'linemeas')
            
        result = SpectrumResults(spectrum, spc_out_dir,
                                 output_lines_dir=spc_out_lin_dir,
                                 stellar=config.stellar)
        products.append(result.write(data_dir))

    with TemporaryFilesSet(keep_tempfiles=config.log_level <= logging.INFO) as tmpcontext:

        # save amazed version and parameters file to output dir
        version_file = _output_path(config, 'version.json')
        with open(version_file, 'w') as f:
            json.dump({'amazed-version': get_version()}, f)
        parameters_file = os.path.join(normpath(config.workdir, config.output_dir),
                                       'parameters.json')
        param.Save(parameters_file)
        tmpcontext.add_files(parameters_file)

        # create output products
        # results = AmazedResults(_output_path(config), normpath(config.workdir,
        #                                                      config.spectra_dir),
        #                         config.lineflux in ['only', 'on'],
        #                         tmpcontext=tmpcontext)
        # products = results.write()

        # write list of created products
        with open(os.path.join(config.output_dir, "output.json"), 'w') as ff:
            json.dump(products, ff)
Example #11
    def parallel(self, command, parallel_args=None, args=None):
        """Execute parallel task for batch runners

        Parameters
        ----------
        command : str
            Path to command to execute
        parallel_args : dict, optional
            command line arguments to related to each parallel task, by default None
        args : dict, optional
            command line arguments common to all parallel tasks, by default None
        """
        task_id = uuid.uuid4().hex
        executor_script = normpath(self.workdir,
                                   'batch_executor_{}.py'.format(task_id))
        self.tmpcontext.add_files(executor_script)

        # Convert dictionary of lists to a list of dictionaries
        pll_args = convert_dl_to_ld(parallel_args)

        # generate batch_executor script
        tasks = []
        extra_args = ['--{}={}'.format(k, v) for k, v in args.items()]
        #   if k not in ('pre-commands', seq_arg_name, 'notifier')]

        # setup tasks
        # with open(filelist, 'r') as f:
        #     subtasks = json.load(f)
        #     # register these files for deletion
        #     self.tmpcontext.add_files(*subtasks)

        # for k, v in pll_args.items():
        #     task = [command,
        #             '--{arg_name}={arg_value}'.format(arg_name=k,
        #                                               arg_value=v)]
        #     task.extend(extra_args)
        #     tasks.append(task)

        for i, arg_value in enumerate(pll_args):
            task = [command]
            for k, v in arg_value.items():
                task.append('--{arg_name}={arg_value}'.format(arg_name=k,
                                                              arg_value=v))
            task.extend(extra_args)
            tasks.append(task)
        # for i, arg_value in enumerate(subtasks):
        #     task = [command,
        #             '--{arg_name}={arg_value}'.format(arg_name=arg_name,
        #                                               arg_value=arg_value)]
        #     task.extend(extra_args)
        #     if seq_arg_name:
        #         [task.append('--{}={}'.format(
        #           seq_arg,
        #           os.path.join(args[seq_arg], 'B'+str(i)))
        #           ) for seq_arg in seq_arg_name]
        #     tasks.append(task)

        # setup pipeline notifier
        # notifier = args['notifier']
        # notifier.update(command,
        #                 children=['{}-{}'.format(command, i)
        #                           for i in range(ntasks)])
        # for i in range(ntasks):
        #     notifier.update('{}-{}'.format(command, i), state='WAITING')
        # notifier.update(command, 'RUNNING')

        # generate the batch executor script from its template
        with open(
                os.path.join(os.path.dirname(__file__), 'resources',
                             'executor.py.in'), 'r') as f:
            batch_executor = f.read().format(tasks=tasks, notification_url='')
            # batch_executor = f.read().format(tasks=tasks,
            #                                  notification_url=(notifier.pipeline_url
            #                                                    if notifier.pipeline_url
            #                                                    else ''))
        with open(executor_script, 'w') as executor:
            executor.write(batch_executor)

        # generate batch script
        ntasks = len(tasks)
        script = self.parallel_script_template.format(
            jobs=ntasks,
            workdir=normpath(self.workdir),
            venv=self.venv,
            executor_script=executor_script,
            task_id=task_id)
        batch_script_name = normpath(self.workdir,
                                     f'batch_script_{task_id}.sh')
        with open(batch_script_name, 'w') as batch_script:
            batch_script.write(script)
        self.tmpcontext.add_files(batch_script_name)

        # run batch
        result = subprocess.run([self.batch_submitter, batch_script_name])
        assert result.returncode == 0

        # wait all sub-tasks
        semaphores = [
            normpath(self.workdir, f'{task_id}_{i}.done')
            for i in range(1, ntasks + 1)
        ]
        self.tmpcontext.add_files(*semaphores)

        wait_semaphores(semaphores)
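The parallel method turns its parallel_args, a dict of equal-length lists, into one argument dict per task through convert_dl_to_ld, which is not shown in these examples. A minimal sketch of such a helper (behaviour inferred from the comment above its call site):

def convert_dl_to_ld(dict_of_lists):
    """Convert {'a': [1, 2], 'b': [3, 4]} into
    [{'a': 1, 'b': 3}, {'a': 2, 'b': 4}] (illustrative sketch)."""
    if not dict_of_lists:
        return []
    keys = list(dict_of_lists)
    # zip the per-key lists together, one tuple per parallel task
    return [dict(zip(keys, values))
            for values in zip(*(dict_of_lists[k] for k in keys))]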