Example #1
    def __init__(self, filename, config_path, config_raw, config,
                 message_store):
        self.logger = get_logger()
        self.task_index = {t: i for i, t in enumerate(self.task_order)}
        self.message_store = message_store
        self.filename = filename
        self.filename_path = config_path
        self.file_raw = config_raw
        self.run_config = config
        self.global_config = get_config()

        self.prefix = self.global_config["QUEUE"]["prefix"] + "_" + filename
        self.max_jobs = int(self.global_config["QUEUE"]["max_jobs"])
        self.max_jobs_gpu = int(self.global_config["QUEUE"]["max_gpu_jobs"])
        self.max_jobs_in_queue = int(
            self.global_config["QUEUE"]["max_jobs_in_queue"])
        self.max_jobs_in_queue_gpu = int(
            self.global_config["QUEUE"]["max_gpu_jobs_in_queue"])

        self.logger.debug(self.global_config.keys())

        self.sbatch_cpu_path = get_data_loc(
            self.global_config["SBATCH"]["cpu_location"])
        with open(self.sbatch_cpu_path, 'r') as f:
            self.sbatch_cpu_header = f.read()
        self.sbatch_gpu_path = get_data_loc(
            self.global_config["SBATCH"]["gpu_location"])
        with open(self.sbatch_gpu_path, 'r') as f:
            self.sbatch_gpu_header = f.read()
        self.sbatch_cpu_header = self.clean_header(self.sbatch_cpu_header)
        self.sbatch_gpu_header = self.clean_header(self.sbatch_gpu_header)
        self.setup_task_location = self.global_config["SETUP"]["location"]
        self.load_task_setup()

        self.output_dir = os.path.join(get_output_dir(), self.filename)
        self.tasks = None
        self.num_jobs_queue = 0
        self.num_jobs_queue_gpu = 0

        self.start = None
        self.finish = None
        self.force_refresh = False
        self.force_ignore_stage = None

        self.running = []
        self.done = []
        self.failed = []
        self.blocked = []
Example #2
 def get_sys_file_in(self):
     set_file = self.options.get("SYS_SCALE")
     if set_file is not None:
         self.logger.debug(f"Explicit SYS_SCALE file specified: {set_file}")
         path = get_data_loc(set_file)
         if path is None:
             raise ValueError(f"Unable to resolve path to {set_file}")
     else:
         self.logger.debug(
             "Searching for SYS_SCALE source from biascor task")
         fitopt_files = [
             f for f in self.biascor_dep.output["fitopt_files"]
             if f is not None
         ]
         assert len(set(fitopt_files)) < 2, \
             f"Cannot automatically determine scaling from FITOPT file as you have multiple files: {fitopt_files}"
         if fitopt_files:
             path = fitopt_files[0]
         else:
             path = None
     self.options["SYS_SCALE"] = path  # Save to options so it's serialised out
     self.logger.info(f"Setting systematics scaling file to {path}")
     return path
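
All of these snippets resolve paths through get_data_loc, and the checks above show its contract: it returns None when nothing can be resolved, and (per Example #7 below) it accepts an extra search directory. For orientation, here is a minimal sketch of such a resolver; DATA_DIRS and the lookup order are assumptions, not the library's actual implementation.

    import os

    DATA_DIRS = ["/path/to/user/data"]  # hypothetical configured search roots

    def get_data_loc(path, extra=None):
        # Expand env vars, accept existing absolute paths, then fall back
        # to searching the known data directories in order.
        path = os.path.expandvars(path)
        if os.path.isabs(path):
            return path if os.path.exists(path) else None
        for d in ([extra] if extra else []) + DATA_DIRS:
            candidate = os.path.join(d, path)
            if os.path.exists(candidate):
                return candidate
        return None
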
Example #3
    def __init__(self, name, output_dir, config, dependencies, mode, options, index=0, model_name=None):
        super().__init__(name, output_dir, config, dependencies, mode, options, index=index, model_name=model_name)
        self.global_config = get_config()
        self.options = options

        self.gpu = self.options.get("GPU", True)
        self.conda_env = self.global_config["SCONE"]["conda_env_cpu"] if not self.gpu else self.global_config["SCONE"]["conda_env_gpu"]
        self.path_to_classifier = self.global_config["SCONE"]["location"]

        self.job_base_name = os.path.basename(Path(output_dir).parents[1]) + "__" + os.path.basename(output_dir)

        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})


        self.config_path = os.path.join(self.output_dir, "model_config.yml")
        self.heatmaps_path = os.path.join(self.output_dir, "heatmaps")
        self.csvs_path = os.path.join(self.output_dir, "sim_csvs")
        self.slurm = """{sbatch_header}
        {task_setup}"""

        self.logfile = os.path.join(self.output_dir, "output.log")

        remake_heatmaps = self.options.get("REMAKE_HEATMAPS", False)
        self.keep_heatmaps = not remake_heatmaps
Example #4
    def validate_fitopts(self, config):
        # Loading fitopts
        fitopts = config.get("FITOPTS", [])
        if isinstance(fitopts, str):
            fitopts = [fitopts]

        self.logger.debug("Loading fitopts")
        has_file = False
        self.output["fitopt_file"] = None
        self.raw_fitopts = []
        for f in fitopts:
            self.logger.debug(f"Parsing fitopt {f}")
            potential_path = get_data_loc(f)
            if potential_path is not None and os.path.exists(potential_path):
                if has_file:
                    raise ValueError("It seems that you're trying to load in two files for the FITOPTS! Please specify only one file path!")
                self.logger.debug(f"Loading in fitopts from {potential_path}")
                y = read_yaml(potential_path)
                assert isinstance(y, dict), "New FITOPT format for external files is a yaml dictionary. See global.yml for an example."
                has_file = True
                self.raw_fitopts.append(y)
                self.logger.debug(f"Loaded a fitopt dictionary file from {potential_path}")
                self.output["fitopt_file"] = potential_path
            else:
                assert f.strip().startswith("/"), \
                    f"Manual fitopt {f} for lcfit {self.name} should specify a label wrapped with /. If this is meant to be a file, it doesn't exist."
                self.logger.debug(f"Adding manual fitopt {f}")
                self.raw_fitopts.append(f)
Example #5
    def __init__(self,
                 name,
                 output_dir,
                 options,
                 global_config,
                 dependencies=None,
                 index=0):

        base_file = get_data_loc("create_cov/input_file.txt")
        super().__init__(name,
                         output_dir,
                         base_file,
                         default_assignment=": ",
                         dependencies=dependencies)

        self.options = options
        self.global_config = get_config()
        self.index = index
        self.job_name = os.path.basename(
            Path(output_dir).parents[1]) + "_CREATE_COV_" + name
        self.path_to_code = os.path.abspath(
            os.path.dirname(inspect.stack()[0][1]) + "/external")

        self.logfile = os.path.join(self.output_dir, "output.log")
        self.sys_file_in = get_data_loc(
            options.get("SYS_SCALE", "surveys/des/bbc/scale_5yr.list"))
        self.sys_file_out = os.path.join(self.output_dir, "sys_scale.LIST")
        self.chain_dir = os.path.join(self.output_dir, "chains/")
        self.config_dir = os.path.join(self.output_dir, "output")

        self.biascor_dep = self.get_dep(BiasCor, fail=True)
        self.output["blind"] = self.biascor_dep.output["blind"]
        self.input_file = os.path.join(
            self.output_dir,
            self.biascor_dep.output["subdirs"][index] + ".input")
        self.output["hubble_plot"] = self.biascor_dep.output["hubble_plot"]

        self.output["ini_dir"] = self.config_dir
        covopts_map = {"ALL": 0}
        for i, covopt in enumerate(self.options.get("COVOPTS", [])):
            covopts_map[covopt.split("]")[0][1:]] = i + 1
        self.output["covopts"] = covopts_map
        self.output["index"] = index
        self.output["bcor_name"] = self.biascor_dep.name
        self.slurm = """#!/bin/bash
Example #6
 def load_task_setup(self):
     tasks = [
         'cosmomc', 'snirf', 'analyse', 'supernnova', 'nearest_neighbour',
         'create_cov', 'supernnova_yml', 'scone', 'dataprep'
     ]
     self.task_setup = {}
     for task in tasks:
         with open(get_data_loc(f"{self.setup_task_location}/{task}"),
                   'r') as f:
             self.task_setup[task] = f.read()
Example #7
 def add_plot_script_to_run(self, script_name):
     script_path = get_data_loc(script_name, extra=self.plot_code_dir)
     if script_path is None:
         self.fail_config(
             f"Cannot resolve script {script_name} relative to {self.plot_code_dir}. Please use a variable or abs path."
         )
     else:
         self.logger.debug(
             f"Adding script path {script_path} to plotting code.")
     self.path_to_codes.append(script_path)
     self.done_files.append(
         os.path.join(self.output_dir,
                      os.path.basename(script_name).split(".")[0] +
                      ".done"))
Example #8
 def validate_model(self):
     if self.mode == Classifier.PREDICT:
         model = self.options.get("MODEL")
         if model is None:
             Task.fail_config(
                 f"Classifier {self.name} is in predict mode but does not have a model specified"
             )
         model_classifier = self.get_model_classifier()
         if model_classifier is not None and model_classifier.name == model:
             return True
         path = get_data_loc(model)
         if not os.path.exists(path):
             Task.fail_config(
                 f"Classifier {self.name} does not have a classifier dependency and model is not a serialised file path"
             )
     return True
Example #9
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 dependencies,
                 mode,
                 options,
                 index=0,
                 model_name=None):
        super().__init__(name,
                         output_dir,
                         config,
                         dependencies,
                         mode,
                         options,
                         index=index,
                         model_name=model_name)
        self.global_config = get_config()
        self.num_jobs = 1

        self.conda_env = self.global_config["SNIRF"]["conda_env"]

        self.path_to_classifier = os.path.dirname(inspect.stack()[0][1])
        self.job_base_name = os.path.basename(
            Path(output_dir).parents[1]) + "__" + os.path.basename(output_dir)
        self.features = options.get(
            "FEATURES",
            "zHD x1 c cERR x1ERR COV_x1_c COV_x1_x0 COV_c_x0 PKMJDERR")
        # self.model_pk_file = self.get_unique_name() + ".pkl"
        self.model_pk_file = "model.pkl"

        self.output_pk_file = os.path.join(self.output_dir, self.model_pk_file)
        self.predictions_filename = os.path.join(self.output_dir,
                                                 "predictions.csv")

        self.fitopt = options.get("FITOPT", "DEFAULT")

        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        self.output["predictions_filename"] = self.predictions_filename
        self.output["model_filename"] = self.output_pk_file
        self.validate_model()

        self.slurm = """{sbatch_header}
Example #10
    def calculate_input(self):
        self.logger.debug(f"Calculating input")
        self.set_property("COSMOMC_TEMPLATES",
                          get_data_loc("cosmomc_templates"))
        self.set_property("BASEOUTPUT", self.name)
        self.set_property("SYSFILE", self.sys_file_out)
        self.set_property("TOPDIR", self.biascor_dep.output["fit_output_dir"])
        self.set_property("OUTPUTDIR", self.config_dir)
        self.set_property("SUBDIR",
                          self.biascor_dep.output["subdirs"][self.index])
        self.set_property("ROOTDIR", self.chain_dir)
        self.set_property("SYSDEFAULT", self.options.get("SYSDEFAULT", 0))

        # More bs hacks
        covopt_str = ""
        for i, covopt in enumerate(self.options.get("COVOPTS", [])):
            if i > 0:
                covopt_str += "COVOPT: "
            covopt_str += covopt + "\n"
        self.set_property("COVOPT", covopt_str)

        # Load in sys file, add muopt arguments if needed
        # Get the MUOPT_SCALES and FITOPT scales keywords
        self.logger.debug(f"Leading sys scaling from {self.sys_file_in}")
        with open(self.sys_file_in) as f:
            sys_scale = f.read().splitlines()

            # Overwrite the fitopt scales
            fitopt_scale_overwrites = self.options.get("FITOPT_SCALES", {})
            for label, overwrite in fitopt_scale_overwrites.items():
                for i, line in enumerate(sys_scale):
                    comps = line.split()
                    if label in comps[1]:
                        sys_scale[i] = " ".join(comps[:-1] + [f"{overwrite}"])
                        self.logger.debug(
                            f"FITOPT_SCALES: Setting {' '.join(comps)} to {sys_scale[i]}"
                        )

            # Set the muopts scales
            muopt_scales = self.options.get("MUOPT_SCALES", {})
            muopts = self.biascor_dep.output["muopts"]
            for muopt in muopts:
                scale = muopt_scales.get(muopt, 1.0)
                sys_scale.append(f"ERRSCALE: DEFAULT {muopt} {scale}")

            return sys_scale
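
The FITOPT_SCALES loop above rewrites the last token of any line whose second column contains the requested label. Rendered standalone with a made-up two-line scale file (the labels and values are illustrative only):

    sys_scale = [
        "ERRSCALE: FITOPT_CAL 1.0",
        "ERRSCALE: FITOPT_ZSHIFT 1.0",
    ]
    fitopt_scale_overwrites = {"CAL": 0.5}

    for label, overwrite in fitopt_scale_overwrites.items():
        for i, line in enumerate(sys_scale):
            comps = line.split()
            if label in comps[1]:
                sys_scale[i] = " ".join(comps[:-1] + [f"{overwrite}"])

    print(sys_scale)  # ['ERRSCALE: FITOPT_CAL 0.5', 'ERRSCALE: FITOPT_ZSHIFT 1.0']
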
Example #11
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 dependencies,
                 mode,
                 options,
                 index=0,
                 model_name=None):
        super().__init__(name,
                         output_dir,
                         config,
                         dependencies,
                         mode,
                         options,
                         index=index,
                         model_name=model_name)
        self.global_config = get_config()
        self.num_jobs = 4

        self.conda_env = self.global_config["SNIRF"]["conda_env"]
        self.path_to_classifier = get_output_loc(
            self.global_config["SNIRF"]["location"])
        self.job_base_name = os.path.basename(
            Path(output_dir).parents[1]) + "__" + os.path.basename(output_dir)
        self.features = options.get("FEATURES", "x1 c zHD x1ERR cERR PKMJDERR")
        self.validate_model()

        self.model_pk_file = "model.pkl"
        self.output_pk_file = os.path.join(self.output_dir, self.model_pk_file)
        self.fitopt = options.get("FITOPT", "DEFAULT")
        self.fitres_filename = None
        self.fitres_file = None

        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        self.slurm = """{sbatch_header}
Example #12
    def __init__(self, name, output_dir, create_cov_tasks, config, options,
                 global_config):
        # First check if all required options exist
        # In this case, WFITOPTS must exist with at least 1 entry

        self.wfitopts = options.get("WFITOPTS")
        if self.wfitopts is None:
            Task.fail_config(
                f"You have not specified any WFITOPTS for task {name}")
        Task.logger.debug(f"WFITOPTS for task {name}: {self.wfitopts}")
        if len(self.wfitopts) == 0:
            Task.fail_config(
                f"WFITOPTS for task {name} does not have any options!")

        base_file = get_data_loc("wfit/input_file.INPUT")
        super().__init__(name,
                         output_dir,
                         config,
                         base_file,
                         default_assignment=": ",
                         dependencies=create_cov_tasks)
        self.num_jobs = len(self.wfitopts)

        self.create_cov_tasks = create_cov_tasks
        self.logger.debug("CreateCov tasks: {self.create_cov_tasks}")
        self.create_cov_dirs = [
            os.path.join(t.output_dir, "output") for t in self.create_cov_tasks
        ]
        self.logger.debug("CreateCov directories: {self.create_cov_dirs}")
        self.options = options
        self.global_config = global_config
        self.done_file = os.path.join(self.output_dir, "output", "ALL.DONE")

        self.job_name = os.path.basename(
            Path(output_dir).parents[1]) + "_WFIT_" + name
        self.logfile = os.path.join(self.output_dir, "output.log")
        self.input_name = f"{self.job_name}.INPUT"
        self.input_file = os.path.join(self.output_dir, self.input_name)
Example #13
    def calculate_input(self):
        self.logger.debug(f"Calculating input")
        if self.prepare_cosmomc:
            self.yaml["COSMOMC_TEMPLATES_PATH"] = get_data_loc(
                self.templates_dir)
        else:
            self.yaml.pop("COSMOMC_TEMPLATES_PATH", None)
        self.yaml["NAME"] = self.name
        self.yaml["SYS_SCALE_FILE"] = self.sys_file_out
        self.yaml["INPUT_DIR"] = self.biascor_dep.output["fit_output_dir"]
        self.yaml["OUTDIR"] = self.config_dir
        self.yaml["VERSION"] = self.biascor_dep.output["subdirs"][self.index]
        self.yaml["MUOPT_SCALES"] = self.biascor_dep.output["muopt_scales"]
        self.yaml["COVOPTS"] = self.options.get("COVOPTS", [])
        self.yaml["EXTRA_COVS"] = self.options.get("EXTRA_COVS", [])
        self.yaml["CALIBRATORS"] = self.calibration_set

        # Load in sys file, add muopt arguments if needed
        # Get the MUOPT_SCALES and FITOPT scales keywords
        sys_scale = {
            **self.get_scales_from_fitopt_file(),
            **self.options.get("FITOPT_SCALES", {})
        }
        return sys_scale
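
The merged dict relies on Python's later-entries-win semantics, so explicit FITOPT_SCALES options override scales read from the fitopt file. A short demonstration with invented keys:

    file_scales = {"CAL": 1.0, "ZSHIFT": 1.0}   # as if read from the fitopt file
    option_scales = {"CAL": 0.5}                # explicit FITOPT_SCALES option
    sys_scale = {**file_scales, **option_scales}
    print(sys_scale)  # {'CAL': 0.5, 'ZSHIFT': 1.0}
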
Example #14
    def _run(self):

        if self.static:
            self.logger.info(
                "CMB only constraints detected, copying static files")

            cosmomc_static_loc = get_data_loc(self.static_path +
                                              self.ini_prefix)
            if cosmomc_static_loc is None:
                self.logger.error(
                    "Seems like we can't find the static chains...")
                return False
            else:

                new_hash = self.get_hash_from_string(cosmomc_static_loc)

                if self._check_regenerate(new_hash):
                    self.logger.debug("Regenerating and copying static chains")
                    shutil.rmtree(self.chain_dir, ignore_errors=True)
                    shutil.copytree(cosmomc_static_loc, self.chain_dir)
                    for done_file in self.done_files:
                        df = os.path.join(self.output_dir, done_file)
                        with open(df, "w") as f:
                            f.write("SUCCESS")
                    self.save_new_hash(new_hash)

                else:
                    self.should_be_done()
                    self.logger.info("Hash check passed, not rerunning")
                return True  # static chains handled above; the batch submission below doesn't apply
        else:
            ini_filecontents = self.get_ini_file()
            if ini_filecontents is None:
                return False

        if self.batch_file is None:
            if self.gpu:
                self.sbatch_header = self.sbatch_gpu_header
            else:
                self.sbatch_header = self.sbatch_cpu_header
        else:
            with open(self.batch_file, 'r') as f:
                self.sbatch_header = f.read()
            self.sbatch_header = self.clean_header(self.sbatch_header)

        header_dict = {
            "REPLACE_NAME": self.job_name,
            "REPLACE_WALLTIME": "34:00:00",
            "REPLACE_LOGFILE": self.logfile,
            "REPLACE_MEM": "2GB",
            "APPEND": [
                f"#SBATCH --ntasks={self.ntasks}",
                f"#SBATCH --array=1-{len(self.ini_files)}",
                "#SBATCH --cpus-per-task=1"
            ]
        }
        header_dict = merge_dict(header_dict, self.batch_replace)
        self.update_header(header_dict)

        setup_dict = {
            "done_files": " ".join(self.done_files),
            "path_to_cosmomc": self.path_to_cosmomc,
            "output_dir": self.output_dir,
            "ini_files": " ".join(self.ini_files),
            "num_jobs": len(self.ini_files),
            "num_walkers": self.num_walkers,
        }

        format_dict = {
            "sbatch_header": self.sbatch_header,
            "task_setup": self.update_setup(setup_dict,
                                            self.task_setup['cosmomc'])
        }
        final_slurm = self.slurm.format(**format_dict)

        new_hash = self.get_hash_from_string(final_slurm +
                                             " ".join(ini_filecontents))

        if self._check_regenerate(new_hash):
            self.logger.debug("Regenerating and launching task")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.save_new_hash(new_hash)
            slurm_output_file = os.path.join(self.output_dir, "slurm.job")
            with open(slurm_output_file, "w") as f:
                f.write(final_slurm)
            for file, content in zip(self.ini_files, ini_filecontents):
                filepath = os.path.join(self.output_dir, file)
                with open(filepath, "w") as f:
                    f.write(content)
            mkdirs(self.chain_dir)

            needed_dirs = [
                "data", "paramnames", "camb", "batch1", "batch2", "batch3"
            ]
            for d in needed_dirs:
                self.logger.debug(f"Creating symlink to {d} dir")
                original_data_dir = os.path.join(self.path_to_cosmomc, d)
                new_data_dir = os.path.join(self.output_dir, d)
                os.symlink(original_data_dir,
                           new_data_dir,
                           target_is_directory=True)

            self.logger.info(f"Submitting batch job for data prep")
            subprocess.run(["sbatch", slurm_output_file], cwd=self.output_dir)
        else:
            self.should_be_done()
            self.logger.info("Hash check passed, not rerunning")
        return True
Example #15
    def _run(self, force_refresh):

        if self.static:
            self.logger.info(
                "CMB only constraints detected, copying static files")

            cosmomc_static_loc = get_data_loc(self.static_path +
                                              self.ini_prefix)
            if cosmomc_static_loc is None:
                self.logger.error(
                    "Seems like we can't find the static chains...")
                return False
            else:

                new_hash = self.get_hash_from_string(cosmomc_static_loc)
                old_hash = self.get_old_hash()

                if force_refresh or new_hash != old_hash:
                    self.logger.debug("Regenerating and copying static chains")
                    shutil.rmtree(self.chain_dir, ignore_errors=True)
                    shutil.copytree(cosmomc_static_loc, self.chain_dir)
                    for done_file in self.done_files:
                        df = os.path.join(self.output_dir, done_file)
                        with open(df, "w") as f:
                            f.write("SUCCESS")
                    self.save_new_hash(new_hash)

                else:
                    self.should_be_done()
                    self.logger.info("Hash check passed, not rerunning")
        else:
            ini_filecontents = self.get_ini_file()
            if ini_filecontents is None:
                return False

            format_dict = {
                "job_name": self.job_name,
                "log_file": self.logfile,
                "done_files": " ".join(self.done_files),
                "path_to_cosmomc": self.path_to_cosmomc,
                "output_dir": self.output_dir,
                "ini_files": " ".join(self.ini_files),
                "num_jobs": len(self.ini_files),
                "num_walkers": self.num_walkers,
            }
            final_slurm = self.slurm.format(**format_dict)

            new_hash = self.get_hash_from_string(final_slurm +
                                                 " ".join(ini_filecontents))
            old_hash = self.get_old_hash()

            if force_refresh or new_hash != old_hash:
                self.logger.debug("Regenerating and launching task")
                shutil.rmtree(self.output_dir, ignore_errors=True)
                mkdirs(self.output_dir)
                self.save_new_hash(new_hash)
                slurm_output_file = os.path.join(self.output_dir, "slurm.job")
                with open(slurm_output_file, "w") as f:
                    f.write(final_slurm)
                for file, content in zip(self.ini_files, ini_filecontents):
                    filepath = os.path.join(self.output_dir, file)
                    with open(filepath, "w") as f:
                        f.write(content)
                mkdirs(self.chain_dir)

                needed_dirs = [
                    "data", "paramnames", "camb", "batch1", "batch2", "batch3"
                ]
                for d in needed_dirs:
                    self.logger.debug(f"Creating symlink to {d} dir")
                    original_data_dir = os.path.join(self.path_to_cosmomc, d)
                    new_data_dir = os.path.join(self.output_dir, d)
                    os.symlink(original_data_dir,
                               new_data_dir,
                               target_is_directory=True)

                self.logger.info(f"Submitting batch job for data prep")
                subprocess.run(["sbatch", slurm_output_file],
                               cwd=self.output_dir)
            else:
                self.should_be_done()
                self.logger.info("Hash check passed, not rerunning")
        return True
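
Examples #14 and #15 differ mainly in how the regeneration decision is made: the newer _run delegates to self._check_regenerate(new_hash), while the older one compares old and new hashes inline. The guard itself reduces to a few lines; a self-contained sketch, assuming the previous hash is persisted to a hash.txt in the output directory (the function names and the MD5 choice are illustrative, not the task class's actual API):

    import hashlib
    import os

    def get_hash_from_string(s):
        return hashlib.md5(s.encode()).hexdigest()

    def check_regenerate(output_dir, new_hash, force_refresh=False):
        # Rerun if forced, if no previous hash exists, or if the inputs changed.
        hash_file = os.path.join(output_dir, "hash.txt")
        old_hash = None
        if os.path.exists(hash_file):
            with open(hash_file) as f:
                old_hash = f.read().strip()
        return force_refresh or old_hash != new_hash
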
Example #16
    def __init__(self, name, output_dir, sim_task, config, global_config):

        self.config = config
        self.global_config = global_config

        base = config.get("BASE")
        if base is None:
            Task.fail_config(
                f"You have not specified a BASE nml file for task {name}")
        self.base_file = get_data_loc(base)
        if self.base_file is None:
            Task.fail_config(
                f"Base file {base} cannot be found for task {name}")

        super().__init__(name,
                         output_dir,
                         self.base_file,
                         " = ",
                         dependencies=[sim_task])

        self.sim_task = sim_task
        self.sim_version = sim_task.output["genversion"]
        self.config_path = self.output_dir + "/FIT_" + self.sim_version + ".nml"
        self.lc_output_dir = os.path.join(self.output_dir, "output")
        self.lc_log_dir = os.path.join(self.lc_output_dir, "SPLIT_JOBS_LCFIT")
        self.fitres_dirs = [
            os.path.join(self.lc_output_dir, os.path.basename(s))
            for s in self.sim_task.output["sim_folders"]
        ]

        self.logging_file = self.config_path.replace(".nml", ".nml_log")
        self.done_file = f"{self.output_dir}/FINISHED.DONE"
        secondary_log = os.path.join(self.lc_log_dir, "MERGELOGS/MERGE2.LOG")

        self.log_files = [self.logging_file, secondary_log]
        self.num_empty_threshold = 20  # Damn that tarball creation can be so slow
        self.display_threshold = 8
        self.output["fitres_dirs"] = self.fitres_dirs
        self.output["nml_file"] = self.config_path
        self.output["genversion"] = self.sim_version
        self.output["sim_name"] = sim_task.output["name"]
        self.output["blind"] = sim_task.output["blind"]
        self.output["lc_output_dir"] = self.lc_output_dir
        self.str_pattern = re.compile("[A-DG-SU-Za-dg-su-z]")

        is_data = False
        for d in self.dependencies:
            if isinstance(d, DataPrep):
                is_data = not d.output["is_sim"]
        self.output["is_data"] = is_data

        # Loading fitopts
        fitopts = config.get("FITOPTS", [])
        if isinstance(fitopts, str):
            fitopts = [fitopts]

        self.logger.debug("Loading fitopts")
        self.fitopts = []
        for f in fitopts:
            potential_path = get_data_loc(f)
            if potential_path is not None and os.path.exists(potential_path):
                self.logger.debug(f"Loading in fitopts from {potential_path}")
                with open(potential_path) as f:
                    new_fitopts = list(f.read().splitlines())
                    self.fitopts += new_fitopts
                    self.logger.debug(
                        f"Loaded {len(new_fitopts)} fitopts file from {potential_path}"
                    )
            else:
                assert "[" in f and "]" in f, f"Manual fitopt {f} for lcfit {self.name} should specify a label in square brackets"
                if not f.startswith("FITOPT:"):
                    f = "FITOPT: " + f
                self.logger.debug(f"Adding manual fitopt {f}")
                self.fitopts.append(f)
        # Map the fitopt outputs
        mapped = {"DEFAULT": "FITOPT000.FITRES"}
        mapped2 = {0: "DEFAULT"}
        for i, line in enumerate(self.fitopts):
            label = line.split("[")[1].split("]")[0]
            mapped[line] = f"FITOPT{i + 1:3d}.FITRES"
            mapped2[i] = label
        self.output["fitopt_map"] = mapped
        self.output["fitopt_index"] = mapped
        self.output["fitres_file"] = os.path.join(self.fitres_dirs[0],
                                                  mapped["DEFAULT"])

        self.options = self.config.get("OPTS", {})
        # Try to determine how many jobs will be put in the queue
        try:
            batch_info = self.options.get("BATCH_INFO") or self.get_property(
                "BATCH_INFO", assignment=": ")
            self.num_jobs = int(batch_info.split()[-1])
        except Exception:
            self.num_jobs = 10
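
The fitopt bookkeeping above maps DEFAULT to FITOPT000.FITRES and the i-th fitopt to the zero-padded FITOPT{i+1:03d}.FITRES, alongside a parallel index-to-label map. A quick standalone check using two invented manual fitopts:

    fitopts = [
        "FITOPT: [NOREJECT] FITWIN_PROB 0 1",
        "FITOPT: [ZSHIFT] ZSHIFT 0.01",
    ]
    mapped = {"DEFAULT": "FITOPT000.FITRES"}
    labels = {0: "DEFAULT"}
    for i, line in enumerate(fitopts):
        label = line.split("[")[1].split("]")[0]
        mapped[line] = f"FITOPT{i + 1:03d}.FITRES"
        labels[i + 1] = label

    print(mapped["FITOPT: [ZSHIFT] ZSHIFT 0.01"])  # FITOPT002.FITRES
    print(labels)  # {0: 'DEFAULT', 1: 'NOREJECT', 2: 'ZSHIFT'}
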
Example #17
    def __init__(self, name, output_dir, sim_task, config, global_config):

        self.config = config
        self.global_config = global_config

        base = config.get("BASE")
        if base is None:
            Task.fail_config(f"You have not specified a BASE nml file for task {name}")
        self.base_file = get_data_loc(base)
        if self.base_file is None:
            Task.fail_config(f"Base file {base} cannot be found for task {name}")

        super().__init__(name, output_dir, config, self.base_file, " = ", dependencies=[sim_task])

        self.sim_task = sim_task
        self.sim_version = sim_task.output["genversion"]
        self.config_path = self.output_dir + "/FIT_" + self.sim_version + ".nml"
        self.lc_output_dir = os.path.join(self.output_dir, "output")
        self.lc_log_dir = os.path.join(self.lc_output_dir, "SPLIT_JOBS_LCFIT")
        self.fitres_dirs = [os.path.join(self.lc_output_dir, os.path.basename(s)) for s in self.sim_task.output["sim_folders"]]

        self.logging_file = self.config_path.replace(".nml", ".LOG")
        self.kill_file = self.config_path.replace(".input", "_KILL.LOG")

        self.done_file = f"{self.lc_output_dir}/ALL.DONE"

        self.merge_log = os.path.join(self.lc_output_dir, "MERGE.LOG")

        self.log_files = [self.logging_file]
        self.num_empty_threshold = 20  # Damn that tarball creation can be so slow
        self.display_threshold = 8
        self.output["fitres_dirs"] = self.fitres_dirs
        self.output["base_file"] = self.base_file
        self.output["nml_file"] = self.config_path
        self.output["genversion"] = self.sim_version
        self.output["sim_name"] = sim_task.output["name"]
        self.output["blind"] = sim_task.output["blind"]
        self.output["lc_output_dir"] = self.lc_output_dir
        self.str_pattern = re.compile("[A-DG-SU-Za-dg-su-z]")

        self.validate_fitopts(config)

        is_data = False
        for d in self.dependencies:
            if isinstance(d, DataPrep):
                is_data = not d.output["is_sim"]
        self.output["is_data"] = is_data

        self.options = self.config.get("OPTS", {})
        # Try to determine how many jobs will be put in the queue
        # First see if it's been explicitly set
        num_jobs = self.options.get("NUM_JOBS")
        if num_jobs is not None:
            self.num_jobs = num_jobs
            self.logger.debug("Num jobs set by NUM_JOBS option")
        else:
            try:
                batch_info = self.options.get("BATCH_INFO") or self.yaml["CONFIG"].get("BATCH_INFO")
                self.num_jobs = int(batch_info.split()[-1])
                self.logger.debug("Num jobs set by BATCH_INFO")
            except Exception:
                self.logger.warning("Could not determine BATCH_INFO for job, setting num_jobs to 10")
                self.num_jobs = 10
                self.logger.debug("Num jobs set to default")
Example #18
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 options,
                 global_config,
                 dependencies=None):
        super().__init__(name,
                         output_dir,
                         config=config,
                         dependencies=dependencies)
        self.options = options
        self.global_config = get_config()

        self.logfile = os.path.join(self.output_dir, "output.log")
        self.conda_env = self.global_config["DataSkimmer"]["conda_env"]
        self.path_to_task = output_dir

        self.unparsed_raw = self.options.get("RAW_DIR")
        self.raw_dir = get_data_loc(self.unparsed_raw)
        if self.raw_dir is None:
            Task.fail_config(f"Unable to find {self.options.get('RAW_DIR')}")

        self.genversion = os.path.basename(self.raw_dir)
        self.data_path = os.path.dirname(self.raw_dir)
        if self.unparsed_raw == "$SCRATCH_SIMDIR" or "SNDATA_ROOT/SIM" in self.raw_dir:
            self.logger.debug("Removing PRIVATE_DATA_PATH from NML file")
            self.data_path = ""
        self.job_name = os.path.basename(
            Path(output_dir).parents[1]) + "_DATAPREP_" + self.name

        self.output_info = os.path.join(self.output_dir,
                                        f"{self.genversion}.YAML")
        self.output["genversion"] = self.genversion
        self.opt_setpkmjd = options.get("OPT_SETPKMJD", 16)
        self.photflag_mskrej = options.get("PHOTFLAG_MSKREJ", 1016)
        self.output["data_path"] = self.data_path
        self.output["photometry_dirs"] = [get_output_loc(self.raw_dir)]
        self.output["sim_folders"] = [get_output_loc(self.raw_dir)]
        self.output["raw_dir"] = self.raw_dir
        self.clump_file = os.path.join(self.output_dir,
                                       self.genversion + ".SNANA.TEXT")
        self.output["clump_file"] = self.clump_file
        self.output["ranseed_change"] = False
        is_sim = options.get("SIM", False)
        self.output["is_sim"] = is_sim
        self.output["blind"] = options.get("BLIND", True)

        self.types_dict = options.get("TYPES")
        if self.types_dict is None:
            self.types_dict = {
                "IA": [1],
                "NONIA": [
                    2, 20, 21, 22, 29, 30, 31, 32, 33, 39, 40, 41, 42, 43, 80,
                    81
                ]
            }
        else:
            for key in self.types_dict.keys():
                self.types_dict[key] = [int(c) for c in self.types_dict[key]]

        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        self.logger.debug(f"\tIA types are {self.types_dict['IA']}")
        self.logger.debug(f"\tNONIA types are {self.types_dict['NONIA']}")
        self.output["types_dict"] = self.types_dict
        self.types = OrderedDict()
        for n in self.types_dict["IA"]:
            self.types.update({n: "Ia"})
        for n in self.types_dict["NONIA"]:
            self.types.update({n: "II"})
        self.output["types"] = self.types

        self.slurm = """{sbatch_header}
        {task_setup}"""

        self.clump_command = """#
Example #19
    def __init__(self,
                 name,
                 output_dir,
                 genversion,
                 config,
                 global_config,
                 combine="combine.input"):
        self.data_dirs = global_config["DATA_DIRS"]
        base_file = get_data_loc(combine)
        super().__init__(name, output_dir, base_file, ": ")

        self.genversion = genversion
        if len(genversion) < 30:
            self.genprefix = self.genversion
        else:
            gen_hash = get_hash(self.genversion)[:5]
            self.genprefix = self.genversion[:25] + gen_hash

        self.config = config
        self.options = config.get("OPTS", {})
        self.reserved_keywords = ["BASE"]
        self.config_path = f"{self.output_dir}/{self.genversion}.input"  # Make sure this syncs with the tmp file name

        # Determine the type of each component
        keys = [k for k in config.keys() if k != "GLOBAL" and k != "OPTS"]
        self.base_ia = []
        self.base_cc = []
        types = {}
        types_dict = {"IA": [], "NONIA": []}
        for k in keys:
            d = config[k]
            base_file = d.get("BASE")
            if base_file is None:
                Task.fail_config(
                    f"Your simulation component {k} for sim name {self.name} needs to specify a BASE input file"
                )
            base_path = get_data_loc(base_file)
            if base_path is None:
                Task.fail_config(
                    f"Cannot find sim component {k} base file at {base_path} for sim name {self.name}"
                )

            gentype, genmodel = None, None
            with open(base_path) as f:
                for line in f.read().splitlines():
                    if line.upper().strip().startswith("GENTYPE:"):
                        gentype = line.upper().split(":")[1].strip()
                    if line.upper().strip().startswith("GENMODEL:"):
                        genmodel = line.upper().split(":")[1].strip()
            gentype = gentype or d.get("GENTYPE")
            genmodel = genmodel or d.get("GENMODEL")

            if not gentype:
                Task.fail_config(
                    f"Cannot find GENTYPE for component {k} and base file {base_path}"
                )
            if not genmodel:
                Task.fail_config(
                    f"Cannot find GENMODEL for component {k} and base file {base_path}"
                )

            type2 = "1" + f"{int(gentype):02d}"
            if "SALT2" in genmodel:
                self.base_ia.append(base_file)
                types[gentype] = "Ia"
                types[type2] = "Ia"
                types_dict["IA"].append(int(gentype))
                types_dict["IA"].append(int(type2))
            else:
                self.base_cc.append(base_file)
                types[gentype] = "II"
                types[type2] = "II"
                types_dict["NONIA"].append(int(gentype))
                types_dict["NONIA"].append(int(type2))

        sorted_types = collections.OrderedDict(sorted(types.items()))
        self.logger.debug(f"Types found: {json.dumps(sorted_types)}")
        self.output["types_dict"] = types_dict
        self.output["types"] = sorted_types
        self.global_config = global_config

        rankeys = [
            r for r in config["GLOBAL"].keys() if r.startswith("RANSEED_")
        ]
        value = int(
            config["GLOBAL"][rankeys[0]].split(" ")[0]) if rankeys else 1
        self.set_num_jobs(2 * value)

        self.sim_log_dir = f"{self.output_dir}/LOGS"
        self.total_summary = os.path.join(self.sim_log_dir,
                                          "TOTAL_SUMMARY.LOG")
        self.done_file = f"{self.output_dir}/FINISHED.DONE"
        self.logging_file = self.config_path.replace(".input", ".LOG")
        self.output["blind"] = self.options.get("BLIND", False)
        self.derived_batch_info = None

        # Determine if all the top level input files exist
        if len(self.base_ia + self.base_cc) == 0:
            Task.fail_config(
                "Your sim has no components specified! Please add something to simulate!"
            )

        # Try to determine how many jobs will be put in the queue
        try:
            # If BATCH_INFO is set, we'll use that
            batch_info = self.config.get("GLOBAL", {}).get("BATCH_INFO")
            default_batch_info = self.get_property("BATCH_INFO",
                                                   assignment=": ")

            # If it's not set, let's check for ranseed_repeat or ranseed_change
            if batch_info is None:
                ranseed_repeat = self.config.get("GLOBAL",
                                                 {}).get("RANSEED_REPEAT")
                ranseed_change = self.config.get("GLOBAL",
                                                 {}).get("RANSEED_CHANGE")
                ranseed = ranseed_repeat or ranseed_change

                if ranseed:
                    num_jobs = int(ranseed.strip().split()[0])
                    self.logger.debug(
                        f"Found a randseed with {num_jobs}, deriving batch info"
                    )
                    comps = default_batch_info.strip().split()
                    comps[-1] = str(num_jobs)
                    self.derived_batch_info = " ".join(comps)
                    self.num_jobs = num_jobs
            else:
                # self.logger.debug(f"BATCH INFO property detected as {property}")
                self.num_jobs = int(default_batch_info.split()[-1])
        except Exception:
            self.logger.warning(
                f"Unable to determine how many jobs simulation {self.name} has"
            )
            self.num_jobs = 10

        self.output["genversion"] = self.genversion
        self.output["genprefix"] = self.genprefix

        ranseed_change = self.config.get("GLOBAL", {}).get("RANSEED_CHANGE")
        base = os.path.expandvars(
            f"{self.global_config['SNANA']['sim_dir']}/{self.genversion}")
        if ranseed_change:
            num_sims = int(ranseed_change.split()[0])
            self.logger.debug(
                "Detected randseed change with {num_sims} sims, updating sim_folders"
            )
            self.sim_folders = [
                base + f"-{i + 1:04d}" for i in range(num_sims)
            ]
        else:
            self.sim_folders = [base]
        self.output["ranseed_change"] = ranseed_change is not None
        self.output["sim_folders"] = self.sim_folders
Example #20
    def write_input(self, force_refresh):
        self.set_property("GENVERSION",
                          self.genversion,
                          assignment=": ",
                          section_end="ENDLIST_GENVERSION")
        self.set_property("LOGDIR",
                          os.path.basename(self.sim_log_dir),
                          assignment=": ",
                          section_end="ENDLIST_GENVERSION")
        for k in self.config.keys():
            if k.upper() != "GLOBAL":
                run_config = self.config[k]
                run_config_keys = list(run_config.keys())
                assert "BASE" in run_config_keys, "You must specify a base file for each option"
                for key in run_config_keys:
                    if key.upper() in self.reserved_keywords:
                        continue
                    base_file = run_config["BASE"]
                    match = os.path.basename(base_file).split(".")[0]
                    val = run_config[key]
                    if not isinstance(val, list):
                        val = [val]
                    for v in val:
                        self.set_property(f"GENOPT({match})",
                                          f"{key} {v}",
                                          section_end="ENDLIST_GENVERSION",
                                          only_add=True)

        if len(self.data_dirs) > 1:
            data_dir = self.data_dirs[0]
            self.set_property("PATH_USER_INPUT", data_dir, assignment=": ")

        for key in self.config.get("GLOBAL", []):
            if key.upper() == "BASE":
                continue
            direct_set = [
                "FORMAT_MASK", "RANSEED_REPEAT", "RANSEED_CHANGE",
                "BATCH_INFO", "BATCH_MEM", "NGEN_UNIT", "RESET_CIDOFF"
            ]
            if key in direct_set:
                self.set_property(key,
                                  self.config["GLOBAL"][key],
                                  assignment=": ")
            else:
                self.set_property(f"GENOPT_GLOBAL: {key}",
                                  self.config["GLOBAL"][key],
                                  assignment=" ")

            if self.derived_batch_info:
                self.set_property("BATCH_INFO",
                                  self.derived_batch_info,
                                  assignment=": ")

            if key == "RANSEED_CHANGE":
                self.delete_property("RANSEED_REPEAT")
            elif key == "RANSEED_REPEAT":
                self.delete_property("RANSEED_CHANGE")

        self.set_property(
            "SIMGEN_INFILE_Ia",
            " ".join([os.path.basename(f)
                      for f in self.base_ia]) if self.base_ia else None)
        self.set_property(
            "SIMGEN_INFILE_NONIa",
            " ".join([os.path.basename(f)
                      for f in self.base_cc]) if self.base_cc else None)
        self.set_property("GENPREFIX", self.genprefix)

        # Put config in a temp directory
        temp_dir_obj = tempfile.TemporaryDirectory()
        temp_dir = temp_dir_obj.name

        # Copy the base files across
        input_paths = []
        for f in self.base_ia + self.base_cc:
            resolved = get_data_loc(f)
            shutil.copy(resolved, temp_dir)
            input_paths.append(os.path.join(temp_dir, os.path.basename(f)))
            self.logger.debug(f"Copying input file {resolved} to {temp_dir}")

        # Copy the include input file if there is one
        input_copied = []
        fs = self.base_ia + self.base_cc
        for ff in fs:
            if ff not in input_copied:
                input_copied.append(ff)
                path = get_data_loc(ff)
                copied_path = os.path.join(temp_dir, os.path.basename(path))
                with open(path, "r") as f:
                    for line in f.readlines():
                        line = line.strip()
                        if line.startswith("INPUT_FILE_INCLUDE"):
                            include_file = line.split(":")[-1].strip()
                            include_file_path = get_data_loc(include_file)
                            self.logger.debug(
                                f"Copying INPUT_FILE_INCLUDE file {include_file_path} to {temp_dir}"
                            )

                            include_file_basename = os.path.basename(
                                include_file_path)
                            include_file_output = os.path.join(
                                temp_dir, include_file_basename)

                            if include_file_output not in input_copied:

                                # Copy include file into the temp dir
                                shutil.copy(include_file_path, temp_dir)

                                # Then SED the file to replace the full path with just the basename
                                if include_file != include_file_basename:
                                    sed_command = f"sed -i -e 's|{include_file}|{include_file_basename}|g' {copied_path}"
                                    self.logger.debug(
                                        f"Running sed command: {sed_command}")
                                    subprocess.run(sed_command,
                                                   stderr=subprocess.STDOUT,
                                                   cwd=temp_dir,
                                                   shell=True)

                                # And make sure we don't do this file again
                                fs.append(include_file_output)

        # Write the primary input file
        main_input_file = f"{temp_dir}/{self.genversion}.input"
        with open(main_input_file, "w") as f:
            f.writelines(map(lambda s: s + "\n", self.base))
        self.logger.info(f"Input file written to {main_input_file}")

        # Remove any duplicates and order the output files
        output_files = [
            f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))
        ]
        self.logger.debug(
            f"{len(output_files)} files used to create simulation. Hashing them."
        )

        # Get current hash
        new_hash = self.get_hash_from_files(output_files)
        old_hash = self.get_old_hash()
        regenerate = force_refresh or (old_hash is None
                                       or old_hash != new_hash)

        if regenerate:
            self.logger.info(f"Running simulation")
            # Clean output dir. God I feel dangerous doing this, hence the hopefully unnecessary path check
            if "//" not in self.output_dir and len(self.output_dir) > 30:
                self.logger.debug(
                    f"Cleaning output directory {self.output_dir}")
                shutil.rmtree(self.output_dir, ignore_errors=True)
                mkdirs(self.output_dir)
                self.logger.debug(
                    f"Copying from {temp_dir} to {self.output_dir}")
                copytree(temp_dir, self.output_dir)
                self.save_new_hash(new_hash)
            else:
                self.logger.error(
                    f"Seems to be an issue with the output dir path: {self.output_dir}"
                )

            chown_dir(self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")
        temp_dir_obj.cleanup()
        return regenerate, new_hash
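
The sed call above rewrites any full INPUT_FILE_INCLUDE path in the copied base file to its basename. An equivalent pure-Python substitution (a sketch of an alternative, not what the code above runs) avoids shelling out:

    def replace_in_file(path, old, new):
        # Read the file, substitute the path, and write it back in place.
        with open(path) as f:
            content = f.read()
        with open(path, "w") as f:
            f.write(content.replace(old, new))

    # e.g. replace_in_file(copied_path, include_file, include_file_basename)
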
Example #21
    def __init__(self,
                 name,
                 output_dir,
                 dependencies=None,
                 config=None,
                 done_file="done.txt"):
        self.name = name
        self.output_dir = output_dir
        self.num_jobs = 1
        if dependencies is None:
            dependencies = []
        self.dependencies = dependencies

        if config is None:
            config = {}
        self.config = copy.deepcopy(config)
        self.output = {}

        # Determine if this is an external (already done) job or not
        external_dirs = self.config.get("EXTERNAL_DIRS", [])
        external_names = [os.path.basename(d) for d in external_dirs]
        external_map = self.config.get("EXTERNAL_MAP", {})
        output_name = os.path.basename(output_dir)
        name_match = external_map.get(output_name)
        if external_dirs:
            # This will only trigger if EXTERNAL_MAP is defined and output_name is in external_map
            if name_match is not None:
                matching_dirs = [d for d in external_dirs if name_match in d]
                if len(matching_dirs) == 0:
                    self.logger.error(
                        f"Task {output_name} has external mapping {name_match} but there were no matching EXTERNAL_DIRS"
                    )
                else:
                    if len(matching_dirs) > 1:
                        self.logger.warning(
                            f"Task {output_name} has external mapping {name_match} which matched with multiple EXTERNAL_DIRS: {matching_dirs}. Defaulting to {matching_dirs[0]}"
                        )

                    self.logger.info(f"Found external match for {output_name}")
                    self.config["EXTERNAL"] = matching_dirs[0]
            # If you haven't specified an EXTERNAL_MAP for this output_name, check for exact match
            elif output_name in external_names:
                self.config["EXTERNAL"] = external_dirs[external_names.index(
                    output_name)]
            else:
                self.logger.info(f"No external match found for {output_name}")

        self.external = self.config.get("EXTERNAL")
        if self.external is not None:
            self.logger.debug(f"External config stated to be {self.external}")
            self.external = get_data_loc(self.external)
            # External directory might be compressed
            if not os.path.exists(self.external):
                self.logger.warning(
                    f"External config {self.external} does not exist, checking if it's compressed"
                )
                compressed_dir = self.external + ".tar.gz"
                if not os.path.exists(compressed_dir):
                    self.logger.error(
                        f"{self.external} and {compressed_dir} do not exist")
                else:
                    self.external = compressed_dir
                    self.logger.debug(
                        f"External config file path resolved to {self.external}"
                    )
                    with tarfile.open(self.external, "r:gz") as tar:
                        for member in tar:
                            if member.isfile():
                                filename = os.path.basename(member.name)
                                if filename != "config.yml":
                                    continue
                                with tar.extractfile(member) as f:
                                    external_config = yaml.load(
                                        f, Loader=yaml.Loader)
                                    conf = external_config.get("CONFIG", {})
                                    conf.update(self.config)
                                    self.config = conf
                                    self.output = external_config.get(
                                        "OUTPUT", {})
                                    self.logger.debug(
                                        "Loaded external config successfully")
            else:
                if os.path.isdir(self.external):
                    self.external = os.path.join(self.external, "config.yml")
                self.logger.debug(
                    f"External config file path resolved to {self.external}")
                with open(self.external, "r") as f:
                    external_config = yaml.load(f, Loader=yaml.Loader)
                    conf = external_config.get("CONFIG", {})
                    conf.update(self.config)
                    self.config = conf
                    self.output = external_config.get("OUTPUT", {})
                    self.logger.debug("Loaded external config successfully")

        self.hash = None
        self.hash_file = os.path.join(self.output_dir, "hash.txt")
        self.done_file = os.path.join(self.output_dir, done_file)

        # Info about the job run
        self.start_time = None
        self.end_time = None
        self.wall_time = None
        self.stage = None
        self.fresh_run = True
        self.num_empty = 0
        self.num_empty_threshold = 10
        self.display_threshold = 0
        self.gpu = False

        self.force_refresh = False
        self.force_ignore = False

        self.output.update({
            "name": name,
            "output_dir": output_dir,
            "hash_file": self.hash_file,
            "done_file": self.done_file
        })
        self.config_file = os.path.join(output_dir, "config.yml")
Example #22
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 options,
                 global_config,
                 dependencies=None,
                 index=0):

        base_file = get_data_loc("create_cov/input_file.txt")
        super().__init__(name,
                         output_dir,
                         config,
                         base_file,
                         default_assignment=": ",
                         dependencies=dependencies)

        if options is None:
            options = {}
        self.options = options
        self.templates_dir = self.options.get("INI_DIR", "cosmomc_templates")
        self.global_config = get_config()
        self.index = index
        self.job_name = os.path.basename(
            Path(output_dir).parents[1]) + "_CREATE_COV_" + name
        # self.path_to_code = os.path.abspath(os.path.dirname(inspect.stack()[0][1]) + "/external/")
        self.path_to_code = '$SNANA_DIR/util/'  # Now maintained by SNANA

        self.batch_mem = options.get("BATCH_MEM", "4GB")

        self.logfile = os.path.join(self.output_dir, "output.log")
        self.sys_file_out = os.path.join(self.output_dir, "sys_scale.yml")
        self.chain_dir = os.path.join(self.output_dir, "chains/")
        self.config_dir = os.path.join(self.output_dir, "output")
        self.subtract_vpec = options.get("SUBTRACT_VPEC", False)
        self.unbinned_covmat_addin = options.get("UNBINNED_COVMAT_ADDIN", [])

        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        self.binned = options.get("BINNED", not self.subtract_vpec)
        self.rebinned_x1 = options.get("REBINNED_X1", "")
        if self.rebinned_x1 != "":
            self.rebinned_x1 = f"--nbin_x1 {self.rebinned_x1}"
        self.rebinned_c = options.get("REBINNED_C", "")
        if self.rebinned_c != "":
            self.rebinned_c = f"--nbin_c {self.rebinned_c}"
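        # For example (hypothetical option), REBINNED_X1: 2 becomes the CLI
        # fragment "--nbin_x1 2", while an unset option remains "" and so
        # contributes nothing to the eventual command line.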

        self.biascor_dep = self.get_dep(BiasCor, fail=True)
        self.sys_file_in = self.get_sys_file_in()
        self.output["blind"] = self.biascor_dep.output["blind"]
        self.input_file = os.path.join(
            self.output_dir,
            self.biascor_dep.output["subdirs"][index] + ".input")
        self.calibration_set = options.get("CALIBRATORS", [])
        self.output["hubble_plot"] = self.biascor_dep.output["hubble_plot"]

        if self.config.get("COSMOMC", True):
            self.logger.info("Generating cosmomc output")
            self.output["ini_dir"] = os.path.join(self.config_dir, "cosmomc")
            self.prepare_cosmomc = True
        else:
            self.logger.info("Not generating cosmomc output")
            self.prepare_cosmomc = False
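        # Map each COVOPT label to an index, reserving "ALL" as 0. A COVOPTS
        # entry such as "[NOSYS] ..." (hypothetical) parses to "NOSYS" via
        # covopt.split("]")[0][1:], giving {"ALL": 0, "NOSYS": 1}.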
        covopts_map = {"ALL": 0}
        for i, covopt in enumerate(self.options.get("COVOPTS", [])):
            covopts_map[covopt.split("]")[0][1:]] = i + 1
        self.output["covopts"] = covopts_map
        self.output["index"] = index
        self.output["bcor_name"] = self.biascor_dep.name
        self.slurm = """{sbatch_header}
Example No. 23
    def __init__(self, name, output_dir, config, dependencies, options,
                 global_config):
        base = get_data_loc(config.get("BASE",
                                       "surveys/des/bbc/bbc_5yr.input"))
        self.base_file = base
        super().__init__(name,
                         output_dir,
                         config,
                         base,
                         "=",
                         dependencies=dependencies)

        self.options = options
        self.logging_file = os.path.join(self.output_dir, "output.log")
        self.global_config = get_config()

        self.prob_cols = config["PROB_COLS"]

        self.merged_data = config.get("DATA")
        self.merged_iasim = config.get("SIMFILE_BIASCOR")
        self.merged_ccsim = config.get("SIMFILE_CCPRIOR")
        self.classifier = config.get("CLASSIFIER")
        if self.classifier is not None:
            self.config["CLASSIFIER"] = self.classifier.name
        self.make_all = config.get("MAKE_ALL_HUBBLE", True)
        self.use_recalibrated = config.get("USE_RECALIBRATED", False)
        self.consistent_sample = config.get("CONSISTENT_SAMPLE", True)
        self.bias_cor_fits = None
        self.cc_prior_fits = None
        self.data = None
        self.data_fitres = None
        self.sim_names = [m.output["sim_name"] for m in self.merged_data]
        self.blind = self.get_blind(config, options)
        self.logger.debug(f"Blinding set to {self.blind}")
        self.output["blind"] = self.blind
        self.genversions = [m.output["genversion"] for m in self.merged_data]
        self.num_versions = [
            len(m.output["fitres_dirs"]) for m in self.merged_data
        ]
        self.output["fitopt_files"] = [
            m.output.get("fitopt_file") for m in self.merged_data
        ]
        self.genversion = "_".join(self.sim_names) + (
            "" if self.classifier is None else "_" + self.classifier.name)

        self.config_filename = f"{self.name}.input"  # Make sure this syncs with the tmp file name
        self.config_path = os.path.join(self.output_dir, self.config_filename)
        self.kill_file = self.config_path.replace(".input", "_KILL.LOG")
        self.job_name = os.path.basename(self.config_path)
        self.fit_output_dir = os.path.join(self.output_dir, "output")
        self.merge_log = os.path.join(self.fit_output_dir, "MERGE.LOG")
        self.reject_list = os.path.join(self.output_dir, "reject.list")

        self.done_file = os.path.join(self.fit_output_dir, "ALL.DONE")
        self.done_file_iteration = os.path.join(self.output_dir,
                                                "RESUBMITTED.DONE")
        self.run_iteration = 1 if os.path.exists(
            self.done_file_iteration) else 0
        self.probability_column_name = None
        if self.config.get("PROB_COLUMN_NAME") is not None:
            self.probability_column_name = self.config.get("PROB_COLUMN_NAME")
        elif self.classifier is not None:
            self.probability_column_name = self.prob_cols[self.classifier.name]
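        # For example, a classifier named "SNN_TEST" (hypothetical) resolves
        # to prob_cols["SNN_TEST"], typically a FITRES column like
        # "PROB_SNN_TEST".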
        self.output["prob_column_name"] = self.probability_column_name

        if self.use_recalibrated:
            new_name = self.probability_column_name.replace("PROB_", "CPROB_")
            self.logger.debug(
                f"Updating prob column name from {self.probability_column_name} to {new_name}. I hope it exists!"
            )
            self.probability_column_name = new_name
        self.output["fit_output_dir"] = self.fit_output_dir

        self.output["NSPLITRAN"] = "NSPLITRAN" in [
            x.upper() for x in self.options.keys()
        ]
        if self.output["NSPLITRAN"]:
            self.output["NSPLITRAN_VAL"] = {
                x.upper(): y
                for x, y in self.options.items()
            }["NSPLITRAN"]
        self.w_summary = os.path.join(self.fit_output_dir,
                                      "BBC_SUMMARY_wfit.FITRES")
        self.output["w_summary"] = self.w_summary

        self.set_m0dif_dirs()

        if not self.make_all:
            self.output_plots = [self.output_plots[0]]
        self.logger.debug(f"Making {len(self.output_plots)} plots")

        self.muopts = self.config.get("MUOPTS", {})
        self.muopt_order = list(self.muopts.keys())
        self.output["muopts"] = self.muopt_order
        self.output["hubble_plot"] = self.output_plots

        self.devel = self.options.get('devel', 0)

        self.logger.debug(f"Devel option: {self.devel}")
        self.do_iterate = False  # Temp flag to stop iterating as BBC will reiterate natively
        self.logger.debug(f"Do iterate: {self.do_iterate}")
        self.logger.debug(f"SNANA_DIR: {os.environ['SNANA_DIR']}")
Example No. 24
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 dependencies,
                 mode,
                 options,
                 index=0,
                 model_name=None):
        super().__init__(name,
                         output_dir,
                         config,
                         dependencies,
                         mode,
                         options,
                         index=index,
                         model_name=model_name)
        self.global_config = get_config()
        self.dump_dir = os.path.join(output_dir, "dump")
        self.job_base_name = os.path.basename(output_dir)
        self.gpu = config.get("GPU", True)
        self.tmp_output = None
        self.done_file = os.path.join(self.output_dir, "done_task.txt")
        self.done_file2 = os.path.join(self.output_dir, "done_task2.txt")
        self.variant = options.get("VARIANT", "vanilla").lower()
        self.redshift = "zspe" if options.get("REDSHIFT", True) else "none"
        self.norm = options.get("NORM", "cosmo")
        self.cyclic = options.get("CYCLIC", True)
        self.seed = options.get("SEED", 0)
        self.clean = config.get("CLEAN", True)
        self.batch_size = options.get("BATCH_SIZE", 128)
        self.num_layers = options.get("NUM_LAYERS", 2)
        self.hidden_dim = options.get("HIDDEN_DIM", 32)

        # Setup yml files
        self.data_yml_file = options.get("DATA_YML", None)
        self.output_data_yml = os.path.join(self.output_dir, "data.yml")
        self.classification_yml_file = options.get("CLASSIFICATION_YML", None)
        self.output_classification_yml = os.path.join(self.output_dir,
                                                      "classification.yml")
        # XOR check: error if exactly one of the two yml inputs is provided
        if (self.data_yml_file is None) ^ (self.classification_yml_file is None):
            self.logger.error(
                f"If using yml inputs, both 'DATA_YML' (currently {self.data_yml_file}) and 'CLASSIFICATION_YML' (currently {self.classification_yml_file}) must be provided"
            )
        elif self.data_yml_file is not None:
            with open(self.data_yml_file, 'r') as f:
                self.data_yml = f.read()
            with open(self.classification_yml_file, 'r') as f:
                self.classification_yml = f.read()
            self.has_yml = True
            self.variant = self.get_variant_from_yml(self.classification_yml)
        else:
            self.data_yml = None
            self.classification_yml = None
            self.has_yml = False

        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        self.validate_model()

        assert self.norm in [
            "global",
            "cosmo",
            "perfilter",
            "cosmo_quantile",
            "none",
        ], f"Norm option is set to {self.norm}, needs to be one of 'global', 'cosmo', 'perfilter', 'cosmo_quantile', 'none'"
        assert self.variant in [
            "vanilla", "variational", "bayesian"
        ], f"Variant {self.variant} is not vanilla, variational or bayesian"
        self.slurm = """{sbatch_header}
        {task_setup}

        """
        self.conda_env = self.global_config["SuperNNova"]["conda_env"]
        self.path_to_classifier = get_output_loc(
            self.global_config["SuperNNova"]["location"])
Example No. 25
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 options,
                 global_config,
                 dependencies=None):
        super().__init__(name,
                         output_dir,
                         config=config,
                         dependencies=dependencies)
        self.options = options
        self.global_config = global_config

        self.job_name = os.path.basename(
            Path(output_dir).parents[1]) + "_COSMOMC_" + name
        self.logfile = os.path.join(self.output_dir, "output.log")

        self.path_to_cosmomc = get_output_loc(
            self.global_config["CosmoMC"]["location"])

        self.create_cov_dep = self.get_dep(CreateCov)
        if self.create_cov_dep is not None:
            self.blind = self.create_cov_dep.output["blind"]
        else:
            self.blind = self.options.get("BLIND", False)
        assert isinstance(
            self.blind,
            (bool, np.bool_)), "Blind should be set to a boolean value!"
        self.ini_prefix = options.get("INI").replace(".ini", "")
        self.static = self.ini_prefix in ["cmb_omw", "cmb_omol"]
        self.static_path = "cosmomc_static_chains/"

        if self.create_cov_dep is None:
            self.ini_files = [f"{self.ini_prefix}.ini"]
            self.num_walkers = 4
            self.covopts = ["ALL"]
            self.covopts_numbers = [0]
            self.labels = [self.name]
            self.num_jobs = 1
        else:
            self.num_walkers = options.get("NUM_WALKERS", 8)
            avail_cov_opts = self.create_cov_dep.output["covopts"]
            self.covopts = options.get("COVOPTS") or list(
                avail_cov_opts.keys())
            self.covopts_numbers = [avail_cov_opts[k] for k in self.covopts]

            self.ini_files = [
                f"{self.ini_prefix}_{num}.ini" for num in self.covopts_numbers
            ]

            self.output["hubble_plot"] = self.create_cov_dep.output[
                "hubble_plot"]
            self.output["bcor_name"] = self.create_cov_dep.output["bcor_name"]
            self.labels = [self.name + "_" + c for c in self.covopts]
            self.num_jobs = len(self.covopts)

        self.ntasks = 10
        self.logger.debug(f"Num Walkers: {self.num_walkers}")
        self.chain_dir = os.path.join(self.output_dir, "chains/")
        self.param_dict = {
            l: os.path.join(self.chain_dir, i.replace(".ini", ".paramnames"))
            for l, i in zip(self.covopts, self.ini_files)
        }
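        # For example, an ini_prefix of "cmb_omw" with covopts ["ALL", "NOSYS"]
        # (hypothetical) maps "ALL" -> chains/cmb_omw_0.paramnames and
        # "NOSYS" -> chains/cmb_omw_1.paramnames.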

        self.done_files = [f"done_{num}.txt" for num in self.covopts_numbers]
        self.chain_dict = {
            l: os.path.join(self.chain_dir, i.replace(".ini", f"_{n + 1}.txt"))
            for l, i in zip(self.covopts, self.ini_files)
            for n in range(self.ntasks)
        }
        self.base_dict = {
            l: os.path.join(self.chain_dir, i.replace(".ini", ""))
            for l, i in zip(self.covopts, self.ini_files)
        }
        self.output["chain_dir"] = self.chain_dir
        self.output["param_dict"] = self.param_dict
        self.output["chain_dict"] = self.chain_dict
        self.output["base_dict"] = self.base_dict
        self.output["covopts"] = self.covopts
        self.output["blind"] = self.blind

        self.output["label"] = (self.options.get(
            "LABEL",
            f"({' + '.join(self.ini_prefix.upper().split('_')[:-1])})") + " " +
                                (self.create_cov_dep.output["name"]
                                 if self.create_cov_dep is not None else ""))
        # TODO: Better logic here please
        final = self.ini_prefix.split("_")[-1]
        ps = {
            "omw": ["omegam", "w"],
            "flatomol": ["omegam"],
            "omol": ["omegam", "omegal"],
            "wnu": ["w", "nu"],
            "wwa": ["w", "wa"]
        }
        if final not in ps.keys():
            self.fail_config(
                f"The filename passed in ({self.ini_prefix}) needs to have the format 'components_cosmology.ini', where the cosmology is one of omw, flatomol, omol, wnu or wwa. Is this a custom file?"
            )
        self.output["cosmology_params"] = ps[final]

        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        self.slurm = """{sbatch_header}
Example No. 26
    def __init__(self, name, output_dir, config, options, dependencies=None):
        super().__init__(name,
                         output_dir,
                         config=config,
                         dependencies=dependencies)
        self.options = options
        self.global_config = get_config()

        self.logfile = os.path.join(self.output_dir, "output.log")

        self.job_name = os.path.basename(
            Path(output_dir).parents[1]) + "_ANALYSE_" + os.path.basename(
                output_dir)

        self.path_to_codes = []
        self.done_files = []

        self.plot_code_dir = os.path.join(
            os.path.dirname(inspect.stack()[0][1]), "external")

        self.covopts = options.get("COVOPTS")
        self.singular_blind = options.get("SINGULAR_BLIND", False)
        if isinstance(self.covopts, str):
            self.covopts = [self.covopts]

        self.cosmomc_input_files = []
        self.cosmomc_output_files = []
        self.cosmomc_covopts = []
        self.names = []
        self.params = []

        # Assuming all deps are cosmomc tasks
        self.cosmomc_deps = self.get_deps(CosmoMC)
        self.blind = np.any([c.output["blind"] for c in self.cosmomc_deps])
        if self.blind:
            self.blind_params = ["w", "om", "ol", "omegam", "omegal"]
        else:
            if options.get("BLIND", False):
                self.blind_params = options.get("BLIND")
            else:
                self.blind_params = []
        self.biascor_deps = self.get_deps(BiasCor)
        self.lcfit_deps = self.get_deps(SNANALightCurveFit)

        if self.cosmomc_deps:
            self.add_plot_script_to_run("parse_cosmomc.py")
            self.add_plot_script_to_run("plot_cosmomc.py")
            self.add_plot_script_to_run("plot_errbudget.py")
        if self.biascor_deps:
            self.add_plot_script_to_run("parse_biascor.py")
            self.add_plot_script_to_run("plot_biascor.py")
        if self.lcfit_deps:
            self.add_plot_script_to_run("parse_lcfit.py")
            self.add_plot_script_to_run("plot_histogram.py")
            self.add_plot_script_to_run("plot_efficiency.py")

        if self.options.get("ADDITIONAL_SCRIPTS") is not None:
            vals = ensure_list(self.options.get("ADDITIONAL_SCRIPTS"))
            for v in vals:
                self.add_plot_script_to_run(v)

        self.done_file = self.done_files[-1]

        for c in self.cosmomc_deps:
            for covopt in c.output["covopts"]:
                self.cosmomc_input_files.append(c.output["base_dict"][covopt])
                self.cosmomc_output_files.append(c.output["label"] + "_" +
                                                 covopt + ".csv.gz")
                self.cosmomc_covopts.append(covopt)
                self.names.append(c.output["label"].replace("_", " ") + " " +
                                  covopt)
                for p in c.output["cosmology_params"]:
                    if p not in self.params:
                        self.params.append(p)
            self.logger.debug(
                f"Analyse task will create CosmoMC plots with {len(self.cosmomc_input_files)} covopts/plots"
            )

        self.wsummary_files = [
            b.output["w_summary"] for b in self.biascor_deps
        ]

        # Get the fitres and m0diff files we'd want to parse for Hubble diagram plotting
        self.biascor_fitres_input_files = [
            os.path.join(m, "FITOPT000_MUOPT000.FITRES.gz")
            for b in self.biascor_deps for m in b.output["m0dif_dirs"]
        ]
        self.biascor_prob_col_names = [
            b.output["prob_column_name"] for b in self.biascor_deps
            for m in b.output["m0dif_dirs"]
        ]
        self.biascor_fitres_output_files = [
            b.name + "__" + os.path.basename(m).replace("OUTPUT_BBCFIT", "1") +
            "__FITOPT0_MUOPT0.fitres.gz" for b in self.biascor_deps
            for m in b.output["m0dif_dirs"]
        ]

        self.biascor_m0diffs = []
        self.biascor_m0diff_output = "all_biascor_m0diffs.csv"
        self.biascor_fitres_combined = "all_biascor_fitres.csv.gz"

        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        self.slurm = """{sbatch_header}
Example No. 27
    def write_input(self):
        # As Pippin only runs one GENVERSION at a time, let's extract it first, along with the config
        c = self.yaml["CONFIG"]
        d = self.yaml["GENVERSION_LIST"][0]
        g = self.yaml["GENOPT_GLOBAL"]

        # Ensure g is a dict with a ref we can update
        if g is None:
            g = {}
            self.yaml["GENOPT_GLOBAL"] = g

        # Start setting properties in the right area
        d["GENVERSION"] = self.genversion

        # Logging now goes in the "CONFIG"
        c["LOGDIR"] = os.path.basename(self.sim_log_dir)

        for k in self.config.keys():
            if k.upper() not in self.reserved_top:
                run_config = self.config[k]
                run_config_keys = list(run_config.keys())
                assert "BASE" in run_config_keys, "You must specify a base file for each option"
                for key in run_config_keys:
                    if key.upper() in self.reserved_keywords:
                        continue
                    base_file = run_config["BASE"]
                    match = os.path.basename(base_file).split(".")[0]
                    val = run_config[key]
                    if not isinstance(val, list):
                        val = [val]

                    lookup = f"GENOPT({match})"
                    if lookup not in d:
                        d[lookup] = {}
                    for v in val:
                        d[lookup][key] = v
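        # For example, a component with BASE "sn_ia_salt2.input" and option
        # GENMAG_SMEAR: 0.1 (both hypothetical) ends up in the GENVERSION as
        # GENOPT(sn_ia_salt2): {GENMAG_SMEAR: 0.1}.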

        if len(self.data_dirs) > 1:
            data_dir = self.data_dirs[0]
            c["PATH_USER_INPUT"] = data_dir

        for key in self.config.get("GLOBAL", []):
            if key.upper() == "BASE":
                continue
            direct_set = [
                "FORMAT_MASK", "RANSEED_REPEAT", "RANSEED_CHANGE",
                "BATCH_INFO", "BATCH_MEM", "NGEN_UNIT", "RESET_CIDOFF"
            ]
            if key in direct_set:
                c[key] = self.config["GLOBAL"][key]
            else:
                g[key] = self.config["GLOBAL"][key]

            if self.derived_batch_info:
                c["BATCH_INFO"] = self.derived_batch_info

            if key == "RANSEED_CHANGE" and c.get("RANSEED_REPEAT") is not None:
                del c["RANSEED_REPEAT"]
            elif key == "RANSEED_REPEAT" and c.get(
                    "RANSEED_CHANGE") is not None:
                del c["RANSEED_CHANGE"]

        if self.base_ia:
            c["SIMGEN_INFILE_Ia"] = [os.path.basename(f) for f in self.base_ia]
        else:
            del c["SIMGEN_INFILE_Ia"]

        if self.base_cc:
            c["SIMGEN_INFILE_NONIa"] = [
                os.path.basename(f) for f in self.base_cc
            ]
        else:
            del c["SIMGEN_INFILE_NONIa"]

        c["GENPREFIX"] = self.genprefix

        # Put config in a temp directory
        temp_dir_obj = tempfile.TemporaryDirectory()
        temp_dir = temp_dir_obj.name

        # Copy the base files across
        input_paths = []
        for f in self.base_ia + self.base_cc:
            resolved = get_data_loc(f)
            shutil.copy(resolved, temp_dir)
            input_paths.append(os.path.join(temp_dir, os.path.basename(f)))
            self.logger.debug(f"Copying input file {resolved} to {temp_dir}")

        # Copy the include input file if there is one
        input_copied = []
        fs = self.base_ia + self.base_cc
        for ff in fs:
            if ff not in input_copied:
                input_copied.append(ff)
                path = get_data_loc(ff)
                copied_path = os.path.join(temp_dir, os.path.basename(path))
                with open(path, "r") as f:
                    for line in f.readlines():
                        line = line.strip()
                        if line.startswith("INPUT_FILE_INCLUDE"):
                            include_file = line.split(":")[-1].strip()
                            include_file_path = get_data_loc(include_file)
                            self.logger.debug(
                                f"Copying INPUT_FILE_INCLUDE file {include_file_path} to {temp_dir}"
                            )

                            include_file_basename = os.path.basename(
                                include_file_path)
                            include_file_output = os.path.join(
                                temp_dir, include_file_basename)

                            if include_file_output not in input_copied:

                                # Copy include file into the temp dir
                                shutil.copy(include_file_path, temp_dir)

                                # Then SED the file to replace the full path with just the basename
                                if include_file != include_file_basename:
                                    sed_command = f"sed -i -e 's|{include_file}|{include_file_basename}|g' {copied_path}"
                                    self.logger.debug(
                                        f"Running sed command: {sed_command}")
                                    subprocess.run(sed_command,
                                                   stderr=subprocess.STDOUT,
                                                   cwd=temp_dir,
                                                   shell=True)

                                # And make sure we don't process this file again
                                fs.append(include_file_output)

        # Write the primary input file
        main_input_file = f"{temp_dir}/{self.genversion}.input"
        self.write_output_file(main_input_file)

        # Remove any duplicates and order the output files
        output_files = [
            f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))
        ]
        self.logger.debug(
            f"{len(output_files)} files used to create simulation. Hashing them."
        )

        # Get current hash
        new_hash = self.get_hash_from_files(output_files)
        regenerate = self._check_regenerate(new_hash)

        if regenerate:
            self.logger.info(f"Running simulation")
            # Clean output dir. God I feel dangerous doing this, so hopefully unnecessary check
            if "//" not in self.output_dir and len(self.output_dir) > 30:
                self.logger.debug(
                    f"Cleaning output directory {self.output_dir}")
                shutil.rmtree(self.output_dir, ignore_errors=True)
                mkdirs(self.output_dir)
                self.logger.debug(
                    f"Copying from {temp_dir} to {self.output_dir}")
                copytree(temp_dir, self.output_dir)
                self.save_new_hash(new_hash)
            else:
                self.logger.error(
                    f"Seems to be an issue with the output dir path: {self.output_dir}"
                )

            chown_dir(self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")
        temp_dir_obj.cleanup()
        return regenerate, new_hash
Example No. 28
    def __init__(self,
                 name,
                 output_dir,
                 options,
                 global_config,
                 dependencies=None):
        super().__init__(name, output_dir, dependencies=dependencies)
        self.options = options
        self.global_config = get_config()

        self.logfile = os.path.join(self.output_dir, "output.log")
        self.conda_env = self.global_config["DataSkimmer"]["conda_env"]
        self.path_to_task = output_dir

        self.unparsed_raw = self.options.get("RAW_DIR")
        self.raw_dir = get_data_loc(self.unparsed_raw)
        if self.raw_dir is None:
            Task.fail_config(f"Unable to find {self.options.get('RAW_DIR')}")

        self.genversion = os.path.basename(self.raw_dir)
        self.data_path = os.path.dirname(self.raw_dir)
        if self.unparsed_raw == "$SCRATCH_SIMDIR" or "SNDATA_ROOT/SIM" in self.raw_dir:
            self.logger.debug("Removing PRIVATE_DATA_PATH from NML file")
            self.data_path = ""
        self.job_name = os.path.basename(
            Path(output_dir).parents[1]) + "_DATAPREP_" + self.name

        self.output["genversion"] = self.genversion
        self.output["data_path"] = self.data_path
        self.output["photometry_dirs"] = [get_output_loc(self.raw_dir)]
        self.output["sim_folders"] = [get_output_loc(self.raw_dir)]
        self.output["raw_dir"] = self.raw_dir
        self.clump_file = os.path.join(self.output_dir,
                                       self.genversion + ".SNANA.TEXT")
        self.output["clump_file"] = self.clump_file
        self.output["ranseed_change"] = False
        is_sim = options.get("SIM", False)
        self.output["is_sim"] = is_sim
        self.output["blind"] = options.get("BLIND", not is_sim)

        self.types_dict = options.get("TYPES")
        if self.types_dict is None:
            self.types_dict = {
                "IA": [1],
                "NONIA": [
                    2, 20, 21, 22, 29, 30, 31, 32, 33, 39, 40, 41, 42, 43,
                    80, 81
                ]
            }
        else:
            for key in self.types_dict.keys():
                self.types_dict[key] = [int(c) for c in self.types_dict[key]]
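        # For example, TYPES: {IA: ["1"], NONIA: ["32"]} in the options
        # (hypothetical) is coerced to integer lists: {"IA": [1], "NONIA": [32]}.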

        self.logger.debug(f"\tIA types are {self.types_dict['IA']}")
        self.logger.debug(f"\tNONIA types are {self.types_dict['NONIA']}")
        self.output["types_dict"] = self.types_dict
        self.types = OrderedDict()
        for n in self.types_dict["IA"]:
            self.types.update({n: "Ia"})
        for n in self.types_dict["NONIA"]:
            self.types.update({n: "II"})
        self.output["types"] = self.types

        self.slurm = """#!/bin/bash
#SBATCH --job-name={job_name}
#SBATCH --time=0:20:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --partition=broadwl
#SBATCH --output={log_file}
#SBATCH --account=pi-rkessler
#SBATCH --mem=2GB

cd {path_to_task}
snana.exe clump.nml
if [ $? -eq 0 ]; then
    echo SUCCESS > {done_file}
else
    echo FAILURE > {done_file}
fi
"""
        self.clump_command = """#
Example No. 29
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 global_config,
                 combine="combine.input"):
        self.data_dirs = global_config["DATA_DIRS"]
        base_file = get_data_loc(combine)
        super().__init__(name, output_dir, config, base_file, ": ")

        # Check for any replacements
        path_sndata_sim = get_config().get("SNANA").get("sim_dir")
        self.logger.debug(f"Setting PATH_SNDATA_SIM to {path_sndata_sim}")
        self.yaml["CONFIG"]["PATH_SNDATA_SIM"] = path_sndata_sim

        self.genversion = self.config["GENVERSION"]
        if len(self.genversion) < 30:
            self.genprefix = self.genversion
        else:
            version_hash = get_hash(self.genversion)[:5]  # avoid shadowing builtin hash()
            self.genprefix = self.genversion[:25] + version_hash
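        # For example, a 40-character GENVERSION (hypothetical) is shortened
        # to its first 25 characters plus a 5-character hash, keeping
        # GENPREFIX unique while staying under the 30-character limit.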

        self.options = self.config.get("OPTS", {})

        self.reserved_keywords = ["BASE"]
        self.reserved_top = ["GENVERSION", "GLOBAL", "OPTS", "EXTERNAL"]
        self.config_path = f"{self.output_dir}/{self.genversion}.input"  # Make sure this syncs with the tmp file name
        self.global_config = global_config

        self.sim_log_dir = f"{self.output_dir}/LOGS"
        self.total_summary = os.path.join(self.sim_log_dir, "MERGE.LOG")
        self.done_file = f"{self.output_dir}/LOGS/ALL.DONE"
        self.logging_file = self.config_path.replace(".input", ".LOG")
        self.kill_file = self.config_path.replace(".input", "_KILL.LOG")

        if "EXTERNAL" not in self.config.keys():
            # Determine the type of each component
            keys = [
                k for k in self.config.keys() if k not in self.reserved_top
            ]
            self.base_ia = []
            self.base_cc = []
            types = {}
            types_dict = {"IA": [], "NONIA": []}
            for k in keys:
                d = self.config[k]
                base_file = d.get("BASE")
                if base_file is None:
                    Task.fail_config(
                        f"Your simulation component {k} for sim name {self.name} needs to specify a BASE input file"
                    )
                base_path = get_data_loc(base_file)
                if base_path is None:
                    Task.fail_config(
                        f"Cannot find sim component {k} base file {base_file} for sim name {self.name}"
                    )

                gentype, genmodel = None, None
                with open(base_path) as f:
                    for line in f.read().splitlines():
                        if line.upper().strip().startswith("GENTYPE:"):
                            gentype = line.upper().split(":")[1].strip()
                        if line.upper().strip().startswith("GENMODEL:"):
                            genmodel = line.upper().split(":")[1].strip()

                gentype = gentype or d.get("GENTYPE")
                if gentype is None:
                    self.fail_config(
                        f"The simulation component {k} needs to specify a GENTYPE in its input file"
                    )
                gentype = int(gentype)
                genmodel = genmodel or d.get("GENMODEL")

                if not gentype:
                    Task.fail_config(
                        f"Cannot find GENTYPE for component {k} and base file {base_path}"
                    )
                if not genmodel:
                    Task.fail_config(
                        f"Cannot find GENMODEL for component {k} and base file {base_path}"
                    )

                type2 = 100 + gentype
                if "SALT2" in genmodel:
                    self.base_ia.append(base_file)
                    types[gentype] = "Ia"
                    types[type2] = "Ia"
                    types_dict["IA"].append(gentype)
                    types_dict["IA"].append(type2)
                else:
                    self.base_cc.append(base_file)
                    types[gentype] = "II"
                    types[type2] = "II"
                    types_dict["NONIA"].append(gentype)
                    types_dict["NONIA"].append(type2)
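                # For example, GENTYPE 1 with a SALT2 GENMODEL (hypothetical)
                # records both 1 and 101 as "Ia"; the +100 offset follows the
                # SNANA convention for photometrically identified events.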

            sorted_types = dict(sorted(types.items()))
            self.logger.debug(f"Types found: {json.dumps(sorted_types)}")
            self.output["types_dict"] = types_dict
            self.output["types"] = sorted_types

            rankeys = [
                r for r in self.config["GLOBAL"].keys()
                if r.startswith("RANSEED_")
            ]
            value = int(self.config["GLOBAL"][rankeys[0]].split(" ")[0]) if rankeys else 1
            self.set_num_jobs(2 * value)

            self.output["blind"] = self.options.get("BLIND", False)
            self.derived_batch_info = None

            # Determine if all the top level input files exist
            if len(self.base_ia + self.base_cc) == 0:
                Task.fail_config(
                    "Your sim has no components specified! Please add something to simulate!"
                )

            # Try to determine how many jobs will be put in the queue
            # First see if it's been explicitly set
            num_jobs = self.options.get("NUM_JOBS")
            if num_jobs is not None:
                self.num_jobs = num_jobs
                self.logger.debug(
                    f"Num jobs set by NUM_JOBS option to {self.num_jobs}")
            else:
                try:
                    # If BATCH_INFO is set, we'll use that
                    batch_info = self.config.get("GLOBAL",
                                                 {}).get("BATCH_INFO")
                    default_batch_info = self.yaml["CONFIG"].get("BATCH_INFO")

                    # If it's not set, let's check for ranseed_repeat or ranseed_change
                    if batch_info is None:
                        ranseed_repeat = self.config.get(
                            "GLOBAL", {}).get("RANSEED_REPEAT")
                        ranseed_change = self.config.get(
                            "GLOBAL", {}).get("RANSEED_CHANGE")
                        default = self.yaml.get("CONFIG",
                                                {}).get("RANSEED_REPEAT")
                        ranseed = ranseed_repeat or ranseed_change or default

                        if ranseed:
                            num_jobs = int(ranseed.strip().split()[0])
                            self.logger.debug(
                                f"Found a ranseed with {num_jobs} jobs, deriving batch info"
                            )
                            comps = default_batch_info.strip().split()
                            comps[-1] = str(num_jobs)
                            self.derived_batch_info = " ".join(comps)
                            self.num_jobs = num_jobs
                            self.logger.debug(
                                f"Num jobs set by RANSEED to {self.num_jobs}")
                    else:
                        # self.logger.debug(f"BATCH INFO property detected as {property}")
                        self.num_jobs = int(batch_info.split()[-1])
                        self.logger.debug(
                            f"Num jobs set by BATCH_INFO to {self.num_jobs}")
                except Exception:
                    self.logger.warning(
                        f"Unable to determine how many jobs simulation {self.name} has"
                    )
                    self.num_jobs = 1

            self.output["genversion"] = self.genversion
            self.output["genprefix"] = self.genprefix

            self.ranseed_change = self.config.get("GLOBAL",
                                                  {}).get("RANSEED_CHANGE")
            base = os.path.expandvars(self.global_config["SNANA"]["sim_dir"])
            self.output["ranseed_change"] = self.ranseed_change is not None
            self.output["ranseed_change_val"] = self.ranseed_change
            self.get_sim_folders(base, self.genversion)
            self.output["sim_folders"] = self.sim_folders
        else:
            self.sim_folders = self.output["sim_folders"]