Example #1
    def classify(self):
        new_hash = self.get_hash_from_string(self.name)
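        # Only rebuild the probability file if the hash check says the stored output is stale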
        if self._check_regenerate(new_hash):
            mkdirs(self.output_dir)
            fit_output = self.get_fit_dependency()
            fitres_file = os.path.join(fit_output["fitres_dirs"][self.index],
                                       fit_output["fitopt_map"][self.fitopt])
            self.logger.debug(f"Looking for {fitres_file}")
            if not os.path.exists(fitres_file):
                self.logger.error(
                    f"FITRES file could not be found at {fitres_file}, classifer has nothing to work with"
                )
                self.passed = False
                return False

            df = pd.read_csv(fitres_file, delim_whitespace=True, comment="#")
            df = df[[
                "CID", "FITPROB"
            ]].rename(columns={"FITPROB": self.get_prob_column_name()})

            self.logger.info(f"Saving probabilities to {self.output_file}")
            df.to_csv(self.output_file, index=False, float_format="%0.4f")
            chown_dir(self.output_dir)
            with open(self.done_file, "w") as f:
                f.write("SUCCESS")
            self.save_new_hash(new_hash)
        self.passed = True

        return True
Example #2
    def classify(self, force_refresh, command):
        format_dict = {
            "job_name": self.job_base_name,
            "conda_env": self.conda_env,
            "path_to_classifier": self.path_to_classifier,
            "command_opts": command,
            "done_file": self.done_file,
        }
        slurm_script = self.slurm.format(**format_dict)

        old_hash = self.get_old_hash()
        new_hash = self.get_hash_from_string(slurm_script)

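        # Resubmit the batch job only when the rendered slurm script has changed (or a refresh is forced)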
        if force_refresh or new_hash != old_hash:
            self.logger.debug("Regenerating")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)

            slurm_output_file = self.output_dir + "/job.slurm"
            with open(slurm_output_file, "w") as f:
                f.write(slurm_script)
            self.save_new_hash(new_hash)
            self.logger.info(f"Submitting batch job {slurm_output_file}")
            subprocess.run(["sbatch", slurm_output_file], cwd=self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")
            self.should_be_done()
        return True
Example #3
    def write_input(self, force_refresh):
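        # Comma-separated FITRES lists from the Ia (bias correction) and CC (prior) simulation merges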
        self.bias_cor_fits = ",".join([m.output["fitres_file"] for m in self.merged_iasim])
        self.cc_prior_fits = ",".join([m.output["fitres_file"] for m in self.merged_ccsim])
        self.data = [m.output["fitres_dir"] for m in self.merged_data]

        self.set_property("simfile_biascor", self.bias_cor_fits)
        self.set_property("simfile_ccprior", self.cc_prior_fits)
        self.set_property("varname_pIa", self.probability_column_name)

        final_output = "\n".join(self.base)

        new_hash = self.get_hash_from_string(final_output)
        old_hash = self.get_old_hash()

        if force_refresh or new_hash != old_hash:
            self.logger.debug("Regenerating results")

            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)

            with open(self.config_path, "w") as f:
                f.write(final_output)
            self.logger.info(f"Input file written to {self.config_path}")

            self.save_new_hash(new_hash)
            return True
        else:
            self.logger.debug("Hash check passed, not rerunning")
            return False
Example #4
    def _run(self, force_refresh):

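        # Fill in the clump command and slurm templates; their combined text is hashed below to decide whether to resubmit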
        command_string = self.clump_command.format(genversion=self.genversion,
                                                   data_path=self.data_path)
        format_dict = {
            "job_name": self.job_name,
            "log_file": self.logfile,
            "path_to_task": self.path_to_task,
            "done_file": self.done_file
        }
        final_slurm = self.slurm.format(**format_dict)

        new_hash = self.get_hash_from_string(command_string + final_slurm)
        old_hash = self.get_old_hash()

        if force_refresh or new_hash != old_hash:
            self.logger.debug("Regenerating and launching task")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.save_new_hash(new_hash)
            slurm_output_file = os.path.join(self.output_dir, "slurm.job")
            clump_file = os.path.join(self.output_dir, "clump.nml")
            with open(slurm_output_file, "w") as f:
                f.write(final_slurm)
            with open(clump_file, "w") as f:
                f.write(command_string)

            self.logger.info(f"Submitting batch job for data prep")
            subprocess.run(["sbatch", slurm_output_file], cwd=self.output_dir)
        else:
            self.should_be_done()
            self.logger.info("Hash check passed, not rerunning")
        return True
Example #5
    def write_nml(self, force_refresh):

        # Parse config, first SNLCINP and then FITINP
        for key, value in self.config.get("SNLCINP", {}).items():
            self.set_snlcinp(key, value)
        for key, value in self.config.get("FITINP", {}).items():
            self.set_fitinp(key, value)
        for key, value in self.options.items():
            self.set_property(key,
                              value,
                              assignment=": ",
                              section_end="&SNLCINP")

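        # RANSEED_CHANGE sims write out multiple versions, so match them with a wildcard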
        if self.sim_task.output["ranseed_change"]:
            self.set_property("VERSION",
                              self.sim_version + "-0*",
                              assignment=": ",
                              section_end="&SNLCINP")
        else:
            self.set_property("VERSION",
                              self.sim_version,
                              assignment=": ",
                              section_end="&SNLCINP")

        self.set_property("OUTDIR",
                          self.lc_output_dir,
                          assignment=": ",
                          section_end="&SNLCINP")
        self.set_property("DONE_STAMP",
                          "FINISHED.DONE",
                          assignment=": ",
                          section_end="&SNLCINP")

        if isinstance(self.sim_task, DataPrep):
            self.set_snlcinp("PRIVATE_DATA_PATH",
                             f"'{self.sim_task.output['data_path']}'")
            self.set_snlcinp("VERSION_PHOTOMETRY",
                             f"'{self.sim_task.output['genversion']}'")

        # We want to do our hashing check here
        string_to_hash = self.fitopts + self.base
        new_hash = self.get_hash_from_string("".join(string_to_hash))
        old_hash = self.get_old_hash()
        regenerate = force_refresh or (old_hash is None
                                       or old_hash != new_hash)

        if regenerate:
            self.logger.info(f"Running Light curve fit. Removing output_dir")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            # Write main file
            with open(self.config_path, "w") as f:
                f.writelines(map(lambda s: s + "\n", string_to_hash))
            self.logger.info(f"NML file written to {self.config_path}")
            self.save_new_hash(new_hash)
            chown_dir(self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")

        return regenerate, new_hash
Example #6
    def classify(self):
        new_hash = self.get_hash_from_string(self.name +
                                             f"{self.prob_ia}_{self.prob_cc}")

        if self._check_regenerate(new_hash):
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
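            # Build a table assigning prob_ia to true Ias and prob_cc to everything else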
            try:
                name = self.get_prob_column_name()
                cid = "CID"
                s = self.get_simulation_dependency()
                df = None
                phot_dir = s.output["photometry_dirs"][self.index]
                headers = [
                    os.path.join(phot_dir, a) for a in os.listdir(phot_dir)
                    if "HEAD" in a
                ]
                if not headers:
                    Task.fail_config(
                        f"No HEAD fits files found in {phot_dir}!")
                else:
                    types = self.get_simulation_dependency(
                    ).output["types_dict"]
                    self.logger.debug(f"Input types are {types}")

                    for h in headers:
                        with fits.open(h) as hdul:
                            data = hdul[1].data
                            snid = np.array(data.field("SNID"))
                            sntype = np.array(data.field("SNTYPE")).astype(
                                np.int64)
                            is_ia = np.isin(sntype, types["IA"])
                            prob = (is_ia * self.prob_ia) + (~is_ia *
                                                             self.prob_cc)

                            dataframe = pd.DataFrame({cid: snid, name: prob})
                            dataframe[cid] = dataframe[cid].apply(str)
                            dataframe[cid] = dataframe[cid].str.strip()
                            if df is None:
                                df = dataframe
                            else:
                                df = pd.concat([df, dataframe])
                    df.drop_duplicates(subset=cid, inplace=True)

                self.logger.info(f"Saving probabilities to {self.output_file}")
                df.to_csv(self.output_file, index=False, float_format="%0.4f")
                chown_dir(self.output_dir)
                with open(self.done_file, "w") as f:
                    f.write("SUCCESS")
                self.save_new_hash(new_hash)
            except Exception as e:
                self.logger.exception(e, exc_info=True)
                self.passed = False
                with open(self.done_file, "w") as f:
                    f.write("FAILED")
                return False
        else:
            self.should_be_done()
        self.passed = True
        return True
Example #7
    def get_ini_file(self):
        mkdirs(self.chain_dir)
        directory = self.create_cov_dep.output["ini_dir"]
        self.logger.debug(f"Directory: {directory}")

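        # Read each INI template from the create_cov output and substitute the CosmoMC paths into it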
        input_files = []
        for file in self.ini_files:
            path = os.path.join(directory, file)
            self.logger.debug(f"Path: {path}")
            if not os.path.exists(path):
                self.logger.error(
                    f"Cannot find the file {path}, make sure you specified a correct INI string matching an existing template"
                )
                return None
            self.logger.debug(f"Reading in {path} to format")
            with open(path) as f:
                input_files.append(f.read().format(
                    **{
                        "path_to_cosmomc": self.path_to_cosmomc,
                        "ini_dir": self.create_cov_dep.output["ini_dir"],
                        "root_dir": self.chain_dir
                    }))

        self.logger.debug(f"Input Files: {input_files}")
        return input_files
Example #8
    def classify(self, force_refresh):
        new_hash = self.check_regenerate(force_refresh)
        if new_hash:
            mkdirs(self.output_dir)
            fit_output = self.get_fit_dependency()
            fitres_file = fit_output["fitres_file"]
            self.logger.debug(f"Looking for {fitres_file}")
            if not os.path.exists(fitres_file):
                self.logger.error(
                    f"FITRES file could not be found at {fitres_file}, classifer has nothing to work with"
                )
                self.passed = False
                return False

            df = pd.read_csv(fitres_file,
                             sep=r'\s+',
                             comment="#",
                             compression="infer")
            df = df[[
                "CID", "FITPROB"
            ]].rename(columns={"FITPROB": self.get_prob_column_name()})

            self.logger.info(f"Saving probabilities to {self.output_file}")
            df.to_csv(self.output_file, index=False, float_format="%0.4f")
            chown_dir(self.output_dir)
            with open(self.done_file, "w") as f:
                f.write("SUCCESS")
            self.save_new_hash(new_hash)
        self.passed = True

        return True
Example #9
    def classify(self, command):
        self.setup()
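        # Choose the sbatch header: a user-supplied batch file takes precedence, otherwise the GPU or CPU default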
        if self.batch_file is None:
            if self.gpu:
                self.sbatch_header = self.sbatch_gpu_header
            else:
                self.sbatch_header = self.sbatch_cpu_header
        else:
            with open(self.batch_file, 'r') as f:
                self.sbatch_header = f.read()
            self.sbatch_header = self.clean_header(self.sbatch_header)

        header_dict = {
            "REPLACE_NAME": self.job_base_name,
            "REPLACE_LOGFILE": "output.log",
            "REPLACE_WALLTIME": "00:55:00",
            "REPLACE_MEM": "8GB",
            "APPEND": ["#SBATCH --ntasks=1", "#SBATCH --cpus-per-task=4"]
        }
        header_dict = merge_dict(header_dict, self.batch_replace)
        self.update_header(header_dict)

        setup_dict = {
            "job_name": self.job_base_name,
            "conda_env": self.conda_env,
            "path_to_classifier": self.path_to_classifier,
            "command_opts": command
        }

        format_dict = {
            "done_file": self.done_file,
            "sbatch_header": self.sbatch_header,
            "task_setup": self.update_setup(setup_dict, self.task_setup['nearest_neighbour'])
        }

        slurm_script = self.slurm.format(**format_dict)

        new_hash = self.get_hash_from_string(slurm_script)
        if self._check_regenerate(new_hash):
            self.logger.debug("Regenerating")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)

            slurm_output_file = self.output_dir + "/job.slurm"
            with open(slurm_output_file, "w") as f:
                f.write(slurm_script)
            self.save_new_hash(new_hash)
            self.logger.info(f"Submitting batch job {slurm_output_file}")
            subprocess.run(["sbatch", slurm_output_file], cwd=self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")
            self.should_be_done()
        return True
Example #10
    def write_nml(self, force_refresh):
        self.logger.debug(f"Loading fitopts file from {self.fitopts_file}")
        with open(self.fitopts_file, "r") as f:
            self.fitopts = list(f.read().splitlines())
            self.logger.info(
                f"Loaded {len(self.fitopts)} fitopts from {self.fitopts_file}"
            )

        # Parse config, first SNLCINP and then FITINP
        for key, value in self.config.get("SNLCINP", {}).items():
            self.set_snlcinp(key, value)
        for key, value in self.config.get("FITINP", {}).items():
            self.set_fitinp(key, value)
        self.set_property(
            "VERSION",
            self.sim_version + "*",
            assignment=": ",
            section_end="&SNLCINP")  # TODO FIX THIS, DOUBLE VERSION KEY
        self.set_property("OUTDIR",
                          self.lc_output_dir,
                          assignment=": ",
                          section_end="&SNLCINP")
        if isinstance(self.sim_task, DataPrep):
            self.set_snlcinp("PRIVATE_DATA_PATH",
                             f"'{self.sim_task.output['data_path']}'")
            self.set_snlcinp("VERSION_PHOTOMETRY",
                             f"'{self.sim_task.output['genversion']}'")

        # We want to do our hashing check here
        string_to_hash = self.fitopts + self.base
        # with open(os.path.abspath(inspect.stack()[0][1]), "r") as f:
        #     string_to_hash += f.read()
        new_hash = self.get_hash_from_string("".join(string_to_hash))
        old_hash = self.get_old_hash()
        regenerate = force_refresh or (old_hash is None
                                       or old_hash != new_hash)

        if regenerate:
            self.logger.info(f"Running Light curve fit. Removing output_dir")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            # Write main file
            with open(self.config_path, "w") as f:
                f.writelines(map(lambda s: s + '\n', string_to_hash))
            self.logger.info(f"NML file written to {self.config_path}")
            self.save_new_hash(new_hash)
            chown_dir(self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")

        return regenerate, new_hash
Example #11
    def predict(self, force_refresh):
        train_info = self.get_fit_dependency()

        model = self.options.get("MODEL")
        assert model is not None, "If TRAIN is not specified, you have to point to a model to use"
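        # If MODEL names another task, swap it for that task's trained model file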
        for t in self.dependencies:
            if model == t.name:
                self.logger.debug(
                    f"Found task dependency {t.name} with model file {t.output['model_filename']}"
                )
                model = t.output["model_filename"]

        model_path = get_output_loc(model)
        self.logger.debug(f"Looking for model in {model_path}")
        if not os.path.exists(model_path):
            self.logger.error(f"Cannot find {model_path}")
            return False

        old_hash = self.get_old_hash()
        new_hash = self.get_hash_from_string(self.name + model_path)

        if force_refresh or new_hash != old_hash:
            self.logger.debug("Regenerating")

            if os.path.exists(self.output_dir):
                shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.save_new_hash(new_hash)

            job_name = 'nearnbr_apply.exe'
            inArgs = f'-inFile_data {train_info["fitres_file"]} -inFile_MLpar {model_path}'
            outArgs = f'-outFile {self.outfile_predict} -varName_prob {self.get_prob_column_name()}'
            cmd_job = ('%s %s %s' % (job_name, inArgs, outArgs))
            self.logger.debug(f"Executing command {cmd_job}")
            with open(self.logging_file, "w") as f:
                val = subprocess.run(cmd_job.split(" "),
                                     stdout=f,
                                     stderr=subprocess.STDOUT,
                                     cwd=self.output_dir)
                with open(self.done_file, "w") as f:
                    if val.returncode == 0:
                        f.write("SUCCESS")
                    else:
                        f.write("FAILURE")
        else:
            self.logger.debug("Not regenerating")
        return True
Example #12
    def write_nml(self):

        # Parse config, first SNLCINP and then FITINP
        for key, value in self.config.get("SNLCINP", {}).items():
            self.set_snlcinp(key, value)
        for key, value in self.config.get("FITINP", {}).items():
            self.set_fitinp(key, value)
        for key, value in self.options.items():
            #print(key,value)
            self.yaml["CONFIG"][key] = value

        self.compute_fitopts()

        if self.sim_task.output["ranseed_change"]:
            self.yaml["CONFIG"]["VERSION"] = [self.sim_version + "-0*"]
        else:
            self.yaml["CONFIG"]["VERSION"] = [self.sim_version]

        self.yaml["CONFIG"]["OUTDIR"] = self.lc_output_dir
        # self.yaml["CONFIG"]["DONE_STAMP"] = "ALL.DONE"

        if isinstance(self.sim_task, DataPrep):
            data_path = self.sim_task.output["data_path"]
            if "SNDATA_ROOT/lcmerge" not in data_path:
                self.set_snlcinp("PRIVATE_DATA_PATH", f"'{self.sim_task.output['data_path']}'")
            self.set_snlcinp("VERSION_PHOTOMETRY", f"'{self.sim_task.output['genversion']}'")

        # We want to do our hashing check here
        string_to_hash = self.get_output_string()
        new_hash = self.get_hash_from_string(string_to_hash)
        regenerate = self._check_regenerate(new_hash)
        if regenerate:
            self.logger.info(f"Running Light curve fit. Removing output_dir")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            # Write main file

            # Write the primary input file
            self.write_output_file(self.config_path)
            self.logger.info(f"NML file written to {self.config_path}")
            self.save_new_hash(new_hash)
            chown_dir(self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")

        return regenerate, new_hash
Example #13
    def _run(self):
        self.yaml["CONFIG"]["WFITOPT"] = self.wfitopts
        self.yaml["CONFIG"]["INPDIR"] = self.create_cov_dirs
        self.yaml["CONFIG"]["OUTDIR"] = os.path.join(self.output_dir, "output")
        # Pass all OPTS keys through to the yaml dictionary
        for k, v in self.options.items():
            # Clobber WFITOPTS to WFITOPT
            if k == "WFITOPTS":
                k = "WFITOPT"
            self.yaml["CONFIG"][k] = v

        final_output_for_hash = self.get_output_string()

        new_hash = self.get_hash_from_string(final_output_for_hash)

        if self._check_regenerate(new_hash):
            self.logger.debug("Regenerating and launching task")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.save_new_hash(new_hash)

            with open(self.input_file, "w") as f:
                f.write(self.get_output_string())

            cmd = ["submit_batch_jobs.sh", os.path.basename(self.input_file)]
            self.logger.debug(
                f"Submitting wfit job: {' '.join(cmd)} in cwd: {self.output_dir}"
            )
            self.logger.debug(f"Logging to {self.logfile}")
            with open(self.logfile, 'w') as f:
                subprocess.run(' '.join(cmd),
                               stdout=f,
                               stderr=subprocess.STDOUT,
                               cwd=self.output_dir,
                               shell=True)
            chown_dir(self.output_dir)

        else:
            self.should_be_done()
            self.logger.info("Has check passed, not rerunning")
        return True
Example #14
    def classify(self):
        mkdirs(self.output_dir)

        fitres = f"{self.fit_dir}/FITOPT000.FITRES.gz"
        self.logger.debug(f"Looking for {fitres}")
        if not os.path.exists(fitres):
            self.logger.error(
                f"FITRES file could not be found at {fitres}, classifer has nothing to work with"
            )
            return False

        data = pd.read_csv(fitres, sep=r'\s+', comment="#", compression="infer")
        ids = data["CID"].values
        probability = np.random.uniform(size=ids.size)
        combined = np.vstack((ids, probability)).T

        output_file = self.output_dir + "/prob.txt"
        self.logger.info(f"Saving probabilities to {output_file}")
        np.savetxt(output_file, combined)
        chown_dir(self.output_dir)
        return True  # change to hash
Example #15
    def _run(self, force_refresh):
        command = self.cmd_prefix + [
            self.lc_fit["fitres_file"], self.agg["merge_key_filename"]
        ] + self.cmd_suffix

        old_hash = self.get_old_hash()
        new_hash = self.get_hash_from_string(" ".join(command))

        if force_refresh or new_hash != old_hash:
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.logger.debug("Regenerating, running combine_fitres")
            self.save_new_hash(new_hash)
            with open(self.logfile, "w") as f:
                subprocess.run(command,
                               stdout=f,
                               stderr=subprocess.STDOUT,
                               cwd=self.output_dir)
        else:
            self.logger.debug("Not regnerating")
        return True
Example #16
def run(args):
    # Load YAML config file
    yaml_path = os.path.abspath(os.path.expandvars(args.yaml))
    assert os.path.exists(yaml_path), f"File {yaml_path} cannot be found."
    with open(yaml_path, "r") as f:
        config = yaml.safe_load(f)

    overwrites = config.get("GLOBAL")
    if config.get("GLOBALS") is not None:
        logging.warning(
            "Your config file has a GLOBALS section in it. If you're trying to overwrite cfg.yml, rename this to GLOBAL"
        )

    global_config = get_config(initial_path=args.config, overwrites=overwrites)

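    # The upper-cased config filename becomes the run name used for logging and the output manager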
    config_filename = os.path.basename(args.yaml).split(".")[0].upper()
    output_dir = get_output_dir()
    logging_folder = os.path.abspath(os.path.join(output_dir, config_filename))

    if not args.check:
        mkdirs(logging_folder)

    message_store, logging_filename = setup_logging(config_filename,
                                                    logging_folder, args)

    for i, d in enumerate(global_config["DATA_DIRS"]):
        logging.debug(f"Data directory {i + 1} set as {d}")
        assert d is not None, "Data directory is none, which means it failed to resolve. Check the error message above for why."

    manager = Manager(config_filename, yaml_path, config, message_store)
    if args.start is not None:
        args.refresh = True
    manager.set_start(args.start)
    manager.set_finish(args.finish)
    manager.set_force_refresh(args.refresh)
    manager.execute(args.check)
    chown_file(logging_filename)
    return manager
Example #17
    def _run(self, force_refresh):
        sys_scale = self.calculate_input()
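        # sys_scale holds the systematic scaling lines; they are written to sys_file_out and folded into the hash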
        format_dict = {
            "job_name": self.job_name,
            "log_file": self.logfile,
            "done_file": self.done_file,
            "path_to_code": self.path_to_code,
            "input_file": self.input_file,
        }
        final_slurm = self.slurm.format(**format_dict)

        new_hash = self.get_hash_from_string("\n".join(self.base + sys_scale) +
                                             final_slurm)
        old_hash = self.get_old_hash()

        if force_refresh or new_hash != old_hash:
            self.logger.debug("Regenerating and launching task")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            mkdirs(self.config_dir)
            self.save_new_hash(new_hash)
            # Write sys scales and the main input file
            with open(self.sys_file_out, "w") as f:
                f.write("\n".join(sys_scale))
            with open(self.input_file, "w") as f:
                f.write("\n".join(self.base))
            # Write out slurm job script
            slurm_output_file = os.path.join(self.output_dir, "slurm.job")
            with open(slurm_output_file, "w") as f:
                f.write(final_slurm)

            self.logger.info(f"Submitting batch job for data prep")
            subprocess.run(["sbatch", slurm_output_file], cwd=self.output_dir)
        else:
            self.should_be_done()
            self.logger.info("Hash check passed, not rerunning")
        return True
Example #18
    def _run(self):
        self.output["fitopt_map"] = self.lc_fit["fitopt_map"]
        self.output["fitopt_index"] = self.lc_fit["fitopt_index"]
        self.output["fitres_file"] = self.lc_fit["fitres_file"]
        self.output["SURVEY"] = self.lc_fit["SURVEY"]
        self.output["SURVEY_ID"] = self.lc_fit["SURVEY_ID"]

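        # Separate real FITRES files from symlinks so the links can be recreated rather than copied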
        fitres_files, symlink_files = [], []
        for index, (fitres_dir, outdir) in enumerate(
                zip(self.lc_fit["fitres_dirs"], self.fitres_outdirs)):
            files = os.listdir(fitres_dir)
            fitres_files += [
                (fitres_dir, outdir, f, index, self.lc_fit["name"])
                for f in files if "FITRES" in f
                and not os.path.islink(os.path.join(fitres_dir, f))
            ]
            symlink_files += [(fitres_dir, outdir, f, index,
                               self.lc_fit["name"]) for f in files
                              if "FITRES" in f
                              and os.path.islink(os.path.join(fitres_dir, f))]

        new_hash = self.get_hash_from_string(" ".join([
            a + b + c + f"{d}" + e
            for a, b, c, d, e in (fitres_files + symlink_files)
        ]))
        if self._check_regenerate(new_hash):
            shutil.rmtree(self.output_dir, ignore_errors=True)
            self.logger.debug("Regenerating, running combine_fitres")
            try:
                for fitres_dir in self.fitres_outdirs:
                    self.logger.debug(f"Creating directory {fitres_dir}")
                    mkdirs(fitres_dir)
                    for f in fitres_files:
                        if f[1] == fitres_dir:
                            self.add_to_fitres(os.path.join(f[0], f[2]),
                                               f[1],
                                               f[4],
                                               index=f[3])
                    for s in symlink_files:
                        if s[1] == fitres_dir:
                            self.logger.debug(
                                f"Creating symlink for {os.path.join(s[1], s[2])} to {os.path.join(s[1], 'FITOPT000.FITRES.gz')}"
                            )
                            os.symlink(
                                os.path.join(s[1], "FITOPT000.FITRES.gz"),
                                os.path.join(s[1], s[2]))

                    self.logger.debug(f"Copying MERGE.LOG")
                    filenames = ["MERGE.LOG", "SUBMIT.INFO"]
                    for f in filenames:
                        original = os.path.join(self.lc_fit["lc_output_dir"],
                                                f)
                        moved = os.path.join(self.suboutput_dir, f)
                        if not os.path.exists(moved):
                            self.logger.debug(
                                f"Copying file {f} into output directory")
                            shutil.copy(original, moved)

                    self.save_new_hash(new_hash)
                    with open(self.done_file, "w") as f:
                        f.write("SUCCESS\n")
            except Exception as e:
                self.logger.error("Error running merger!")
                self.logger.error(f"Check log at {self.logfile}")
                self.logger.exception(e, exc_info=True)
                return False
        else:
            self.should_be_done()
            self.logger.info("Hash check passed, not rerunning")
        return True
Example #19
    def _run(self, force_refresh):

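        # CMB-only (static) constraints copy pre-computed chains instead of launching a CosmoMC batch job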
        if self.static:
            self.logger.info(
                "CMB only constraints detected, copying static files")

            cosmomc_static_loc = get_data_loc(self.static_path +
                                              self.ini_prefix)
            if cosmomc_static_loc is None:
                self.logger.error(
                    "Seems like we can't find the static chains...")
                return False
            else:

                new_hash = self.get_hash_from_string(cosmomc_static_loc)
                old_hash = self.get_old_hash()

                if force_refresh or new_hash != old_hash:
                    self.logger.debug("Regenerating and copying static chains")
                    shutil.rmtree(self.chain_dir, ignore_errors=True)
                    shutil.copytree(cosmomc_static_loc, self.chain_dir)
                    for done_file in self.done_files:
                        df = os.path.join(self.output_dir, done_file)
                        with open(df, "w") as f:
                            f.write("SUCCESS")
                    self.save_new_hash(new_hash)

                else:
                    self.should_be_done()
                    self.logger.info("Hash check passed, not rerunning")
        else:
            ini_filecontents = self.get_ini_file()
            if ini_filecontents is None:
                return False

            format_dict = {
                "job_name": self.job_name,
                "log_file": self.logfile,
                "done_files": " ".join(self.done_files),
                "path_to_cosmomc": self.path_to_cosmomc,
                "output_dir": self.output_dir,
                "ini_files": " ".join(self.ini_files),
                "num_jobs": len(self.ini_files),
                "num_walkers": self.num_walkers,
            }
            final_slurm = self.slurm.format(**format_dict)

            new_hash = self.get_hash_from_string(final_slurm +
                                                 " ".join(ini_filecontents))
            old_hash = self.get_old_hash()

            if force_refresh or new_hash != old_hash:
                self.logger.debug("Regenerating and launching task")
                shutil.rmtree(self.output_dir, ignore_errors=True)
                mkdirs(self.output_dir)
                self.save_new_hash(new_hash)
                slurm_output_file = os.path.join(self.output_dir, "slurm.job")
                with open(slurm_output_file, "w") as f:
                    f.write(final_slurm)
                for file, content in zip(self.ini_files, ini_filecontents):
                    filepath = os.path.join(self.output_dir, file)
                    with open(filepath, "w") as f:
                        f.write(content)
                mkdirs(self.chain_dir)

                needed_dirs = [
                    "data", "paramnames", "camb", "batch1", "batch2", "batch3"
                ]
                for d in needed_dirs:
                    self.logger.debug(f"Creating symlink to {d} dir")
                    original_data_dir = os.path.join(self.path_to_cosmomc, d)
                    new_data_dir = os.path.join(self.output_dir, d)
                    os.symlink(original_data_dir,
                               new_data_dir,
                               target_is_directory=True)

                self.logger.info(f"Submitting batch job for data prep")
                subprocess.run(["sbatch", slurm_output_file],
                               cwd=self.output_dir)
            else:
                self.should_be_done()
                self.logger.info("Hash check passed, not rerunning")
        return True
Example #20
    def execute(self, check_config):
        self.logger.info(f"Executing pipeline for prefix {self.prefix}")
        self.logger.info(f"Output will be located in {self.output_dir}")
        if check_config:
            self.logger.info("Only verifying config, not launching anything")

        mkdirs(self.output_dir)
        c = self.run_config

        self.tasks = self.get_tasks(c)

        if check_config:
            self.logger.notice("Config verified, exiting")
            return

        self.num_jobs_queue = 0
        self.num_jobs_queue_gpu = 0
        running_tasks = []
        done_tasks = []
        failed_tasks = []
        blocked_tasks = []
        squeue = None

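        # Polling interval starts at ping_frequency and backs off exponentially up to max_ping_frequency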
        start_sleep_time = self.global_config["OUTPUT"]["ping_frequency"]
        max_sleep_time = self.global_config["OUTPUT"]["max_ping_frequency"]
        current_sleep_time = start_sleep_time

        config_file_output = os.path.join(self.output_dir,
                                          os.path.basename(self.filename_path))
        if not check_config and self.filename_path != config_file_output:
            self.logger.info(
                f"Saving parsed config file from {self.filename_path} to {config_file_output}"
            )
            shutil.copy(self.filename_path, config_file_output)
            chown_file(config_file_output)

        # Welcome to the primary loop
        while self.tasks or running_tasks:
            small_wait = False

            # Check status of current jobs
            for t in running_tasks:
                try:
                    completed = self.check_task_completion(
                        t, blocked_tasks, done_tasks, failed_tasks,
                        running_tasks, squeue)
                    small_wait = small_wait or completed
                except Exception as e:
                    self.logger.exception(e, exc_info=True)
                    self.fail_task(t, running_tasks, failed_tasks,
                                   blocked_tasks)

            # Submit new jobs if needed
            while self.num_jobs_queue < self.max_jobs:

                t = self.get_task_to_run(self.tasks, done_tasks)
                if t is not None:
                    self.logger.info("")
                    self.tasks.remove(t)
                    self.logger.notice(f"LAUNCHING: {t}")
                    try:
                        started = t.run(self.get_force_refresh(t))
                    except Exception as e:
                        self.logger.exception(e, exc_info=True)
                        started = False
                    if started:
                        if t.gpu:
                            self.num_jobs_queue_gpu += t.num_jobs
                        else:
                            self.num_jobs_queue += t.num_jobs
                        self.logger.notice(
                            f"LAUNCHED: {t} with total {self.num_jobs_queue} jobs"
                        )
                        running_tasks.append(t)
                        completed = self.check_task_completion(
                            t, blocked_tasks, done_tasks, failed_tasks,
                            running_tasks, squeue)
                        small_wait = small_wait or completed
                    else:
                        self.logger.error(f"FAILED TO LAUNCH: {t}")
                        self.fail_task(t, running_tasks, failed_tasks,
                                       blocked_tasks)
                    small_wait = True
                else:
                    break

            # Check quickly if we've added a new job, etc, in case of immediate failure
            if small_wait:
                self.log_status(self.tasks, running_tasks, done_tasks,
                                failed_tasks, blocked_tasks)
                current_sleep_time = start_sleep_time
                time.sleep(0.1)
                squeue = None
            else:
                time.sleep(current_sleep_time)
                current_sleep_time *= 2
                if current_sleep_time > max_sleep_time:
                    current_sleep_time = max_sleep_time
                squeue = [
                    i.strip() for i in subprocess.check_output(
                        f"squeue -h -u $USER -o '%.200j'",
                        shell=True,
                        text=True).splitlines()
                ]
                n = len(squeue)
                if n == 0 or n > self.max_jobs:
                    self.logger.debug(
                        f"Squeue is reporting {n} jobs in the queue... this is either 0 or close to the maximum allowed"
                    )

        self.log_finals(done_tasks, failed_tasks, blocked_tasks)
Example #21
    def write_input(self):
        # Load previous hash here if it exists

        old_hash = None
        hash_file = f"{self.output_dir}/hash.txt"
        if os.path.exists(hash_file):
            with open(hash_file, "r") as f:
                old_hash = f.read().strip()
                self.logger.debug(f"Previous result found, hash is {old_hash}")

        # Put config in a temp directory
        temp_dir_obj = tempfile.TemporaryDirectory()
        temp_dir = temp_dir_obj.name

        # Copy the base files across
        for f in self.base_ia:
            shutil.copy(self.data_dir + f, temp_dir)
        for f in self.base_cc:
            shutil.copy(self.data_dir + f, temp_dir)

        # Copy the include input file if there is one
        input_copied = []
        fs = self.base_ia + self.base_cc
        for ff in fs:
            if ff not in input_copied:
                input_copied.append(ff)
                with open(self.data_dir + ff, "r") as f:
                    for line in f.readlines():
                        line = line.strip()
                        if line.startswith("INPUT_FILE_INCLUDE"):
                            include_file = line.split(":")[-1].strip()
                            self.logger.debug(f"Copying included file {include_file}")
                            shutil.copy(self.data_dir + include_file, temp_dir)

        # Write the primary input file
        main_input_file = f"{temp_dir}/{self.genversion}.input"
        with open(main_input_file, "w") as f:
            f.writelines(map(lambda s: s + '\n', self.base))
        self.logger.info(f"Input file written to {main_input_file}")

        # Remove any duplicates and order the output files
        output_files = [f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))]
        self.logger.debug(f"{len(output_files)} files used to create simulation. Hashing them.")

        # Also add this file to the hash, so if the code changes we also regenerate. Smart.
        output_files.append(os.path.abspath(inspect.stack()[0][1]))

        # Get current hash
        string_to_hash = ""
        for file in output_files:
            with open(file, "r") as f:
                string_to_hash += f.read()
        new_hash = get_hash(string_to_hash)
        self.logger.debug(f"Current hash set to {new_hash}")
        regenerate = old_hash is None or old_hash != new_hash

        if regenerate:
            self.logger.info(f"Running simulation, hash check failed")
            # Clean output dir. God I feel dangerous doing this, so hopefully unnecessary check
            if "//" not in self.output_dir and "Pippin" in self.output_dir:
                self.logger.debug(f"Cleaning output directory {self.output_dir}")
                shutil.rmtree(self.output_dir, ignore_errors=True)
                mkdirs(self.output_dir)
                self.logger.debug(f"Copying from {temp_dir} to {self.output_dir}")
                copytree(temp_dir, self.output_dir)
            with open(hash_file, "w") as f:
                f.write(str(new_hash))
                self.logger.debug(f"New hash saved to {hash_file}")
                self.hash_file = hash_file
            chown_dir(self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")
        temp_dir_obj.cleanup()
        return regenerate, new_hash
Example #22
    def _run(self, force_refresh):
        new_hash = self.check_regenerate(force_refresh)
        if new_hash:
            mkdirs(self.output_dir)
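            # Merge every classifier's prediction file into one dataframe keyed on the SN id column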
            prediction_files = [
                d.output["predictions_filename"] for d in self.classifiers
            ]
            df = None

            for f in prediction_files:
                dataframe = self.load_prediction_file(f)
                dataframe = dataframe.rename(
                    columns={dataframe.columns[0]: self.id})
                if df is None:
                    df = dataframe
                    self.logger.debug(
                        f"Merging on column {self.id} for file {f}")
                else:
                    self.logger.debug(
                        f"Merging on column {self.id} for file {f}")
                    df = pd.merge(
                        df, dataframe, on=self.id, how="outer"
                    )  # Outer join so objects missing from one file are kept

            if self.include_type:
                self.logger.info("Finding original types")
                s = self.get_underlying_sim_task()
                type_df = None
                phot_dir = s.output["photometry_dir"]
                headers = [
                    os.path.join(phot_dir, a) for a in os.listdir(phot_dir)
                    if "HEAD" in a
                ]
                if not headers:
                    self.logger.error(
                        f"Not HEAD fits files found in {phot_dir}!")
                else:
                    for h in headers:
                        with fits.open(h) as hdul:
                            data = hdul[1].data
                            snid = np.array(data.field("SNID")).astype(
                                np.int64)
                            sntype = np.array(data.field("SNTYPE")).astype(
                                np.int64)
                            dataframe = pd.DataFrame({
                                self.id: snid,
                                self.type_name: sntype
                            })
                            if type_df is None:
                                type_df = dataframe
                            else:
                                type_df = pd.concat([type_df, dataframe])
                df = pd.merge(df, type_df, on=self.id)
            if self.plot:
                self._plot(df)

            self.logger.info(
                f"Merged into dataframe of {df.shape[0]} rows, with columns {list(df.columns)}"
            )
            df.to_csv(self.output_df, index=False, float_format="%0.4f")
            self.save_key_format(df)
            self.logger.debug(f"Saving merged dataframe to {self.output_df}")
            self.save_new_hash(new_hash)

        self.output["merge_predictions_filename"] = self.output_df
        self.output["merge_key_filename"] = self.output_df_key
        self.output["sn_column_name"] = self.id
        if self.include_type:
            self.output["sn_type_name"] = self.type_name

        self.passed = True
        return True
Example #23
def run(args):

    if args is None:
        return None

    init()

    # Load YAML config file
    yaml_path = os.path.abspath(os.path.expandvars(args.yaml))
    assert os.path.exists(yaml_path), f"File {yaml_path} cannot be found."
    config_raw, config = load_yaml(yaml_path)
    #with open(yaml_path, "r") as f:
    #    config = yaml.safe_load(f)

    overwrites = config.get("GLOBAL")
    if config.get("GLOBALS") is not None:
        logging.warning(
            "Your config file has a GLOBALS section in it. If you're trying to overwrite cfg.yml, rename this to GLOBAL"
        )

    cfg = None
    if config.get("GLOBAL"):
        cfg = config.get("GLOBAL").get("CFG_PATH")
    if cfg is None:
        cfg = args.config

    global_config = get_config(initial_path=cfg, overwrites=overwrites)

    config_filename = os.path.basename(args.yaml).split(".")[0].upper()
    output_dir = get_output_dir()
    logging_folder = os.path.abspath(os.path.join(output_dir, config_filename))

    if not args.check:
        mkdirs(logging_folder)
    if os.path.exists(logging_folder):
        chown_dir(logging_folder, walk=args.permission)

    if args.permission:
        return

    message_store, logging_filename = setup_logging(config_filename,
                                                    logging_folder, args)

    for i, d in enumerate(global_config["DATA_DIRS"]):
        logging.debug(f"Data directory {i + 1} set as {d}")
        assert d is not None, "Data directory is none, which means it failed to resolve. Check the error message above for why."

    logging.info(
        f"Running on: {os.environ.get('HOSTNAME', '$HOSTNAME not set')} login node."
    )

    manager = Manager(config_filename, yaml_path, config_raw, config,
                      message_store)

    # Gracefully handle Ctrl-c
    def handler(signum, frame):
        logging.error("Ctrl-c was pressed.")
        logging.warning(
            "All remaining tasks will be killed and their hash reset")
        manager.kill_remaining_tasks()
        exit(1)

    signal.signal(signal.SIGINT, handler)

    if args.start is not None:
        args.refresh = True
    manager.set_start(args.start)
    manager.set_finish(args.finish)
    manager.set_force_refresh(args.refresh)
    manager.set_force_ignore_stage(args.ignore)
    manager.execute(args.check, args.compress, args.uncompress)
    chown_file(logging_filename)
    return manager
Example #24
    def prepare_train_job(self, force_refresh):
        self.logger.debug("Preparing NML file for Nearest Neighbour training")
        fit_output = self.get_fit_dependency()

        genversion = fit_output["genversion"]
        fitres_dir = fit_output["fitres_dir"]
        fitres_file = fit_output["fitres_file"]
        nml_file_orig = fit_output["nml_file"]

        # Put config in a temp directory
        temp_dir_obj = tempfile.TemporaryDirectory()
        temp_dir = temp_dir_obj.name

        outfile_train = f'{self.name}_train.out'
        nml_file_train1 = f'{temp_dir}/{genversion}-2.nml'
        nml_file_train2 = f'{self.output_dir}/{genversion}-2.nml'

        train_info_local = {
            "outfile_NNtrain": outfile_train,
            "nml_file_NNtrain": nml_file_train2,
        }

        # construct sed to copy original NMLFILE and to
        #   + replace OUTDIR:
        #   + include ROOTFILE_OUT (to store histograms for NN train)
        #   + include DONE stamp for Sam/pippin
        #   + run afterburner to process ROOT file and get NN_trainPar;
        #     copy NN_trainPar up to where pippin can find it
        #
        # TODO: Check with Rick if the FITOPT000.ROOT is needed / should be hardcoded
        afterBurn = f'nearnbr_maxFoM.exe FITOPT000.ROOT -truetype 1 -outfile {outfile_train} ; cp {outfile_train} {self.outfile_train}'

        sedstr = 'sed'
        sedstr += (r" -e '/OUTDIR:/a\OUTDIR: %s' " % self.splitfit_output_dir)
        sedstr += r" -e '/OUTDIR:/d'"
        sedstr += r" -e '/DONE_STAMP:/d'"
        sedstr += r" -e '/SNTABLE_LIST/a\    ROOTFILE_OUT = \"bla.root\"'"
        sedstr += r" -e '/_OUT/d '"
        sedstr += (r" -e '/VERSION:/a\VERSION_AFTERBURNER: %s'" % afterBurn)
        sedstr += (r" -e '/VERSION:/a\DONE_STAMP: %s'" % self.done_file)
        sed_command = ("%s %s > %s" % (sedstr, nml_file_orig, nml_file_train1))

        # use system call to apply sed command
        # self.logger.debug(f"Running sed command {sed_command}")
        subprocess.run(sed_command,
                       stderr=subprocess.STDOUT,
                       cwd=temp_dir,
                       shell=True)

        # make sure that the new NML file is really there
        if not os.path.isfile(nml_file_train1):
            self.logger.error(
                f"Unable to create {nml_file_train1} with sed command {sed_command}"
            )
            return None

        # check that expected FITRES ref file is really there.
        if not os.path.exists(fitres_file):
            self.logger.error(
                f'Cannot find expected FITRES file at {fitres_file}')
            return None

        # open NML file in append mode and tack on NNINP namelist
        with open(nml_file_train1, 'a') as f:
            f.write("\n# NNINP below added by prepare_NNtrainJob\n")
            f.write("\n&NNINP \n")
            f.write("   NEARNBR_TRAINFILE_PATH = '%s' \n" % fitres_dir)
            f.write("   NEARNBR_TRAINFILE_LIST = '%s' \n" %
                    os.path.basename(fitres_file))
            f.write("   NEARNBR_SEPMAX_VARDEF  = '%s' \n" % self.nn_options)
            f.write("   NEARNBR_TRUETYPE_VARNAME = 'SIM_TYPE_INDEX' \n")
            f.write("   NEARNBR_TRAIN_ODDEVEN = T \n")
            f.write("\n&END\n")

        input_files = [nml_file_train1]
        old_hash = self.get_old_hash()
        new_hash = self.get_hash_from_files(input_files)

        if force_refresh or new_hash != old_hash:
            self.logger.debug("Regenerating")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.logger.debug(f"Copying from {temp_dir} to {self.output_dir}")
            copytree(temp_dir, self.output_dir)
            self.save_new_hash(new_hash)
            return new_hash, train_info_local
        else:
            self.logger.debug("Not regenerating")
            return None, train_info_local
Example #25
                        "--finish",
                        help="Stage to finish at (it runs this stage too)",
                        default=None)
    parser.add_argument("-r",
                        "--refresh",
                        help="Refresh all tasks, do not use hash",
                        action="store_true")

    args = parser.parse_args()
    level = logging.DEBUG if args.verbose else logging.INFO

    # Get base filename
    config_filename = os.path.basename(args.config).split(".")[0].upper()
    logging_folder = os.path.abspath(
        f"{get_config()['OUTPUT']['output_dir']}/{config_filename}")
    mkdirs(logging_folder)
    logging_filename = f"{logging_folder}/{config_filename}.log"

    message_store = MessageStore()
    NOTICE_LEVELV_NUM = 25
    logging.addLevelName(NOTICE_LEVELV_NUM, "NOTICE")

    def notice(self, message, *args, **kws):
        if self.isEnabledFor(NOTICE_LEVELV_NUM):
            self._log(NOTICE_LEVELV_NUM, message, args, **kws)

    logging.Logger.notice = notice
    fmt = "[%(levelname)8s |%(filename)21s:%(lineno)3d]   %(message)s" if args.verbose else "%(message)s"
    logging.basicConfig(level=level,
                        format=fmt,
                        handlers=[
Example #26
    def _run(self):

        # Get the m0diff files for everything
        for b in self.biascor_deps:
            for m in b.output["m0dif_dirs"]:
                self.logger.info(f"Looking at M0diff dir {m}")
                sim_number = 1
                if os.path.basename(m).isdigit():
                    sim_number = int(os.path.basename(m))
                files = [
                    f for f in sorted(os.listdir(m))
                    if f.endswith(".M0DIF") or f.endswith(".M0DIF.gz")
                ]
                for f in files:
                    muopt_num = int(f.split("MUOPT")[-1].split(".")[0])
                    fitopt_num = int(f.split("FITOPT")[-1].split("_")[0])
                    if muopt_num == 0:
                        muopt = "DEFAULT"
                    else:
                        muopt = b.output["muopts"][muopt_num -
                                                   1]  # Because 0 is default

                    if fitopt_num == 0:
                        fitopt = "DEFAULT"
                    else:
                        fitopt = b.output["fitopt_index"][fitopt_num]

                    self.biascor_m0diffs.append(
                        (b.name, sim_number, muopt, muopt_num, fitopt,
                         fitopt_num, os.path.join(m, f)))

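        # Collect the default FITOPT FITRES file for each light-curve fit dependency, split into data and simulation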
        data_fitres_files = [
            os.path.join(l.output["fitres_dirs"][0],
                         l.output["fitopt_map"]["DEFAULT"])
            for l in self.lcfit_deps if l.output["is_data"]
        ]
        data_fitres_output = [
            d.split("/")[-4] + ".csv.gz" for d in data_fitres_files
        ]
        sim_fitres_files = [
            os.path.join(l.output["fitres_dirs"][0],
                         l.output["fitopt_map"]["DEFAULT"])
            for l in self.lcfit_deps if not l.output["is_data"]
        ]
        sim_fitres_output = [
            d.split("/")[-4] + ".csv.gz" for d in sim_fitres_files
        ]
        types = list(
            set([
                a for l in self.lcfit_deps
                for a in l.sim_task.output["types_dict"]["IA"]
            ]))
        input_yml_file = "input.yml"
        output_dict = {
            "COSMOMC": {
                "INPUT_FILES": self.cosmomc_input_files,
                "PARSED_FILES": self.cosmomc_output_files,
                "PARSED_COVOPTS": self.cosmomc_covopts,
                "PARAMS": self.params,
                "SHIFT": self.options.get("SHIFT", False),
                "PRIOR": self.options.get("PRIOR"),
                "NAMES": self.names,
                "CONTOUR_COVOPTS": self.covopts,
                "SINGULAR_BLIND": self.singular_blind,
            },
            "BIASCOR": {
                "WFIT_SUMMARY_INPUT": self.wsummary_files,
                "WFIT_SUMMARY_OUTPUT": "all_biascor.csv",
                "FITRES_INPUT": self.biascor_fitres_input_files,
                "FITRES_PROB_COLS": self.biascor_prob_col_names,
                "FITRES_PARSED": self.biascor_fitres_output_files,
                "M0DIFF_INPUTS": self.biascor_m0diffs,
                "M0DIFF_PARSED": self.biascor_m0diff_output,
                "FITRES_COMBINED": self.biascor_fitres_combined,
            },
            "OUTPUT_NAME": self.name,
            "BLIND": self.blind_params,
            "LCFIT": {
                "DATA_FITRES_INPUT": data_fitres_files,
                "SIM_FITRES_INPUT": sim_fitres_files,
                "DATA_FITRES_PARSED": data_fitres_output,
                "SIM_FITRES_PARSED": sim_fitres_output,
                "IA_TYPES": types,
            },
        }

        if self.batch_file is None:
            if self.gpu:
                self.sbatch_header = self.sbatch_gpu_header
            else:
                self.sbatch_header = self.sbatch_cpu_header
        else:
            with open(self.batch_file, 'r') as f:
                self.sbatch_header = f.read()
            self.sbatch_header = self.clean_header(self.sbatch_header)

        header_dict = {
            "REPLACE_NAME": self.job_name,
            "REPLACE_WALLTIME": "1:00:00",
            "REPLACE_LOGFILE": self.logfile,
            "REPLACE_MEM": "20GB",
            "APPEND": ["#SBATCH --ntasks=1", "#SBATCH --cpus-per-task=1"]
        }
        header_dict = merge_dict(header_dict, self.batch_replace)
        self.update_header(header_dict)
        setup_dict = {"output_dir": self.output_dir}

        format_dict = {
            "sbatch_header": self.sbatch_header,
            "task_setup": self.update_setup(setup_dict,
                                            self.task_setup['analyse']),
            "input_yml": input_yml_file
        }
        final_slurm = self.get_slurm_raw().format(**format_dict)

        new_hash = self.get_hash_from_string(final_slurm +
                                             json.dumps(output_dict))

        if self._check_regenerate(new_hash):
            self.logger.debug("Regenerating and launching task")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.save_new_hash(new_hash)
            for c in self.path_to_codes:
                shutil.copy(c, self.output_dir)
            input_yml_path = os.path.join(self.output_dir, input_yml_file)
            with open(input_yml_path, "w") as f:
                json.dump(output_dict, f, indent=2)
                self.logger.debug(
                    f"Input yml file written out to {input_yml_path}")

            slurm_output_file = os.path.join(self.output_dir, "slurm.job")
            with open(slurm_output_file, "w") as f:
                f.write(final_slurm)
            self.logger.info(f"Submitting batch job for analyse chains")
            subprocess.run(["sbatch", slurm_output_file], cwd=self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")
        return True
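The FITOPT/MUOPT numbers in the loop above are parsed straight out of the M0DIF file names; a standalone check of that parsing, using illustrative names, is:

import os

f = "FITOPT002_MUOPT001.M0DIF"   # illustrative name following the convention assumed above
m = "/path/to/m0dif_dirs/3"      # illustrative directory whose basename is the sim number
muopt_num = int(f.split("MUOPT")[-1].split(".")[0])    # -> 1
fitopt_num = int(f.split("FITOPT")[-1].split("_")[0])  # -> 2
sim_number = int(os.path.basename(m)) if os.path.basename(m).isdigit() else 1  # -> 3
print(fitopt_num, muopt_num, sim_number)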
Example #27
    def classify(self, training):
        model = self.options.get("MODEL")
        model_path = ""
        if not training:
            assert model is not None, "If TRAIN is not specified, you have to point to a model to use"
            if not os.path.exists(get_output_loc(model)):
                for t in self.dependencies:
                    if model == t.name:
                        self.logger.debug(
                            f"Found task dependency {t.name} with model file {t.output['model_filename']}"
                        )
                        model = t.output["model_filename"]
            model_path = get_output_loc(model)
            self.logger.debug(f"Looking for model in {model_path}")
            assert os.path.exists(model_path), f"Cannot find {model_path}"

        types = self.get_types()
        if types is None:
            types = OrderedDict({
                "1": "Ia",
                "0": "unknown",
                "2": "SNIax",
                "3": "SNIa-pec",
                "20": "SNIIP",
                "21": "SNIIL",
                "22": "SNIIn",
                "29": "SNII",
                "32": "SNIb",
                "33": "SNIc",
                "39": "SNIbc",
                "41": "SLSN-I",
                "42": "SLSN-II",
                "43": "SLSN-R",
                "80": "AGN",
                "81": "galaxy",
                "98": "None",
                "99": "pending",
                "101": "Ia",
                "120": "SNII",
                "130": "SNIbc",
            })
        else:
            has_ia = False
            has_cc = False
            self.logger.debug(f"Input types set to {types}")
            for key, value in types.items():
                if value.upper() == "IA":
                    has_ia = True
                elif value.upper() in ["II", "IBC"]:
                    has_cc = True
            if not has_ia:
                self.logger.debug("No Ia type found, injecting type")
                types[1] = "Ia"
                types = dict(sorted(types.items(), key=lambda x: -1 if x[0] == 1 else x[0]))
                self.logger.debug(f"Inject types with Ias are {types}")
            if not has_cc:
                self.logger.debug("No cc type found, injecting type")
                types[29] = "II"
        str_types = json.dumps(types)
        self.logger.debug(f"Types set to {str_types}")

        sim_dep = self.get_simulation_dependency()
        light_curve_dir = sim_dep.output["photometry_dirs"][self.index]
        self.raw_dir = light_curve_dir
        fit = self.get_fit_dependency()
        fit_dir = f"" if fit is None else f"--fits_dir {fit['fitres_dirs'][self.index]}"
        cyclic = "--cyclic" if self.variant in ["vanilla", "variational"
                                                ] and self.cyclic else ""
        batch_size = f"--batch_size {self.batch_size}"
        num_layers = f"--num_layers {self.num_layers}"
        hidden_dim = f"--hidden_dim {self.hidden_dim}"
        variant = f"--model {self.variant}"
        if self.variant == "bayesian":
            variant += " --num_inference_samples 20"

        clump = sim_dep.output.get("clump_file")
        if clump is None:
            clump_txt = ""
        else:
            clump_txt = f"--photo_window_files {clump}"

        if self.batch_file is None:
            if self.gpu:
                self.sbatch_header = self.sbatch_gpu_header
            else:
                self.sbatch_header = self.sbatch_cpu_header
        else:
            with open(self.batch_file, 'r') as f:
                self.sbatch_header = f.read()
            self.sbatch_header = self.clean_header(self.sbatch_header)

        if self.has_yml:
            self.update_yml()
            setup_file = "supernnova_yml"
        else:
            setup_file = "supernnova"

        header_dict = {
            "REPLACE_NAME": self.job_base_name,
            "REPLACE_WALLTIME": "23:00:00",
            "REPLACE_LOGFILE": "output.log",
            "REPLACE_MEM": "32GB",
            "APPEND": ["#SBATCH --ntasks=1", "#SBATCH --cpus-per-task=1"]
        }
        header_dict = merge_dict(header_dict, self.batch_replace)
        self.update_header(header_dict)

        setup_dict = {
            "conda_env": self.conda_env,
            "dump_dir": self.dump_dir,
            "photometry_dir": light_curve_dir,
            "fit_dir": fit_dir,
            "path_to_classifier": self.path_to_classifier,
            "job_name": self.job_base_name,
            "command": "--train_rnn" if training else "--validate_rnn",
            "sntypes": str_types,
            "variant": variant,
            "cyclic": cyclic,
            "model": "" if training else f"--model_files {model_path}",
            "phot": "",
            "test_or_train": "" if training else "--data_testing",
            "redshift": "--redshift " + self.redshift,
            "norm": "--norm " + self.norm,
            "done_file": self.done_file,
            "clump": clump_txt,
            "done_file2": self.done_file2,
            "partition": "gpu2" if self.gpu else "broadwl",
            "gres": "#SBATCH --gres=gpu:1" if self.gpu else "",
            "cuda": "--use_cuda" if self.gpu else "",
            "clean_command": f"rm -rf {self.dump_dir}/processed" if self.clean else "",
            "seed": f"--seed {self.seed}" if self.seed else "",
            "batch_size": batch_size,
            "num_layers": num_layers,
            "hidden_dim": hidden_dim,
            "data_yml": self.output_data_yml,
            "classification_yml": self.output_classification_yml,
            "classification_command": "train_rnn" if training else "validate_rnn",
        }

        format_dict = {
            "sbatch_header": self.sbatch_header,
            "task_setup": self.update_setup(setup_dict, self.task_setup[setup_file]),
        }

        slurm_output_file = self.output_dir + "/job.slurm"
        self.logger.info(
            f"Running SuperNNova, slurm job outputting to {slurm_output_file}")
        slurm_text = self.slurm.format(**format_dict)

        new_hash = self.get_hash_from_string(slurm_text)

        if not self._check_regenerate(new_hash):
            self.should_be_done()
        else:
            self.logger.info("Rerunning. Cleaning output_dir")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            if self.has_yml:
                with open(self.output_data_yml, 'w') as f:
                    f.write(self.data_yml)
                with open(self.output_classification_yml, 'w') as f:
                    f.write(self.classification_yml)

            self.save_new_hash(new_hash)

            with open(slurm_output_file, "w") as f:
                f.write(slurm_text)

            self.logger.info(
                f"Submitting batch job to {'train' if training else 'predict using'} SuperNNova"
            )
            subprocess.run(["sbatch", slurm_output_file], cwd=self.output_dir)

        return True
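The Ia injection in the example above forces the new entry to the front by sorting with a key that maps it to -1; a standalone illustration with integer type codes (as that branch assumes) is:

types = {20: "SNIIP", 29: "SNII", 1: "Ia"}
# Key 1 sorts as -1, so the injected Ia entry ends up first; the rest keep ascending order
types = dict(sorted(types.items(), key=lambda x: -1 if x[0] == 1 else x[0]))
print(types)  # {1: 'Ia', 20: 'SNIIP', 29: 'SNII'}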
Example #28
    def write_input(self, force_refresh):
        self.set_property("GENVERSION",
                          self.genversion,
                          assignment=": ",
                          section_end="ENDLIST_GENVERSION")
        for k in self.config.keys():
            if k.upper() != "GLOBAL":
                run_config = self.config[k]
                run_config_keys = list(run_config.keys())
                assert "BASE" in run_config_keys, "You must specify a base file for each option"
                for key in run_config_keys:
                    if key.upper() in self.reserved_keywords:
                        continue
                    base_file = run_config["BASE"]
                    match = base_file.split(".")[0]
                    self.set_property(f"GENOPT({match})",
                                      f"{key} {run_config[key]}",
                                      section_end="ENDLIST_GENVERSION")

        for key in self.config.get("GLOBAL", []):
            if key.upper() == "BASE":
                continue
            self.set_property(key, self.config['GLOBAL'][key])
            if key == "RANSEED_CHANGE":
                self.delete_property("RANSEED_REPEAT")
            elif key == "RANSEED_REPEAT":
                self.delete_property("RANSEED_CHANGE")

        self.set_property("SIMGEN_INFILE_Ia",
                          " ".join(self.base_ia) if self.base_ia else None)
        self.set_property("SIMGEN_INFILE_NONIa",
                          " ".join(self.base_cc) if self.base_cc else None)
        self.set_property("GENPREFIX", self.genversion)

        # Put config in a temp directory
        temp_dir_obj = tempfile.TemporaryDirectory()
        temp_dir = temp_dir_obj.name

        # Copy the base files across
        for f in self.base_ia:
            shutil.copy(self.data_dir + f, temp_dir)
        for f in self.base_cc:
            shutil.copy(self.data_dir + f, temp_dir)

        # Copy the include input file if there is one
        input_copied = []
        fs = self.base_ia + self.base_cc
        for ff in fs:
            if ff not in input_copied:
                input_copied.append(ff)
                with open(self.data_dir + ff, "r") as f:
                    for line in f.readlines():
                        line = line.strip()
                        if line.startswith("INPUT_FILE_INCLUDE"):
                            include_file = line.split(":")[-1].strip()
                            self.logger.debug(
                                f"Copying included file {include_file}")
                            shutil.copy(self.data_dir + include_file, temp_dir)

        # Write the primary input file
        main_input_file = f"{temp_dir}/{self.genversion}.input"
        with open(main_input_file, "w") as f:
            f.writelines(map(lambda s: s + '\n', self.base))
        self.logger.info(f"Input file written to {main_input_file}")

        # Remove any duplicates and order the output files
        output_files = [
            f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))
        ]
        self.logger.debug(
            f"{len(output_files)} files used to create simulation. Hashing them."
        )

        # Get current hash
        new_hash = self.get_hash_from_files(output_files)
        old_hash = self.get_old_hash()
        regenerate = force_refresh or (old_hash is None or old_hash != new_hash)

        if regenerate:
            self.logger.info(f"Running simulation")
            # Clean output dir. God I feel dangerous doing this, so hopefully unnecessary check
            if "//" not in self.output_dir and len(self.output_dir) > 30:
                self.logger.debug(
                    f"Cleaning output directory {self.output_dir}")
                shutil.rmtree(self.output_dir, ignore_errors=True)
                mkdirs(self.output_dir)
                self.logger.debug(
                    f"Copying from {temp_dir} to {self.output_dir}")
                copytree(temp_dir, self.output_dir)
                self.save_new_hash(new_hash)
            else:
                self.logger.error(
                    f"Seems to be an issue with the output dir path: {self.output_dir}"
                )

            chown_dir(self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")
        temp_dir_obj.cleanup()
        return regenerate, new_hash
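Example #28 stages everything in a temporary directory, hashes the staged files, and only replaces the output directory when that hash differs from the stored one; the gating can be sketched on its own (hash_files is a hypothetical stand-in for get_hash_from_files):

import hashlib
import os
import tempfile

def hash_files(paths):
    # Hypothetical stand-in for get_hash_from_files: hash the file contents in order
    h = hashlib.sha256()
    for p in paths:
        with open(p, "rb") as f:
            h.update(f.read())
    return h.hexdigest()

old_hash = None  # would normally be read back from the previous run
with tempfile.TemporaryDirectory() as temp_dir:
    with open(os.path.join(temp_dir, "example.input"), "w") as f:
        f.write("GENVERSION: TEST\n")
    staged = sorted(os.path.join(temp_dir, a) for a in os.listdir(temp_dir))
    new_hash = hash_files(staged)
    regenerate = old_hash is None or old_hash != new_hash
    print("regenerate:", regenerate)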
Example #29
    def execute(self, check_config, compress_output, uncompress_output):
        self.logger.info(f"Executing pipeline for prefix {self.prefix}")
        self.logger.info(f"Output will be located in {self.output_dir}")
        if check_config:
            self.logger.info("Only verifying config, not launching anything")
        assert not (
            compress_output and uncompress_output
        ), "-C / --compress and -U / --uncompress are mutually exclusive"
        # Whilst compressing is being debugged, false by default
        self.compress = False
        if compress_output:
            self.compress = True
            self.logger.info("Compressing output")
        if uncompress_output:
            self.compress = False
            self.logger.info("Uncompressing output")

        mkdirs(self.output_dir)
        c = self.run_config

        self.tasks = self.get_tasks(c)

        self.num_jobs_queue = 0
        self.num_jobs_queue_gpu = 0
        squeue = None

        if check_config:
            if compress_output:
                self.compress_all()
            if uncompress_output:
                self.uncompress_all()
            self.logger.notice("Config verified, exiting")
            return

        self.print_dashboard()

        start_sleep_time = self.global_config["OUTPUT"]["ping_frequency"]
        max_sleep_time = self.global_config["OUTPUT"]["max_ping_frequency"]
        current_sleep_time = start_sleep_time

        config_file_output = os.path.join(self.output_dir,
                                          os.path.basename(self.filename_path))
        if not check_config and self.filename_path != config_file_output:
            self.logger.info(
                f"Saving processed and parsed config file to {config_file_output}"
            )
            with open(config_file_output, 'w') as f:
                f.write(self.file_raw)
            #shutil.copy(self.filename_path, config_file_output)
            chown_file(config_file_output)

        # Welcome to the primary loop
        while self.tasks or self.running:
            small_wait = False

            # Check status of current jobs
            for t in self.running:
                try:
                    completed = self.check_task_completion(t, squeue)
                    small_wait = small_wait or completed
                except Exception as e:
                    self.logger.exception(e, exc_info=True)
                    self.fail_task(t)

            # Submit new jobs if needed
            while self.num_jobs_queue < self.max_jobs:

                t = self.get_task_to_run()
                if t is not None:
                    self.logger.info("")
                    self.tasks.remove(t)
                    self.logger.notice(f"LAUNCHING: {t}")
                    try:
                        t.set_force_refresh(self.get_force_refresh(t))
                        t.set_force_ignore(self.get_force_ignore(t))
                        t.set_sbatch_cpu_header(self.sbatch_cpu_header)
                        t.set_sbatch_gpu_header(self.sbatch_gpu_header)
                        t.set_setup(self.task_setup)
                        started = t.run()
                    except Exception as e:
                        self.logger.exception(e, exc_info=True)
                        started = False
                    if started:
                        if t.gpu:
                            self.num_jobs_queue_gpu += t.num_jobs
                            message = (
                                f"LAUNCHED: {t} with {t.num_jobs} GPU NUM_JOBS. Total GPU NUM_JOBS now {self.num_jobs_queue_gpu}/{self.max_jobs_in_queue_gpu}"
                            )
                        else:
                            self.num_jobs_queue += t.num_jobs
                            message = f"LAUNCHED: {t} with {t.num_jobs} NUM_JOBS. Total NUM_JOBS now {self.num_jobs_queue}/{self.max_jobs_in_queue}"
                        self.logger.notice(message)
                        self.running.append(t)
                        completed = False
                        try:
                            completed = self.check_task_completion(t, squeue)
                        except Exception as e:
                            self.logger.exception(e, exc_info=True)
                            self.fail_task(t)
                        small_wait = small_wait or completed
                    else:
                        self.logger.error(f"FAILED TO LAUNCH: {t}")
                        self.fail_task(t)
                    small_wait = True
                else:
                    break

            # Check quickly if we've added a new job, etc, in case of immediate failure
            if small_wait:
                self.log_status()
                current_sleep_time = start_sleep_time
                time.sleep(0.1)
                squeue = None
            else:
                time.sleep(current_sleep_time)
                current_sleep_time *= 2
                if current_sleep_time > max_sleep_time:
                    current_sleep_time = max_sleep_time
                p = subprocess.run(f"squeue -h -u $USER -o '%.j'",
                                   shell=True,
                                   text=True,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
                if (p.returncode != 0) or (p.stderr != ""):
                    self.logger.error(
                        f"Command '{p.args}' failed with exit status '{p.returncode}' and error '{p.stderr.strip()}'"
                    )
                else:
                    squeue = [i.strip() for i in p.stdout.splitlines()]
                    n = len(squeue)
                    if n == 0 or n > self.max_jobs:
                        self.logger.debug(
                            f"Squeue is reporting {n} NUM_JOBS in the queue... this is either 0 or over the configured maximum"
                        )
        self.log_finals()
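The main loop above backs off exponentially between squeue polls, resetting to the base interval whenever a job starts or finishes and capping at max_ping_frequency; in isolation the backoff is just (values illustrative):

start_sleep_time = 10    # illustrative ping_frequency, in seconds
max_sleep_time = 300     # illustrative max_ping_frequency, in seconds
current_sleep_time = start_sleep_time

for something_changed in [False, False, False, True, False]:
    if something_changed:
        current_sleep_time = start_sleep_time   # reset after activity (the small wait in the real loop)
    else:
        # time.sleep(current_sleep_time) would go here in the real loop
        current_sleep_time = min(current_sleep_time * 2, max_sleep_time)
    print("next sleep:", current_sleep_time)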
Example #30
    def classify(self, training, force_refresh):
        model = self.options.get("MODEL")
        model_path = ""
        if not training:
            assert model is not None, "If TRAIN is not specified, you have to point to a model to use"
            if not os.path.exists(get_output_loc(model)):
                for t in self.dependencies:
                    if model == t.name:
                        self.logger.debug(
                            f"Found task dependency {t.name} with model file {t.output['model_filename']}"
                        )
                        model = t.output["model_filename"]
            model_path = get_output_loc(model)
            self.logger.debug(f"Looking for model in {model_path}")
            assert os.path.exists(model_path), f"Cannot find {model_path}"

        types = self.get_types()
        if types is None:
            types = OrderedDict({
                "1": "Ia",
                "0": "unknown",
                "2": "SNIax",
                "3": "SNIa-pec",
                "20": "SNIIP",
                "21": "SNIIL",
                "22": "SNIIn",
                "29": "SNII",
                "32": "SNIb",
                "33": "SNIc",
                "39": "SNIbc",
                "41": "SLSN-I",
                "42": "SLSN-II",
                "43": "SLSN-R",
                "80": "AGN",
                "81": "galaxy",
                "98": "None",
                "99": "pending",
                "101": "Ia",
                "120": "SNII",
                "130": "SNIbc",
            })
        else:
            has_ia = False
            has_cc = False
            self.logger.debug(f"Input types set to {types}")
            for key, value in types.items():
                if value.upper() == "IA":
                    has_ia = True
                elif value.upper() in ["II", "IBC"]:
                    has_cc = True
            if not has_ia:
                self.logger.debug("No Ia type found, injecting type")
                types.update({"1": "Ia"})
                types.move_to_end("1", last=False)
            if not has_cc:
                self.logger.debug("No cc type found, injecting type")
                types.update({"29": "II"})
        str_types = json.dumps(types)
        self.logger.debug(f"Types set to {str_types}")

        sim_dep = self.get_simulation_dependency()
        light_curve_dir = sim_dep.output["photometry_dirs"][self.index]
        fit = self.get_fit_dependency()
        fit_dir = f"" if fit is None else f"--fits_dir {fit['fitres_dirs'][self.index]}"
        cyclic = "--cyclic" if self.variant in ["vanilla", "variational"
                                                ] else ""
        variant = f"--model {self.variant}"
        if self.variant == "bayesian":
            variant += " --num_inference_samples 20"

        clump = sim_dep.output.get("clump_file")
        if clump is None:
            clump_txt = ""
        else:
            clump_txt = f"--photo_window_files {clump}"

        format_dict = {
            "conda_env": self.conda_env,
            "dump_dir": self.dump_dir,
            "photometry_dir": light_curve_dir,
            "fit_dir": fit_dir,
            "path_to_classifier": self.path_to_classifier,
            "job_name": self.job_base_name,
            "command": "--train_rnn" if training else "--validate_rnn",
            "sntypes": str_types,
            "variant": variant,
            "cyclic": cyclic,
            "model": "" if training else f"--model_files {model_path}",
            "phot": "",
            "test_or_train": "" if training else "--data_testing",
            "redshift": "--redshift " + self.redshift,
            "norm": "--norm " + self.norm,
            "done_file": self.done_file,
            "clump": clump_txt,
            "done_file2": self.done_file2,
        }

        slurm_output_file = self.output_dir + "/job.slurm"
        self.logger.info(
            f"Running SuperNNova, slurm job outputting to {slurm_output_file}")
        slurm_text = self.slurm.format(**format_dict)

        old_hash = self.get_old_hash()
        new_hash = self.get_hash_from_string(slurm_text)

        if not force_refresh and new_hash == old_hash:
            self.logger.info("Hash check passed, not rerunning")
            self.should_be_done()
        else:
            self.logger.info("Rerunning. Cleaning output_dir")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.save_new_hash(new_hash)

            with open(slurm_output_file, "w") as f:
                f.write(slurm_text)

            self.logger.info(
                f"Submitting batch job to {'train' if training else 'predict using'} SuperNNova"
            )
            subprocess.run(["sbatch", slurm_output_file], cwd=self.output_dir)

        return True
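Example #30 injects any missing Ia/CC types and uses OrderedDict.move_to_end so the injected Ia entry leads the mapping before it is serialised; a standalone illustration:

import json
from collections import OrderedDict

types = OrderedDict({"20": "SNIIP", "29": "SNII"})
types.update({"1": "Ia"})
types.move_to_end("1", last=False)  # put the injected Ia entry first
print(json.dumps(types))  # {"1": "Ia", "20": "SNIIP", "29": "SNII"}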