def main(args, logger: Logger = get_basic_logger()):
    params = utils.load_sim_params(os.path.join(args.rel_dir, "sim_params.yaml"))
    sim_dir = params.sim_dir
    mgmt_db_loc = params.mgmt_db_location
    submit_yes = True if args.auto else confirm("Also submit the job for you?")

    # get the srf(rup) name without extensions
    srf_name = os.path.splitext(os.path.basename(params.srf_file))[0]
    # if srf(variation) is provided as args, only create the slurm
    # script with the same name provided
    if args.srf is not None and srf_name != args.srf:
        return

    write_directory = args.write_directory if args.write_directory else sim_dir

    # get lf_sim_dir
    lf_sim_dir = os.path.join(sim_dir, "LF")

    header_dict = {
        "platform_specific_args": get_platform_node_requirements(
            platform_config[const.PLATFORM_CONFIG.MERGE_TS_DEFAULT_NCORES.name]
        ),
        "wallclock_limit": default_run_time_merge_ts,
        "job_name": "merge_ts.{}".format(srf_name),
        "job_description": "post emod3d: merge_ts",
        "additional_lines": "###SBATCH -C avx",
    }

    command_template_parameters = {
        "run_command": platform_config[const.PLATFORM_CONFIG.RUN_COMMAND.name],
        "merge_ts_path": binary_version.get_unversioned_bin(
            "merge_tsP3_par", get_machine_config(args.machine)["tools_dir"]
        ),
    }

    body_template_params = (
        "{}.sl.template".format(merge_ts_name_prefix),
        {"lf_sim_dir": lf_sim_dir},
    )

    script_prefix = "{}_{}".format(merge_ts_name_prefix, srf_name)
    script_file_path = write_sl_script(
        write_directory,
        sim_dir,
        const.ProcessType.merge_ts,
        script_prefix,
        header_dict,
        body_template_params,
        command_template_parameters,
    )

    if submit_yes:
        submit_script_to_scheduler(
            script_file_path,
            const.ProcessType.merge_ts.value,
            sim_struct.get_mgmt_db_queue(mgmt_db_loc),
            sim_dir,
            srf_name,
            target_machine=args.machine,
            logger=logger,
        )
def generate_empirical_script(np, extended, cybershake_folder, realisations, out_dir):
    # extended is '-e' or ''
    faults = map(simulation_structure.get_fault_from_realisation, realisations)
    run_data = zip(realisations, faults)
    run_data = [
        (rel, fault)
        for (rel, fault) in run_data
        if rrup_file_exists(cybershake_folder, rel)
    ]

    # determine NP
    # TODO: empirical are currently not parallel, update this when they are
    np = 1

    # load sim_params for vs30_file
    # this assumes all simulations use the same vs30 in root_params.yaml
    sim_dir = simulation_structure.get_sim_dir(cybershake_folder, run_data[0][0])
    sim_params = utils.load_sim_params(
        simulation_structure.get_sim_params_yaml_path(sim_dir)
    )

    timestamp_format = "%Y%m%d_%H%M%S"
    timestamp = datetime.now().strftime(timestamp_format)

    template_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "..", "templates/"
    )
    header = resolve_header(
        platform_config[const.PLATFORM_CONFIG.TEMPLATES_DIR.name],
        wallclock_limit="00:30:00",
        job_name="empirical",
        version="slurm",
        memory="2G",
        exe_time="%j",
        job_description="Empirical Engine",
        additional_lines="",
        template_path=platform_config[const.PLATFORM_CONFIG.HEADER_FILE.name],
        write_directory=out_dir,
        platform_specific_args={"n_tasks": np},
    )
    context = generate_context(
        template_dir,
        "empirical.sl.template",
        {
            "run_data": run_data,
            "np": np,
            "extended": extended,
            "vs30_file": sim_params.stat_vs_est,
            "mgmt_db_location": cybershake_folder,
        },
    )
    sl_name = os.path.join(out_dir, "run_empirical_{}.sl".format(timestamp))
    content = "{}\n{}".format(header, context)
    write_sl(sl_name, content)
    return sl_name
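# A minimal usage sketch for generate_empirical_script. The cybershake folder
# and realisation names below are hypothetical placeholders; note that only
# realisations with an existing rrup file end up in the script's run_data.
def _example_generate_empirical_script():
    rels = ["Hossack_REL01", "Hossack_REL02"]  # hypothetical realisation names
    sl_path = generate_empirical_script(
        np=1,  # empirical runs are currently serial, so np is forced to 1 anyway
        extended="-e",  # "-e" for extended periods, "" otherwise
        cybershake_folder="/path/to/cybershake_root",
        realisations=rels,
        out_dir="/path/to/slurm_out",
    )
    return sl_path  # e.g. .../run_empirical_20240101_120000.sl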
def get_runs_dir_params(
    runs_dir: str, fault_names: List[str], realisations: np.ndarray
):
    """Gets the parameters from the runs directory.

    Assumes that all realisations of a fault have the same parameters.
    """
    data = []
    for ix, fault_name in enumerate(fault_names):
        r = realisations[ix][0]
        params = utils.load_sim_params(
            os.path.join(runs_dir, fault_name, r, "sim_params.yaml")
        )
        data.append((params.dt, params.hf.dt, params.FD_STATLIST, params.hf.slip))

    # np.object was removed in numpy 1.24; the builtin object is equivalent
    return np.rec.array(
        data,
        dtype=[
            ("dt", np.float32),
            ("hf_dt", np.float32),
            ("fd_statlist", object),
            ("slip", object),
        ],
    )
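# A minimal sketch of consuming the record array returned by get_runs_dir_params.
# The fault name and realisation array are hypothetical; fields are accessed via
# the dtype names defined above, per-record or as whole columns.
def _example_get_runs_dir_params():
    fault_names = ["Hossack"]  # hypothetical fault
    realisations = np.array([["Hossack_REL01"]])  # one realisation per fault
    rec = get_runs_dir_params("/path/to/Runs", fault_names, realisations)
    print(rec[0].dt, rec[0].hf_dt)  # LF and HF timesteps of the first fault
    print(rec.fd_statlist)  # station list paths for all faults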
def install_fault(
    fault_name,
    n_rel,
    root_folder,
    version,
    stat_file_path,
    seed=HF_DEFAULT_SEED,
    extended_period=False,
    vm_perturbations=False,
    ignore_vm_perturbations=False,
    vm_qpqs_files=False,
    ignore_vm_qpqs_files=False,
    keep_dup_station=True,
    components=None,
    logger: Logger = get_basic_logger(),
):
    config_dict = utils.load_yaml(
        os.path.join(
            platform_config[PLATFORM_CONFIG.TEMPLATES_DIR.name],
            "gmsim",
            version,
            ROOT_DEFAULTS_FILE_NAME,
        )
    )
    # Load variables from cybershake config
    v1d_full_path = os.path.join(
        platform_config[PLATFORM_CONFIG.VELOCITY_MODEL_DIR.name],
        "Mod-1D",
        config_dict.get("v_1d_mod"),
    )
    site_v1d_dir = config_dict.get("site_v1d_dir")
    hf_stat_vs_ref = config_dict.get("hf_stat_vs_ref")

    vs30_file_path = stat_file_path.replace(".ll", ".vs30")
    vs30ref_file_path = stat_file_path.replace(".ll", ".vs30ref")

    # this variable has to be empty
    # TODO: fix this legacy issue, very low priority
    event_name = ""

    # get all srf from source
    srf_dir = simulation_structure.get_srf_dir(root_folder, fault_name)
    list_srf = glob.glob(os.path.join(srf_dir, "*_REL*.srf"))
    if len(list_srf) == 0:
        list_srf = glob.glob(os.path.join(srf_dir, "*.srf"))

    list_srf.sort()
    if n_rel is not None and len(list_srf) != n_rel:
        message = (
            "Error: fault {} failed. Number of realisations does "
            "not match number of SRF files".format(fault_name)
        )
        logger.log(NOPRINTCRITICAL, message)
        raise RuntimeError(message)

    # Get & validate velocity model directory
    vel_mod_dir = simulation_structure.get_fault_VM_dir(root_folder, fault_name)
    valid_vm, message = validate_vm.validate_vm(vel_mod_dir, srf=list_srf[0])
    if not valid_vm:
        message = "Error: VM {} failed {}".format(fault_name, message)
        logger.log(NOPRINTCRITICAL, message)
        raise RuntimeError(message)

    # Load the variables from vm_params.yaml
    vm_params_path = os.path.join(vel_mod_dir, VM_PARAMS_FILE_NAME)
    vm_params_dict = utils.load_yaml(vm_params_path)
    yes_model_params = (
        False  # statgrid should normally be already generated with Velocity Model
    )

    sim_root_dir = simulation_structure.get_runs_dir(root_folder)
    fault_yaml_path = simulation_structure.get_fault_yaml_path(
        sim_root_dir, fault_name
    )
    root_yaml_path = simulation_structure.get_root_yaml_path(sim_root_dir)
    for srf in list_srf:
        logger.info("Installing {}".format(srf))
        # try to find the stoch file with the same basename
        realisation_name = os.path.splitext(os.path.basename(srf))[0]
        stoch_file_path = simulation_structure.get_stoch_path(
            root_folder, realisation_name
        )
        sim_params_file = simulation_structure.get_source_params_path(
            root_folder, realisation_name
        )
        if not os.path.isfile(stoch_file_path):
            message = "Error: Corresponding Stoch file is not found: {}".format(
                stoch_file_path
            )
            logger.log(NOPRINTCRITICAL, message)
            raise RuntimeError(message)

        # install pairs one by one to fit the new structure
        sim_dir = simulation_structure.get_sim_dir(root_folder, realisation_name)
        (root_params_dict, fault_params_dict, sim_params_dict) = install_simulation(
            version=version,
            sim_dir=sim_dir,
            rel_name=realisation_name,
            run_dir=sim_root_dir,
            vel_mod_dir=vel_mod_dir,
            srf_file=srf,
            stoch_file=stoch_file_path,
            stat_file_path=stat_file_path,
            vs30_file_path=vs30_file_path,
            vs30ref_file_path=vs30ref_file_path,
            yes_statcords=False,
            fault_yaml_path=fault_yaml_path,
            root_yaml_path=root_yaml_path,
            cybershake_root=root_folder,
            site_v1d_dir=site_v1d_dir,
            hf_stat_vs_ref=hf_stat_vs_ref,
            v1d_full_path=v1d_full_path,
            sim_params_file=sim_params_file,
            seed=seed,
            logger=logger,
            extended_period=extended_period,
            vm_perturbations=vm_perturbations,
            ignore_vm_perturbations=ignore_vm_perturbations,
            vm_qpqs_files=vm_qpqs_files,
            ignore_vm_qpqs_files=ignore_vm_qpqs_files,
            components=components,
        )

        if (
            root_params_dict is None
            or fault_params_dict is None
            or sim_params_dict is None
        ):
            # Something has gone wrong, return without saving anything
            logger.critical("Critical error: some of the params dictionaries are None")
            return

        # root_params_dict is guaranteed non-None past this point
        if not isclose(vm_params_dict["flo"], root_params_dict["flo"]):
            logger.critical(
                "The parameter 'flo' does not match in the VM params and root params files. "
                "Please ensure you are installing the correct gmsim version"
            )
            return

        create_mgmt_db.create_mgmt_db(
            [], simulation_structure.get_mgmt_db(root_folder), srf_files=srf
        )
        utils.setup_dir(os.path.join(root_folder, "mgmt_db_queue"))
        root_params_dict["mgmt_db_location"] = root_folder

        # Generate the fd files, create these at the fault level
        fd_statcords, fd_statlist = generate_fd_files(
            simulation_structure.get_fault_dir(root_folder, fault_name),
            vm_params_dict,
            stat_file=stat_file_path,
            logger=logger,
            keep_dup_station=keep_dup_station,
        )

        fault_params_dict[FaultParams.stat_coords.value] = fd_statcords
        fault_params_dict[FaultParams.FD_STATLIST.value] = fd_statlist
        # root_params_dict['hf_stat_vs_ref'] = cybershake_cfg['hf_stat_vs_ref']
        dump_all_yamls(sim_dir, root_params_dict, fault_params_dict, sim_params_dict)

        # test if the params are accepted by steps HF and BB
        sim_params = utils.load_sim_params(os.path.join(sim_dir, "sim_params.yaml"))
        # check hf
        # temporarily change the script name to hf_sim, due to how error messages are shown
        main_script_name = sys.argv[0]
        sys.argv[0] = "hf_sim.py"

        command_template, add_args = hf_gen_command_template(
            sim_params, list(HPC)[0].name, seed
        )
        run_command = gen_args_cmd(
            ProcessType.HF.command_template, command_template, add_args
        )
        hf_args_parser(cmd=run_command)

        # check bb
        sys.argv[0] = "bb_sim.py"
        command_template, add_args = bb_gen_command_template(sim_params)
        run_command = gen_args_cmd(
            ProcessType.BB.command_template, command_template, add_args
        )
        bb_args_parser(cmd=run_command)

        # change back, to prevent unexpected errors
        sys.argv[0] = main_script_name
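# Hedged example of driving install_fault for a single fault. All paths and the
# gmsim version string are hypothetical; in the real workflow these come from
# the cybershake configuration.
def _example_install_fault():
    install_fault(
        fault_name="Hossack",
        n_rel=1,  # expected number of SRF realisations; mismatch raises RuntimeError
        root_folder="/path/to/cybershake_root",
        version="16.1",  # hypothetical gmsim template version
        stat_file_path="/path/to/stations.ll",  # .vs30/.vs30ref paths derived from this
        extended_period=False,
    )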
def main(
    args: argparse.Namespace,
    est_model: est.EstModel = None,
    logger: Logger = get_basic_logger(),
):
    params = utils.load_sim_params(os.path.join(args.rel_dir, "sim_params.yaml"))

    submit_yes = True if args.auto else confirm("Also submit the job for you?")

    logger.debug("params.srf_file {}".format(params.srf_file))
    # Get the srf(rup) name without extensions
    srf_name = os.path.splitext(os.path.basename(params.srf_file))[0]
    if args.srf is None or srf_name == args.srf:
        logger.debug("not set_params_only")
        # get lf_sim_dir
        sim_dir = os.path.abspath(params.sim_dir)
        lf_sim_dir = sim_struct.get_lf_dir(sim_dir)

        # the default number of cores is overridden if the user passes ncore
        nt = int(float(params.sim_duration) / float(params.dt))

        target_qconfig = get_machine_config(args.machine)

        retries = args.retries if hasattr(args, "retries") else None

        est_cores, est_run_time, wct = get_lf_cores_and_wct(
            est_model,
            logger,
            nt,
            params,
            sim_dir,
            srf_name,
            target_qconfig,
            args.ncore,
            retries,
        )

        binary_path = binary_version.get_lf_bin(
            params.emod3d.emod3d_version, target_qconfig["tools_dir"]
        )
        # use the original estimated run time to determine the checkpoint
        # interval, capped so that there are at least 3 checkpoints
        steps_per_checkpoint = int(
            min(nt / (60.0 * est_run_time) * const.CHECKPOINT_DURATION, nt // 3)
        )
        write_directory = (
            args.write_directory if args.write_directory else params.sim_dir
        )

        set_runparams.create_run_params(
            sim_dir, steps_per_checkpoint=steps_per_checkpoint, logger=logger
        )

        header_dict = {
            "wallclock_limit": wct,
            "job_name": "emod3d.{}".format(srf_name),
            "job_description": "emod3d slurm script",
            "additional_lines": "#SBATCH --hint=nomultithread",
            "platform_specific_args": get_platform_node_requirements(est_cores),
        }

        command_template_parameters = {
            "run_command": platform_config[const.PLATFORM_CONFIG.RUN_COMMAND.name],
            "emod3d_bin": binary_path,
            "lf_sim_dir": lf_sim_dir,
        }

        body_template_params = ("run_emod3d.sl.template", {})

        script_prefix = "run_emod3d_{}".format(srf_name)
        script_file_path = write_sl_script(
            write_directory,
            params.sim_dir,
            const.ProcessType.EMOD3D,
            script_prefix,
            header_dict,
            body_template_params,
            command_template_parameters,
        )
        if submit_yes:
            submit_script_to_scheduler(
                script_file_path,
                const.ProcessType.EMOD3D.value,
                sim_struct.get_mgmt_db_queue(params.mgmt_db_location),
                params.sim_dir,
                srf_name,
                target_machine=args.machine,
                logger=logger,
            )
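# A worked sketch of the checkpoint-interval arithmetic above, under the hedged
# reading that est_run_time is in hours and const.CHECKPOINT_DURATION is in
# minutes. The nt // 3 cap guarantees at least three checkpoints regardless of
# the estimate. Numbers are illustrative.
def _example_checkpoint_steps(nt=20000, est_run_time=2.0, checkpoint_duration=30):
    steps_per_minute = nt / (60.0 * est_run_time)  # ~166.7 steps per wallclock minute
    by_duration = steps_per_minute * checkpoint_duration  # ~5000 steps per checkpoint
    return int(min(by_duration, nt // 3))  # min(5000, 6666) -> 5000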
def create_run_params(
    sim_dir,
    srf_name=None,
    steps_per_checkpoint=None,
    logger: Logger = get_basic_logger(),
):
    params = utils.load_sim_params(os.path.join(sim_dir, "sim_params.yaml"))

    emod3d_version = params["emod3d"]["emod3d_version"]
    emod3d_filepath = binary_version.get_lf_bin(emod3d_version)

    e3d_yaml = os.path.join(
        platform_config[constants.PLATFORM_CONFIG.TEMPLATES_DIR.name],
        "gmsim",
        params.version,
        "emod3d_defaults.yaml",
    )
    e3d_dict = utils.load_yaml(e3d_yaml)
    # only run the logic when no srf_name is provided, or when it matches
    # this simulation's srf file
    if srf_name is None or srf_name == os.path.splitext(
        os.path.basename(params.srf_file)
    )[0]:
        # EMOD3D adds a timeshift to the event rupture time
        # this must be accounted for as EMOD3D does not extend the sim duration by the amount of time shift
        # As flo is in Hz, the sim_duration_extension is in s
        # Version 3.0.4 was the last version of EMOD3D to have a shift of 1/flo,
        # while versions after it have a shift of 3/flo
        sim_duration_extension = 1 / float(params.flo)
        if compare_versions(emod3d_version, MAXIMUM_EMOD3D_TIMESHIFT_1_VERSION) > 0:
            sim_duration_extension *= 3

        extended_sim_duration = float(params.sim_duration) + sim_duration_extension

        srf_file_basename = os.path.splitext(os.path.basename(params.srf_file))[0]
        e3d_dict["version"] = emod3d_version + "-mpi"

        e3d_dict["name"] = params.run_name
        e3d_dict["n_proc"] = 512

        e3d_dict["nx"] = params.nx
        e3d_dict["ny"] = params.ny
        e3d_dict["nz"] = params.nz
        e3d_dict["h"] = params.hh
        e3d_dict["dt"] = params.dt

        e3d_dict["nt"] = str(int(round(extended_sim_duration / float(params.dt))))
        e3d_dict["flo"] = float(params.flo)

        e3d_dict["faultfile"] = params.srf_file
        e3d_dict["vmoddir"] = params.vel_mod_dir

        e3d_dict["modellon"] = params.MODEL_LON
        e3d_dict["modellat"] = params.MODEL_LAT
        e3d_dict["modelrot"] = params.MODEL_ROT

        e3d_dict["main_dump_dir"] = os.path.join(params.sim_dir, "LF", "OutBin")
        e3d_dict["seiscords"] = params.stat_coords
        e3d_dict["user_scratch"] = os.path.join(params.user_root, "scratch")
        e3d_dict["seisdir"] = os.path.join(
            e3d_dict["user_scratch"], params.run_name, srf_file_basename, "SeismoBin"
        )

        e3d_dict["ts_total"] = str(
            int(
                extended_sim_duration
                / (float(e3d_dict["dt"]) * float(e3d_dict["dtts"]))
            )
        )
        e3d_dict["ts_file"] = os.path.join(
            e3d_dict["main_dump_dir"], params.run_name + "_xyts.e3d"
        )
        e3d_dict["ts_out_dir"] = os.path.join(params.sim_dir, "LF", "TSlice", "TSFiles")

        e3d_dict["restartdir"] = os.path.join(params.sim_dir, "LF", "Restart")
        if steps_per_checkpoint:
            e3d_dict["dump_itinc"] = e3d_dict["restart_itinc"] = int(
                steps_per_checkpoint
            )

        e3d_dict["restartname"] = params.run_name
        e3d_dict["logdir"] = os.path.join(params.sim_dir, "LF", "Rlog")
        e3d_dict["slipout"] = os.path.join(
            params.sim_dir, "LF", "SlipOut", "slipout-k2"
        )

        # other locations
        e3d_dict["wcc_prog_dir"] = emod3d_filepath
        e3d_dict["vel_mod_params_dir"] = params.vel_mod_dir
        e3d_dict["sim_dir"] = params.sim_dir
        e3d_dict["stat_file"] = params.stat_file
        e3d_dict["grid_file"] = params.GRIDFILE
        e3d_dict["model_params"] = params.MODEL_PARAMS

        if params.emod3d:
            for key, value in params.emod3d.items():
                if key in e3d_dict:
                    e3d_dict[key] = value
                else:
                    logger.debug(
                        "{} not found as a key in e3d file. Ignoring variable. "
                        "Value is {}.".format(key, value)
                    )

        shared.write_to_py(os.path.join(params.sim_dir, "LF", "e3d.par"), e3d_dict)
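# Minimal sketch of the EMOD3D time-shift handling above: versions up to
# MAXIMUM_EMOD3D_TIMESHIFT_1_VERSION shift by 1/flo seconds, later versions by
# 3/flo, and nt is computed from the extended duration. Values are illustrative.
def _example_sim_duration_extension(
    flo=0.25, sim_duration=100.0, dt=0.02, newer_emod3d=True
):
    extension = 1.0 / flo  # 4 s for flo = 0.25 Hz
    if newer_emod3d:  # i.e. version > MAXIMUM_EMOD3D_TIMESHIFT_1_VERSION
        extension *= 3  # 12 s
    extended = sim_duration + extension  # 112 s
    return int(round(extended / dt))  # nt = 5600 for the defaults above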
def main(
    args: argparse.Namespace,
    est_model: est.EstModel = None,
    logger: Logger = get_basic_logger(),
):
    params = utils.load_sim_params(os.path.join(args.rel_dir, "sim_params.yaml"))

    # check if the args is none, if not, change the version
    if args.version is not None and args.version in ["mpi", "run_hf_mpi"]:
        version = args.version
        ll_name_prefix = "run_hf_mpi"
    else:
        if args.version is not None:
            logger.error(
                "{} cannot be recognized as a valid version option. "
                "version is set to default: {}".format(
                    args.version,
                    platform_config[const.PLATFORM_CONFIG.HF_DEFAULT_VERSION.name],
                )
            )
        version = platform_config[const.PLATFORM_CONFIG.HF_DEFAULT_VERSION.name]
        ll_name_prefix = platform_config[const.PLATFORM_CONFIG.HF_DEFAULT_VERSION.name]
    logger.debug("version: {}".format(version))

    # same logic as in install_bb: sniff through params_base to get the srf
    # name, instead of running through file directories
    srf_name = os.path.splitext(os.path.basename(params.srf_file))[0]

    # if srf(variation) is provided as args, only create
    # the slurm script with the same name provided
    if args.srf is None or srf_name == args.srf:
        nt = get_hf_nt(params)
        fd_count = len(shared.get_stations(params.FD_STATLIST))
        # TODO: make it read through the whole list
        # instead of assuming every stoch has the same size
        nsub_stoch, sub_fault_area = srf.get_nsub_stoch(params.hf.slip, get_area=True)

        if est_model is None:
            est_model = os.path.join(
                platform_config[const.PLATFORM_CONFIG.ESTIMATION_MODELS_DIR.name], "HF"
            )
        est_core_hours, est_run_time, est_cores = est.est_HF_chours_single(
            fd_count,
            nsub_stoch,
            nt,
            args.ncore,
            est_model,
            scale_ncores=SCALE_NCORES,
            logger=logger,
        )

        # scale up est_run_time if this is a re-run (with checkpointing);
        # use an extra variable so we keep the original estimate for other purposes
        est_run_time_scaled = est_run_time
        if hasattr(args, "retries") and int(args.retries) > 0:
            # check if HF.bin is readable, i.e. the run is restartable
            try:
                from qcore.timeseries import HFSeis

                HFSeis(sim_struct.get_hf_bin_path(params.sim_dir))
            except Exception:
                logger.debug("Retried count > 0 but HF.bin is not readable")
            else:
                est_run_time_scaled = est_run_time * (int(args.retries) + 1)

        wct = set_wct(est_run_time_scaled, est_cores, args.auto)
        hf_sim_dir = sim_struct.get_hf_dir(params.sim_dir)
        write_directory = (
            args.write_directory if args.write_directory else params.sim_dir
        )
        underscored_srf = srf_name.replace("/", "__")

        header_dict = {
            "platform_specific_args": get_platform_node_requirements(est_cores),
            "wallclock_limit": wct,
            "job_name": "hf.{}".format(underscored_srf),
            "job_description": "HF calculation",
            "additional_lines": "###SBATCH -C avx",
        }

        command_template_parameters, add_args = gen_command_template(
            params, args.machine, seed=args.seed
        )

        body_template_params = (
            "{}.sl.template".format(ll_name_prefix),
            {"hf_sim_dir": hf_sim_dir, "test_hf_script": "test_hf.sh"},
        )

        script_prefix = "{}_{}".format(ll_name_prefix, underscored_srf)
        script_file_path = write_sl_script(
            write_directory,
            params.sim_dir,
            const.ProcessType.HF,
            script_prefix,
            header_dict,
            body_template_params,
            command_template_parameters,
            add_args,
        )

        # Submit the script
        submit_yes = True if args.auto else confirm("Also submit the job for you?")
        if submit_yes:
            submit_script_to_scheduler(
                script_file_path,
                const.ProcessType.HF.value,
                sim_struct.get_mgmt_db_queue(params.mgmt_db_location),
                params.sim_dir,
                srf_name,
                target_machine=args.machine,
                logger=logger,
            )
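# Hedged sketch of the retry scaling used above for HF (and analogously for BB):
# if a previous attempt left a readable HF.bin, the run can restart from
# checkpoints, so the wallclock estimate is scaled by (retries + 1).
def _example_retry_scaled_run_time(est_run_time=1.5, retries=2, restartable=True):
    if retries > 0 and restartable:
        return est_run_time * (retries + 1)  # 4.5 hours for the defaults above
    return est_run_time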
def main(
    args: argparse.Namespace,
    est_model: est.EstModel = None,
    logger: Logger = get_basic_logger(),
):
    params = utils.load_sim_params(os.path.join(args.rel_dir, "sim_params.yaml"))
    ncores = platform_config[const.PLATFORM_CONFIG.BB_DEFAULT_NCORES.name]

    version = args.version
    if version in ["mpi", "run_bb_mpi"]:
        sl_name_prefix = "run_bb_mpi"
    else:
        if version is not None:
            logger.error(
                "{} cannot be recognized as a valid option. "
                "version is set to default: {}".format(
                    version,
                    platform_config[const.PLATFORM_CONFIG.BB_DEFAULT_VERSION.name],
                )
            )
        version = platform_config[const.PLATFORM_CONFIG.BB_DEFAULT_VERSION.name]
        sl_name_prefix = platform_config[const.PLATFORM_CONFIG.BB_DEFAULT_VERSION.name]
    logger.debug(version)

    srf_name = os.path.splitext(os.path.basename(params.srf_file))[0]
    if args.srf is None or srf_name == args.srf:
        # TODO: save status as HF. refer to submit_hf
        # Use HF nt for wct estimation
        nt = get_hf_nt(params)
        fd_count = len(shared.get_stations(params.FD_STATLIST))

        if est_model is None:
            est_model = os.path.join(
                platform_config[const.PLATFORM_CONFIG.ESTIMATION_MODELS_DIR.name], "BB"
            )
        est_core_hours, est_run_time = est.est_BB_chours_single(
            fd_count, nt, ncores, est_model
        )

        # use an extra variable so we keep the original estimated run time
        # for other purposes
        est_run_time_scaled = est_run_time
        if hasattr(args, "retries") and int(args.retries) > 0:
            # check if BB.bin is readable, i.e. the run is restartable
            try:
                from qcore.timeseries import BBSeis

                BBSeis(simulation_structure.get_bb_bin_path(params.sim_dir))
            except Exception:
                logger.debug("Retried count > 0 but BB.bin is not readable")
            else:
                est_run_time_scaled = est_run_time * (int(args.retries) + 1)

        wct = set_wct(est_run_time_scaled, ncores, args.auto)

        write_directory = (
            args.write_directory if args.write_directory else params.sim_dir
        )

        underscored_srf = srf_name.replace("/", "__")

        header_dict = {
            "wallclock_limit": wct,
            "job_name": "bb.{}".format(underscored_srf),
            "job_description": "BB calculation",
            "additional_lines": "###SBATCH -C avx",
            "platform_specific_args": get_platform_node_requirements(ncores),
        }

        body_template_params = (
            "{}.sl.template".format(sl_name_prefix),
            {"test_bb_script": "test_bb.sh"},
        )

        command_template_parameters, add_args = gen_command_template(params)

        script_prefix = "{}_{}".format(sl_name_prefix, underscored_srf)
        script_file_path = write_sl_script(
            write_directory,
            params.sim_dir,
            const.ProcessType.BB,
            script_prefix,
            header_dict,
            body_template_params,
            command_template_parameters,
            add_args,
        )

        # Submit the script
        submit_yes = True if args.auto else confirm("Also submit the job for you?")
        if submit_yes:
            submit_script_to_scheduler(
                script_file_path,
                const.ProcessType.BB.value,
                simulation_structure.get_mgmt_db_queue(params.mgmt_db_location),
                params.sim_dir,
                srf_name,
                target_machine=args.machine,
                logger=logger,
            )
def submit_im_calc_slurm(
    sim_dir: str,
    write_dir: str = None,
    simple_out: bool = True,
    adv_ims: bool = False,
    target_machine: str = get_target_machine(const.ProcessType.IM_calculation).name,
    est_model: EstModel = path.join(
        platform_config[const.PLATFORM_CONFIG.ESTIMATION_MODELS_DIR.name], "IM"
    ),
    logger: Logger = get_basic_logger(),
):
    """Creates the IM calc slurm script and submits it to the scheduler.

    The header, body and command options below start from sensible defaults
    and are then adjusted for either simple or advanced IM calculation.
    """
    # Load the yaml params
    params = utils.load_sim_params(
        sim_struct.get_sim_params_yaml_path(sim_dir), load_vm=True
    )
    realisation_name = params[const.SimParams.run_name.value]
    fault_name = sim_struct.get_fault_from_realisation(realisation_name)
    station_count = len(load_station_file(params["FD_STATLIST"]).index)

    header_options = {
        const.SlHdrOptConsts.description.value: "Calculates intensity measures.",
        const.SlHdrOptConsts.memory.value: "2G",
        const.SlHdrOptConsts.version.value: "slurm",
        "exe_time": const.timestamp,
        const.SlHdrOptConsts.additional.value: "#SBATCH --hint=nomultithread"
        if platform_config[const.PLATFORM_CONFIG.SCHEDULER.name] == "slurm"
        else [""],
    }

    body_options = {
        const.SlBodyOptConsts.component.value: "",
        "realisation_name": realisation_name,
        const.SlBodyOptConsts.fault_name.value: fault_name,
        "np": platform_config[const.PLATFORM_CONFIG.IM_CALC_DEFAULT_N_CORES.name],
        "sim_IM_calc_dir": sim_struct.get_im_calc_dir(sim_dir),
        "output_csv": sim_struct.get_IM_csv(sim_dir),
        "output_info": sim_struct.get_IM_info(sim_dir),
        "models": "",
        const.SlBodyOptConsts.mgmt_db.value: "",
        "n_components": "",
        "match_obs_stations": False,
    }

    command_options = {
        const.SlBodyOptConsts.sim_dir.value: sim_dir,
        const.SlBodyOptConsts.component.value: "",
        const.SlBodyOptConsts.sim_name.value: realisation_name,
        const.SlBodyOptConsts.fault_name.value: fault_name,
        const.SlBodyOptConsts.n_procs.value: platform_config[
            const.PLATFORM_CONFIG.IM_CALC_DEFAULT_N_CORES.name
        ],
        const.SlBodyOptConsts.extended.value: "",
        const.SlBodyOptConsts.simple_out.value: "",
        const.SlBodyOptConsts.advanced_IM.value: "",
        "pSA_periods": "",
    }

    # Convert option settings to values
    if write_dir is None:
        write_dir = sim_dir

    # Simple vs adv im settings
    if adv_ims:
        # Common values
        proc_type = const.ProcessType.advanced_IM
        sl_template = "adv_im_calc.sl.template"
        script_prefix = "adv_im_calc"

        body_options["models"] = " ".join(
            params[const.SlBodyOptConsts.advanced_IM.value]["models"]
        )
        command_options[
            const.SlBodyOptConsts.advanced_IM.value
        ] = f"-a {body_options['models']} --OpenSees {qconfig['OpenSees']} "

        # create temporary station list if "match_obs_stations" is a directory
        if path.isdir(
            str(params[const.SlBodyOptConsts.advanced_IM.value]["match_obs_stations"])
        ):
            logger.debug(
                f"match_obs_stations specified: {params[const.SlBodyOptConsts.advanced_IM.value]['match_obs_stations']}"
            )
            # retrieve the station list from observed/fault(eventname)/Vol*/data/accBB/station
            obs_accBB_dir_glob = path.join(
                params[const.SlBodyOptConsts.advanced_IM.value]["match_obs_stations"],
                f"{fault_name}/*/*/accBB",
            )
            obs_accBB_dir = glob.glob(obs_accBB_dir_glob)
            if len(obs_accBB_dir) > 1:
                logger.error(
                    "got more than one folder globbed. "
                    "please double check the path to match_obs_stations is correct."
                )
                sys.exit()
            station_names_tmp = get_observed_stations(obs_accBB_dir[0])
            # write to a tmp file
            tmp_station_file = path.join(sim_dir, "tmp_station_file")
            with open(tmp_station_file, "w") as f:
                for station in station_names_tmp:
                    f.write(f"{station} ")
            command_options[const.SlBodyOptConsts.advanced_IM.value] = (
                command_options[const.SlBodyOptConsts.advanced_IM.value]
                + f"--station_names `cat {tmp_station_file}`"
            )
        # header_options[const.SlHdrOptConsts.n_tasks.value] = body_options["np"] = qconfig["cores_per_node"]

        # Time for one station to run in hours
        # This should be a machine property. Or take the largest across all machines used
        time_for_one_station = 0.5
        est_run_time = (
            np.ceil(station_count / qconfig["cores_per_node"])
            * 2
            * time_for_one_station
        )
    else:
        proc_type = const.ProcessType.IM_calculation
        sl_template = "sim_im_calc.sl.template"
        script_prefix = "sim_im_calc"

        if simple_out:
            command_options[const.SlBodyOptConsts.simple_out.value] = "-s"

        if params["ims"][const.RootParams.extended_period.name]:
            command_options[const.SlBodyOptConsts.extended.value] = "-e"
            period_count = len(
                np.unique(np.append(params["ims"]["pSA_periods"], const.EXT_PERIOD))
            )
        else:
            period_count = len(params["ims"]["pSA_periods"])

        if "pSA_periods" in params["ims"]:
            command_options[
                "pSA_periods"
            ] = f"-p {' '.join(str(p) for p in params['ims']['pSA_periods'])}"

        comps_to_store = params["ims"][const.SlBodyOptConsts.component.value]
        command_options[const.SlBodyOptConsts.component.value] = "-c " + " ".join(
            comps_to_store
        )
        body_options["n_components"] = len(comps_to_store)

        # Get wall clock estimation
        logger.info(
            "Running wall clock estimation for IM sim for realisation {}".format(
                realisation_name
            )
        )
        _, est_run_time = est_IM_chours_single(
            station_count,
            int(float(params["sim_duration"]) / float(params["dt"])),
            comps_to_store,
            period_count,
            body_options["np"],
            est_model,
        )

    # Header options requiring upstream settings
    # special treatment for im_calc, as the scaling feature in estimation is not suitable
    # cap the wct, otherwise the job cannot be submitted
    est_run_time = min(est_run_time * CH_SAFETY_FACTOR, qconfig["MAX_JOB_WCT"])
    # set ch_safety_factor=1 as we have already scaled it
    header_options["wallclock_limit"] = get_wct(est_run_time, ch_safety_factor=1)
    logger.debug(f"Using WCT for IM_calc: {header_options['wallclock_limit']}")
    header_options["job_name"] = "{}_{}".format(proc_type.str_value, fault_name)
    header_options["platform_specific_args"] = get_platform_node_requirements(
        body_options["np"]
    )

    script_file_path = write_sl_script(
        write_dir,
        sim_dir,
        proc_type,
        script_prefix,
        header_options,
        (sl_template, body_options),
        command_options,
    )

    submit_script_to_scheduler(
        script_file_path,
        proc_type.value,
        sim_struct.get_mgmt_db_queue(params["mgmt_db_location"]),
        sim_dir,
        realisation_name,
        target_machine=target_machine,
        logger=logger,
    )
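# Worked sketch of the IM_calc wallclock capping above: the estimate is scaled
# by CH_SAFETY_FACTOR and capped at the machine's MAX_JOB_WCT, after which
# get_wct is called with ch_safety_factor=1 so the factor is not applied twice.
# Numbers are illustrative.
def _example_im_calc_wct(est_run_time=3.0, safety_factor=1.5, max_job_wct=4.0):
    return min(est_run_time * safety_factor, max_job_wct)  # min(4.5, 4.0) -> 4.0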
def main(args):
    # This should come from constants
    log_dir = os.path.join(args.sim_dir, "ch_log", const.METADATA_LOG_FILENAME)

    metadata_dict = getattr(args, METADATA_VALUES)

    # Determine run_time from start and end time
    if (
        const.MetadataField.start_time.value in metadata_dict.keys()
        and const.MetadataField.end_time.value in metadata_dict.keys()
    ):
        tdelta = datetime.strptime(
            metadata_dict[const.MetadataField.end_time.value],
            const.METADATA_TIMESTAMP_FMT,
        ) - datetime.strptime(
            metadata_dict[const.MetadataField.start_time.value],
            const.METADATA_TIMESTAMP_FMT,
        )
        metadata_dict[const.MetadataField.run_time.value] = tdelta.total_seconds()

    # Load the params
    params = utils.load_sim_params(
        os.path.join(args.sim_dir, "sim_params.yaml"), load_vm=True
    )

    if "dt" in params.bb:
        bb_dt = params.bb.dt
    else:
        bb_dt = min(params.dt, params.hf.dt)

    # params metadata for LF
    if args.proc_type == const.ProcessType.EMOD3D.str_value:
        metadata_dict[const.MetadataField.nt.value] = int(
            float(params.sim_duration) / float(params.dt)
        )
        metadata_dict[const.MetadataField.nx.value] = params.nx
        metadata_dict[const.MetadataField.ny.value] = params.ny
        metadata_dict[const.MetadataField.nz.value] = params.nz
    # HF
    elif args.proc_type == const.ProcessType.HF.str_value:
        metadata_dict[const.MetadataField.nt.value] = int(
            float(params.sim_duration) / float(params.hf.dt)
        )
        metadata_dict[const.MetadataField.nsub_stoch.value] = get_nsub_stoch(
            params["hf"]["slip"], get_area=False
        )
    # BB
    elif args.proc_type == const.ProcessType.BB.str_value:
        metadata_dict[const.MetadataField.dt.value] = bb_dt
    # IM_calc
    elif args.proc_type == const.ProcessType.IM_calculation.str_value:
        metadata_dict[const.MetadataField.nt.value] = int(
            float(params.sim_duration) / float(bb_dt)
        )

        # This should come from a constants file
        im_calc_csv_file = os.path.join(
            args.sim_dir, "IM_calc", "{}.csv".format(os.path.basename(args.sim_dir))
        )
        im_comp = list(pd.read_csv(im_calc_csv_file).component.unique().astype("U"))
        metadata_dict[const.MetadataField.im_comp.value] = im_comp
        metadata_dict[const.MetadataField.im_comp_count.value] = len(im_comp)
    # Advanced_IM
    elif args.proc_type == const.ProcessType.advanced_IM.str_value:
        metadata_dict[const.MetadataField.nt.value] = int(
            float(params.sim_duration) / float(bb_dt)
        )

    store_metadata(
        log_dir, args.proc_type, metadata_dict, sim_name=os.path.basename(args.sim_dir)
    )
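# Minimal sketch of the run_time derivation above, assuming
# const.METADATA_TIMESTAMP_FMT is a strptime-compatible format string like the
# one shown here.
def _example_run_time_seconds(
    start="20240101_120000", end="20240101_121530", fmt="%Y%m%d_%H%M%S"
):
    from datetime import datetime

    tdelta = datetime.strptime(end, fmt) - datetime.strptime(start, fmt)
    return tdelta.total_seconds()  # 930.0 for the values above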
def submit_task(
    sim_dir,
    proc_type,
    run_name,
    root_folder,
    parent_logger,
    retries=None,
    hf_seed=const.HF_DEFAULT_SEED,
    models=None,
):
    task_logger = qclogging.get_task_logger(parent_logger, run_name, proc_type)
    verification_dir = sim_struct.get_verification_dir(sim_dir)
    # Metadata logging setup
    ch_log_dir = os.path.abspath(os.path.join(sim_dir, "ch_log"))
    if not os.path.isdir(ch_log_dir):
        os.mkdir(ch_log_dir)

    params = utils.load_sim_params(sim_struct.get_sim_params_yaml_path(sim_dir))

    submitted_time = datetime.now().strftime(const.METADATA_TIMESTAMP_FMT)
    log_file = os.path.join(sim_dir, "ch_log", const.METADATA_LOG_FILENAME)

    def submit_script_to_scheduler(script_name, target_machine=None, **kwargs):
        shared_automated_workflow.submit_script_to_scheduler(
            script_name,
            proc_type,
            sim_struct.get_mgmt_db_queue(root_folder),
            sim_dir,
            run_name,
            target_machine=target_machine,
            logger=task_logger,
        )

    if proc_type == const.ProcessType.EMOD3D.value:
        # These have to include the default values (same for all other process types)!
        args = argparse.Namespace(
            auto=True,
            srf=run_name,
            ncore=platform_config[const.PLATFORM_CONFIG.LF_DEFAULT_NCORES.name],
            account=platform_config[const.PLATFORM_CONFIG.DEFAULT_ACCOUNT.name],
            machine=get_target_machine(const.ProcessType.EMOD3D).name,
            rel_dir=sim_dir,
            write_directory=sim_dir,
            retries=retries,
        )
        task_logger.debug("Submit EMOD3D arguments: {}".format(args))
        submit_lf_main(args, est_model=models[0], logger=task_logger)
        store_metadata(
            log_file,
            const.ProcessType.EMOD3D.str_value,
            {"submit_time": submitted_time},
            logger=task_logger,
        )
    elif proc_type == const.ProcessType.merge_ts.value:
        args = argparse.Namespace(
            auto=True,
            srf=run_name,
            account=platform_config[const.PLATFORM_CONFIG.DEFAULT_ACCOUNT.name],
            machine=get_target_machine(const.ProcessType.merge_ts).name,
            rel_dir=sim_dir,
            write_directory=sim_dir,
        )
        task_logger.debug("Submit post EMOD3D (merge_ts) arguments: {}".format(args))
        submit_post_lf_main(args, task_logger)
        store_metadata(
            log_file,
            const.ProcessType.merge_ts.str_value,
            {"submit_time": submitted_time},
            logger=task_logger,
        )
    elif proc_type == const.ProcessType.plot_ts.value:
        # plot_ts.py does not create the output dir if it does not exist,
        # whereas im_plot does.
        if not os.path.isdir(verification_dir):
            os.mkdir(verification_dir)
        arguments = OrderedDict(
            {
                "XYTS_PATH": os.path.join(
                    sim_struct.get_lf_outbin_dir(sim_dir),
                    "{}_xyts.e3d".format(run_name.split("_")[0]),
                ),
                "SRF_PATH": sim_struct.get_srf_path(root_folder, run_name),
                "OUTPUT_TS_PATH": os.path.join(verification_dir, run_name),
                "MGMT_DB_LOC": root_folder,
                "SRF_NAME": run_name,
            }
        )
        script = get_platform_specific_script(const.ProcessType.plot_ts, arguments)
        submit_script_to_scheduler(
            script, target_machine=get_target_machine(const.ProcessType.plot_ts).name
        )
    elif proc_type == const.ProcessType.HF.value:
        args = argparse.Namespace(
            auto=True,
            srf=run_name,
            seed=hf_seed,
            ncore=platform_config[const.PLATFORM_CONFIG.HF_DEFAULT_NCORES.name],
            version=platform_config[const.PLATFORM_CONFIG.HF_DEFAULT_VERSION.name],
            site_specific=None,
            account=platform_config[const.PLATFORM_CONFIG.DEFAULT_ACCOUNT.name],
            machine=get_target_machine(const.ProcessType.HF).name,
            rel_dir=sim_dir,
            write_directory=sim_dir,
            debug=False,
            retries=retries,
        )
        task_logger.debug("Submit HF arguments: {}".format(args))
        submit_hf_main(args, models[1], task_logger)
        store_metadata(
            log_file,
            const.ProcessType.HF.str_value,
            {"submit_time": submitted_time},
            logger=task_logger,
        )
    elif proc_type == const.ProcessType.BB.value:
        args = argparse.Namespace(
            auto=True,
            srf=run_name,
            version=platform_config[const.PLATFORM_CONFIG.BB_DEFAULT_VERSION.name],
            account=platform_config[const.PLATFORM_CONFIG.DEFAULT_ACCOUNT.name],
            machine=get_target_machine(const.ProcessType.BB).name,
            rel_dir=sim_dir,
            write_directory=sim_dir,
            retries=retries,
        )
        task_logger.debug("Submit BB arguments: {}".format(args))
        submit_bb_main(args, models[2], task_logger)
        store_metadata(
            log_file,
            const.ProcessType.BB.str_value,
            {"submit_time": submitted_time},
            logger=task_logger,
        )
    elif proc_type == const.ProcessType.IM_calculation.value:
        submit_im_calc_slurm(
            sim_dir=sim_dir,
            simple_out=True,
            target_machine=get_target_machine(const.ProcessType.IM_calculation).name,
            est_model=models[3],
            logger=task_logger,
        )
        task_logger.debug(
            f"Submit IM calc arguments: sim_dir: {sim_dir}, simple_out: True, "
            f"target_machine: {get_target_machine(const.ProcessType.IM_calculation).name}"
        )
        store_metadata(
            log_file,
            const.ProcessType.IM_calculation.str_value,
            {"submit_time": submitted_time},
            logger=task_logger,
        )
    elif proc_type == const.ProcessType.IM_plot.value:
        im_plot_template = (
            "{script_location} {csv_path} {station_file_path} {output_xyz_dir} "
            "{srf_path} {model_params_path} {mgmt_db_loc} {run_name}"
        )
        script = im_plot_template.format(
            csv_path=os.path.join(sim_struct.get_IM_csv(sim_dir)),
            station_file_path=params.stat_file,
            output_xyz_dir=os.path.join(verification_dir, "IM_plot"),
            srf_path=sim_struct.get_srf_path(root_folder, run_name),
            model_params_path=params.MODEL_PARAMS,
            mgmt_db_loc=root_folder,
            run_name=run_name,
            script_location=os.path.expandvars("$gmsim/workflow/scripts/im_plot.sl"),
        )
        submit_script_to_scheduler(
            script, target_machine=get_target_machine(const.ProcessType.IM_plot).name
        )
    elif proc_type == const.ProcessType.rrup.value:
        submit_script_to_scheduler(
            get_platform_specific_script(
                const.ProcessType.rrup,
                OrderedDict({"REL": sim_dir, "MGMT_DB_LOC": root_folder}),
            ),
            target_machine=get_target_machine(const.ProcessType.rrup).name,
        )
    elif proc_type == const.ProcessType.Empirical.value:
        extended_period_switch = "-e" if params["ims"]["extended_period"] else ""
        sl_script = generate_empirical_script(
            1, extended_period_switch, root_folder, [run_name], sim_dir
        )
        submit_script_to_scheduler(
            sl_script,
            target_machine=get_target_machine(const.ProcessType.Empirical).name,
        )
    elif proc_type == const.ProcessType.Verification.value:
        raise NotImplementedError("Verification is not currently working")
    elif proc_type == const.ProcessType.clean_up.value:
        submit_script_to_scheduler(
            get_platform_specific_script(
                const.ProcessType.clean_up,
                OrderedDict(
                    {
                        "SIM_DIR": sim_dir,
                        "SRF_NAME": run_name,
                        "MGMT_DB_LOC": root_folder,
                    }
                ),
            ),
            target_machine=get_target_machine(const.ProcessType.clean_up).name,
        )
    elif proc_type == const.ProcessType.LF2BB.value:
        submit_script_to_scheduler(
            get_platform_specific_script(
                const.ProcessType.LF2BB,
                OrderedDict(
                    {
                        "REL_LOC": sim_dir,
                        "MGMT_DB_LOC": root_folder,
                        "VSITE_FILE": params.stat_vs_est,
                        "REM_ARGS": "'"
                        + " ".join(
                            [
                                "--{} {}".format(key, item)
                                for key, item in params.bb.items()
                            ]
                        )
                        + "'",
                    }
                ),
            ),
            target_machine=get_target_machine(const.ProcessType.LF2BB).name,
        )
    elif proc_type == const.ProcessType.HF2BB.value:
        submit_script_to_scheduler(
            get_platform_specific_script(
                const.ProcessType.HF2BB,
                OrderedDict(
                    {
                        "REL_LOC": sim_dir,
                        "MGMT_DB_LOC": root_folder,
                        "REM_ARGS": "'"
                        + " ".join(
                            [
                                "--{} {}".format(key, item)
                                for key, item in params.bb.items()
                            ]
                        )
                        + "'",
                    }
                ),
            ),
            target_machine=get_target_machine(const.ProcessType.HF2BB).name,
        )
    elif proc_type == const.ProcessType.plot_srf.value:
        submit_script_to_scheduler(
            get_platform_specific_script(
                const.ProcessType.plot_srf,
                OrderedDict(
                    {
                        "SRF_DIR": sim_struct.get_srf_dir(root_folder, run_name),
                        "OUTPUT_DIR": sim_struct.get_sources_plot_dir(
                            root_folder, run_name
                        ),
                        "MGMT_DB_LOC": root_folder,
                        "SRF_NAME": run_name,
                    }
                ),
            ),
            target_machine=get_target_machine(const.ProcessType.plot_srf).name,
        )
    elif proc_type == const.ProcessType.advanced_IM.value:
        submit_im_calc_slurm(
            sim_dir=sim_dir,
            adv_ims=True,
            target_machine=get_target_machine(const.ProcessType.IM_calculation).name,
            est_model=models[3],
            logger=task_logger,
        )
        task_logger.debug(
            f"Submit Advanced_IM calc arguments: sim_dir: {sim_dir}, adv_im: True, "
            f"target_machine: {get_target_machine(const.ProcessType.IM_calculation).name}"
        )
        store_metadata(
            log_file,
            const.ProcessType.advanced_IM.str_value,
            {"submit_time": submitted_time},
            logger=task_logger,
        )
    qclogging.clean_up_logger(task_logger)
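# Hedged example of dispatching a single task through submit_task. The
# realisation name and paths are hypothetical; models is indexed positionally
# above (LF, HF, BB, IM estimation models).
def _example_submit_task(models, parent_logger):
    submit_task(
        sim_dir="/path/to/Runs/Hossack/Hossack_REL01",
        proc_type=const.ProcessType.EMOD3D.value,
        run_name="Hossack_REL01",
        root_folder="/path/to/cybershake_root",
        parent_logger=parent_logger,
        models=models,
    )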