def prepare_output_args(self):
    '''
    Build the output argument string from the output-format key in
    yaml-CONFIG (SNANA/ALERTS/etc).

    Each key in KEYLIST_OUTPUT is paired with the option at the same
    position in KEYLIST_OUTPUT_OPTIONS. Exactly one of these keys must be
    present in CONFIG; otherwise abort via util.log_assert.
    The resulting "<option> <outdir>" string is stored in
    self.config_prep['output_args'].
    '''
    CONFIG = self.config_yaml['CONFIG']
    input_file = self.config_yaml['args'].input_file  # for msgerr
    msgerr = []

    # one "<option> <outdir>" entry per output key found in CONFIG
    found_args = []
    for key, opt in zip(KEYLIST_OUTPUT, KEYLIST_OUTPUT_OPTIONS):
        if key not in CONFIG:
            continue
        outdir = CONFIG[key]
        if '/' not in outdir:
            # bare directory name: prepend current working dir
            # so that outdir has a full path
            outdir = os.getcwd() + '/' + outdir
        found_args.append(f'{opt} {outdir}')

    n_found = len(found_args)
    output_args = found_args[-1] if found_args else None

    if n_found > 1:
        msgerr.append(f"Multiply defined key for output format in yaml-CONFIG")
        msgerr.append(f'Require EXACTLY one of {KEYLIST_OUTPUT}')
        msgerr.append(f"Check {input_file}")
        util.log_assert(False, msgerr)  # just abort, no done stamp

    if output_args is None:
        msgerr.append(f"Missing key for output format in yaml-CONFIG")
        msgerr.append(f'Require one of {KEYLIST_OUTPUT}')
        msgerr.append(f"Check {input_file}")
        util.log_assert(False, msgerr)  # just abort, no done stamp

    self.config_prep['output_args'] = output_args
def train_prep_input_files(self):
    '''
    Read & store names of input files for trainsalt.

    The list is built from three sources, in this order:
      1. the main config file named by KEY_CONFIG_FILE in yaml-CONFIG;
      2. files named by the KEY_LIST_FILE_COPY keys found in the
         SECTION_FILE_COPY section of that config file;
      3. files passed as "--<key> <file>" overrides inside the TRAINOPT
         strings (self.config_prep['trainopt_arg_list'] and
         self.config_prep['trainopt_global']).

    Files from sources 1 and 2 must exist, otherwise abort via
    util.log_assert with a message naming the key that points to the
    missing file. Files from source 3 are stored without an existence
    check. Result is stored in self.config_prep['input_file_list'].
    '''
    input_master_file = self.config_yaml['args'].input_file
    CONFIG = self.config_yaml['CONFIG']

    if KEY_CONFIG_FILE not in CONFIG:
        msgerr = [f"Missing required key {KEY_CONFIG_FILE} "]
        self.log_assert(False, msgerr)

    config_file = CONFIG[KEY_CONFIG_FILE]
    input_file_list = [config_file]  # init input files for trainsalt

    # ConfigParser.read() silently skips a file it cannot open, so check
    # the config file BEFORE parsing; otherwise a missing file shows up
    # as a KeyError on the section lookup below.
    if not os.path.exists(config_file):
        msgerr = [
            f"Input file {config_file}",
            f"does not exist.",
            f"Check '{KEY_CONFIG_FILE}' arg in {input_master_file}",
        ]
        util.log_assert(False, msgerr)  # just abort, no done stamp

    print(f"\n Check other input files inside main config file: "
          f"{config_file}")

    # parse config_file to get other potential input files
    config = configparser.ConfigParser(inline_comment_prefixes='#')
    config.read(config_file)

    for key in KEY_LIST_FILE_COPY:
        if key not in config[SECTION_FILE_COPY]:
            continue
        input_file = config[SECTION_FILE_COPY][key]
        if not os.path.exists(input_file):
            # report the key that actually names this file, and the
            # file in which that key is defined
            msgerr = [
                f"Input file {input_file}",
                f"does not exist.",
                f"Check '{key}' arg in {config_file}",
            ]
            util.log_assert(False, msgerr)  # just abort, no done stamp
        input_file_list.append(input_file)

    # - - - - -
    # check for additional input files in the TRAINOPT
    trainopt_global = self.config_prep['trainopt_global']
    trainopt_arg_list = self.config_prep['trainopt_arg_list']
    trainopt_all = trainopt_arg_list + [trainopt_global]

    for item in trainopt_all:
        item_list = item.split()
        for key in KEY_LIST_FILE_COPY:
            key_override = f"--{key}"
            if key_override in item_list:
                # the file name is the word following the override flag
                j = item_list.index(key_override)
                input_file_list.append(item_list[j + 1])

    # store list of all input files
    self.config_prep['input_file_list'] = input_file_list
def set_output_dir_name(self):
    '''
    Return (output_dir_name, SUBDIR_SCRIPTS_TRAIN), where output_dir_name
    is the OUTDIR value from yaml-CONFIG with environment variables
    expanded. Abort via util.log_assert if the OUTDIR key is missing.
    '''
    CONFIG = self.config_yaml['CONFIG']
    input_file = self.config_yaml['args'].input_file  # for msgerr

    if 'OUTDIR' not in CONFIG:
        msgerr = [
            f"OUTDIR key missing in yaml-CONFIG",
            f"Check {input_file}",
        ]
        util.log_assert(False, msgerr)  # just abort, no done stamp

    output_dir_name = os.path.expandvars(CONFIG['OUTDIR'])
    return output_dir_name, SUBDIR_SCRIPTS_TRAIN
def merge_cleanup_final(self):
    '''
    Final merge and cleanup; every makeDataFiles job succeeded, so here
    we simply merge/compress output.

    Reads SCRIPT_DIR and OUTPUT_FORMAT from
    self.config_prep['submit_info_yaml'] and the output directory from
    self.config_prep['output_dir'], then:
      - SNANA format: run
        'makeDataFiles.sh --outdir_snana <output_dir> --merge';
      - LSST-alert format: combine the {BASE_PREFIX}*.csv.gz files in the
        script dir into <output_dir>/ALERTS_TRUTH.csv.gz using
        util.combine_csv_files;
      - any other format: abort via util.log_assert.
    Afterwards the MAKEDATA* and CPU* files in the script dir are
    compressed with util.compress_files (skipped when none exist), and
    the script dir itself is passed to util.compress_subdir.
    '''
    submit_info_yaml = self.config_prep['submit_info_yaml']
    output_dir = self.config_prep['output_dir']
    script_dir = submit_info_yaml['SCRIPT_DIR']
    output_format = submit_info_yaml['OUTPUT_FORMAT']

    if output_format == OUTPUT_FORMAT_SNANA:
        command_list = [
            'makeDataFiles.sh', '--outdir_snana', output_dir, '--merge'
        ]
        ret = subprocess.run(command_list, capture_output=False, text=True)
        if ret.returncode != 0:
            # do not hide a failed merge; keep going so that the logs
            # are still compressed for later inspection
            print(f"\t WARNING: {command_list[0]} --merge returned "
                  f"code {ret.returncode}")
            sys.stdout.flush()

    elif output_format == OUTPUT_FORMAT_LSST_ALERTS:
        wildcard_base = f"{BASE_PREFIX}*.csv.gz"
        wildcard = f"{script_dir}/{wildcard_base}"
        combined_file = f"{output_dir}/ALERTS_TRUTH.csv.gz"
        util.combine_csv_files(wildcard, combined_file, True)
        # nothing to compress after combining csv files

    else:
        msgerr = [f"Unknown format '{output_format}'"]
        util.log_assert(False, msgerr)  # just abort, no done stamp

    # compress groups of script-dir files sharing a common prefix
    wildcard_list = ['MAKEDATA', 'CPU']
    for w in wildcard_list:
        wstar = f"{w}*"
        # glob.glob1 is undocumented and deprecated; use the public API,
        # escaping the directory so only wstar acts as a pattern
        pattern = os.path.join(glob.escape(script_dir), wstar)
        if not glob.glob(pattern):
            continue
        print(f"\t Compress {wstar}")
        sys.stdout.flush()
        util.compress_files(+1, script_dir, wstar, w, "")

    # - - - -
    # tar up entire script dir
    util.compress_subdir(+1, script_dir)
def check_input_file_name(args):
    '''
    Validate args.input_file and translate it if it is a legacy file.

    Abort via util.log_assert if the name includes a path ('/').
    Otherwise args.input_file is replaced by the name returned from
    check_legacy_input_file(input_file, args.opt_translate).
    '''
    name = args.input_file
    translate_opt = args.opt_translate

    # abort if path is included in the input file name.
    if '/' in name:
        util.log_assert(False, [
            f"Invalid input file: {name}",
            f"because path not allowed as part of name.",
            f"Must submit in same dir as input_file.",
        ])

    # check to translate legacy input
    args.input_file = check_legacy_input_file(name, translate_opt)
def train_prep_expand_arg(self, ARG):
    '''
    ******* OBSOLETE mark delete Jan 24 2021 *******

    If ARG starts with KEY_SHIFTLIST_FILE, the returned arg is the
    contents of the file named by the second word of ARG (comment lines
    skipped, newlines replaced by blanks); otherwise the returned arg is
    ARG itself, including when ARG is empty or blank.

    Motivation is that user can build long list of random calib
    variations and store each set of variations in a separate file.

    BEWARE that SHIFTLIST_FILE is NOT a yaml file ... the contents must
    be valid TRAINOPT arguments. Abort on any colons in case user
    accidentally goes yaml.

    Returns (arg, shift_file); shift_file is None when no file was read.
    '''
    KEY = KEY_SHIFTLIST_FILE
    shift_file = None
    arg = ARG
    arg_list = ARG.split()

    # an empty/blank ARG has no first word; treat it as "no shift file"
    # instead of failing with IndexError
    if arg_list and arg_list[0] == KEY:
        shift_file = arg_list[1]
        with open(shift_file, "rt") as f:
            arg = "".join(
                line.replace("\n", " ")
                for line in f
                if not util.is_comment_line(line)
            )

    # - - - -
    if ':' in arg:
        msgerr = [
            f"Found invalid colon(s) in {KEY}",
            f"   {shift_file} .",
            f"Beware that {KEY} is NOT a yaml file; ",
            f"{KEY} contents must be valid TRAINOPT args. ",
        ]
        util.log_assert(False, msgerr)

    return arg, shift_file
def set_output_dir_name(self):
    '''
    Determine output format and output directory from yaml-CONFIG.

    Each key in KEYLIST_OUTPUT is paired with the format string at the
    same position in OUTPUT_FORMAT. For a key present in CONFIG, its
    value (with environment variables expanded) is the output dir name;
    if several keys are present, the last one in KEYLIST_OUTPUT wins.
    Abort via util.log_assert if none is present.

    The format is stored in self.config_yaml['args'].output_format;
    returns (output_dir_name, SUBDIR_SCRIPTS_MKDATA).
    '''
    CONFIG = self.config_yaml['CONFIG']
    input_file = self.config_yaml['args'].input_file  # for msgerr

    output_format = None
    for key_output, format_str in zip(KEYLIST_OUTPUT, OUTPUT_FORMAT):
        if key_output not in CONFIG:
            continue
        output_format = format_str
        output_dir_name = os.path.expandvars(CONFIG[key_output])

    if output_format is None:
        msgerr = [
            f"OUTDIR key missing in yaml-CONFIG",
            f"Must provide one of {KEYLIST_OUTPUT}",
            f"Check {input_file}",
        ]
        util.log_assert(False, msgerr)  # just abort, no done stamp

    self.config_yaml['args'].output_format = output_format
    return output_dir_name, SUBDIR_SCRIPTS_MKDATA
def check_legacy_input_file(input_file, opt_translate):
    '''
    If there is no 'CONFIG:' key, this is a legacy input file;
    translate using file-name convention based on user input
    --opt_translate. See opt_translate details with
      submit_batch_jobs.sh -H TRANSLATE

    opt_translate is a bit mask as used below:
      & 1 : keep input_file as the legacy file and write the translated
            file to REFAC_<input_file>
      & 2 : rename input_file to LEGACY_<input_file> (unless it already
            has the LEGACY_ prefix) and write the translated file under
            the name without that prefix
      & 4 : do NOT exit after translation (default is to exit)
      & 8 : exit even if the file is already in the refactored format

    Function returns name of input file ... original if already in
    correct YAML format, or translated. Aborts via util.log_assert on an
    obsolete CONFIG key, on an input file in another directory, on an
    invalid opt_translate, or on an unrecognized legacy file type.
    '''
    exit_always = (opt_translate & 8) > 0  # exit for legacy or refac file

    with open(input_file, "r") as f:
        flat_word_list = [word for line in f for word in line.split()]

    if 'CONFIG:' in flat_word_list:
        # already refactored; check for obsolete keys that are not
        # translated
        for item in flat_word_list:
            key = item.rstrip(':')
            if key in OBSOLETE_CONFIG_KEYS:
                comment = OBSOLETE_CONFIG_KEYS[key]
                msgerr = [
                    f" Obsolete key '{key}' no longer valid.",
                    f" Comment: {comment}",
                ]
                util.log_assert(False, msgerr)

        if exit_always:
            sys.exit("\n Input file already translated; exit anyway.")

        return input_file  # file ok, do nothing.

    # - - - - -
    # prepare options
    rename_refac_file = (opt_translate & 1) > 0
    rename_legacy_file = (opt_translate & 2) > 0
    exit_after_translate = (opt_translate & 4) == 0  # default is to exit

    if '/' in input_file:
        msgerr = [
            f"Will not translate input file in another directory.",
            f"Recommend",
            f"  cd {os.path.dirname(input_file)}",
            f"  {os.path.basename(sys.argv[0])} "
            f"{os.path.basename(input_file)}",
        ]
        util.log_assert(False, msgerr)

    if rename_refac_file:
        legacy_input_file = input_file
        refac_input_file = f"REFAC_{input_file}"
    elif rename_legacy_file:
        if input_file.startswith('LEGACY_'):
            # don't add another LEGACY prefix
            legacy_input_file = input_file
            refac_input_file = input_file[7:]
        else:
            legacy_input_file = f"LEGACY_{input_file}"
            refac_input_file = input_file
            print(f" Save {input_file} as {legacy_input_file}")
            # rename without going through a shell, so that unusual
            # characters in the file name are safe and a failed rename
            # raises instead of being ignored
            os.replace(input_file, legacy_input_file)
    else:
        msgerr = [
            f" Invalid opt_translate = {opt_translate} ",
            f" Must have either ",
            f"   opt_translate & 1 (rename refac file) or ",
            f"   opt_translate & 2 (rename legacy file) ",
        ]
        util.log_assert(False, msgerr)

    msg_translate = f"\n TRANSLATE LEGACY INPUT file for "
    print(f" opt_translate = {opt_translate}")

    # identify the legacy program from marker words in the file;
    # precedence is SIM, then FIT, then BBC
    IS_SIM = 'GENVERSION:' in flat_word_list
    IS_FIT = ('VERSION:' in flat_word_list
              or '&SNLCINP' in flat_word_list)
    IS_BBC = any('u1=' in word for word in flat_word_list)

    if IS_SIM:
        logging.info(f"{msg_translate} sim_SNmix.pl :")
        tr.SIM_legacy_to_refac(legacy_input_file, refac_input_file)
    elif IS_FIT:
        logging.info(f"{msg_translate} split_and_fit.pl :")
        tr.FIT_legacy_to_refac(legacy_input_file, refac_input_file)
    elif IS_BBC:
        logging.info(f"{msg_translate} SALT2mu_fit.pl: ")
        tr.BBC_legacy_to_refac(legacy_input_file, refac_input_file)
    else:
        msgerr = ['Unrecognized legacy input file:', input_file]
        util.log_assert(False, msgerr)

    if exit_after_translate:
        sys.exit("\n Exit after input file translation.")

    return refac_input_file
def prepare_input_args(self):
    '''
    Prepare input arguments from config file.

    Reads from yaml-CONFIG:
      MAKEDATAFILE_INPUTS : required list of inputs (a single string is
                            accepted and treated as a one-element list).
                            Entries containing '*' are expanded with
                            glob after environment-variable expansion;
                            other entries are kept as given, in order.
      MAKEDATAFILE_SOURCE : optional, stored as-is.
      NEVT                : optional, stored as-is.
      one of KEYLIST_SPLIT_NITE : optional string "MJDMIN MJDMAX NBIN"
                            (three integers, NBIN > 0).

    Aborts via util.log_assert if MAKEDATAFILE_INPUTS is missing, if more
    than one KEYLIST_SPLIT_NITE key is defined, or if the split-MJD
    string is malformed.

    Stores in self.config_prep: 'inputs_list', 'split_mjd' (dict with
    'nbin' and 'step', plus 'min', 'max', 'min_edge', 'max_edge' when a
    split is requested), 'split_mjd_key_name', 'split_mjd_option',
    'input_source' and 'nevt'.
    '''
    CONFIG = self.config_yaml['CONFIG']
    inputs_list = CONFIG.get('MAKEDATAFILE_INPUTS', None)
    input_source = CONFIG.get('MAKEDATAFILE_SOURCE', None)
    nevt = CONFIG.get('NEVT', None)
    input_file = self.config_yaml['args'].input_file  # for msgerr

    if inputs_list is None:
        msgerr = [
            f"MAKEDATAFILE_INPUTS key missing in yaml-CONFIG",
            f"Check {input_file}",
        ]
        util.log_assert(False, msgerr)  # just abort, no done stamp

    # a scalar yaml value arrives as a str; wrap it so that the loop
    # below iterates over entries rather than single characters
    if isinstance(inputs_list, str):
        inputs_list = [inputs_list]

    # if inputs_list includes a wildcard, scoop up files with glob.
    # Accumulate so that explicit entries and multiple wildcards are
    # all retained.
    resolved_list = []
    found_wildcard = False
    for inp in inputs_list:
        if '*' in inp:
            inp = os.path.expandvars(inp)
            resolved_list.extend(sorted(glob.glob(inp)))
            found_wildcard = True
        else:
            resolved_list.append(inp)
    inputs_list = resolved_list

    if found_wildcard:
        n = len(inputs_list)
        print(f"\n Load {n} inputs from wildcard:")
        for inp in inputs_list:
            print(f"   Resolved {inp}")
        print('')

    # select the SPLIT_MJD option
    # abort if more than one SPLIT_MJD option is specified
    n_mjd_split_opts = 0
    split_mjd_in = None
    split_mjd_key_name = None
    split_mjd_option = None
    for key, opt in zip(KEYLIST_SPLIT_NITE, KEYLIST_SPLIT_NITE_OPTIONS):
        if key in CONFIG:
            n_mjd_split_opts += 1
            split_mjd_key_name = key
            split_mjd_option = opt
            split_mjd_in = CONFIG[key]

    if n_mjd_split_opts > 1:
        msgerr = [
            f"DEFINE ONLY ONE OF {KEYLIST_SPLIT_NITE}",
            f"Check {input_file}",
        ]
        util.log_assert(False, msgerr)  # just abort, no done stamp

    # parse the input SPLIT_MJD string into ranges for makeDataFiles
    split_mjd = {}
    if split_mjd_in is None:
        split_mjd['nbin'] = 1
        split_mjd['step'] = 0
    else:
        try:
            # ValueError covers both a wrong word count and a
            # non-integer word
            imjdmin, imjdmax, nbin = map(int, split_mjd_in.split())
        except ValueError:
            is_valid = False
        else:
            is_valid = nbin > 0  # nbin is a divisor below

        if not is_valid:
            msgerr = [
                f"Invalid {split_mjd_key_name} value '{split_mjd_in}'",
                f"Expected three integers 'MJDMIN MJDMAX NBIN' "
                f"with NBIN > 0",
                f"Check {input_file}",
            ]
            util.log_assert(False, msgerr)  # just abort, no done stamp

        split_mjd['min'] = imjdmin
        split_mjd['max'] = imjdmax
        split_mjd['nbin'] = nbin
        split_mjd['step'] = (imjdmax - imjdmin) / nbin
        # use nbin + 1 to include the edges
        grid = np.linspace(imjdmin, imjdmax, nbin + 1)
        split_mjd['min_edge'] = grid[0:-1]
        split_mjd['max_edge'] = grid[1:]

    self.config_prep['inputs_list'] = inputs_list
    self.config_prep['split_mjd'] = split_mjd
    # CONFIG YAML keyname
    self.config_prep['split_mjd_key_name'] = split_mjd_key_name
    # makeDataFiles.sh option
    self.config_prep['split_mjd_option'] = split_mjd_option
    self.config_prep['input_source'] = input_source
    self.config_prep['nevt'] = nevt