def init_read_data(self):
    # init which private variables to keep, along with
    # comment for TEXT file
    SNANA_READER = self.config_data['SNANA_READER']
    SNANA_READER.init_private_dict(PRIVATE_VAR_DICT)

    # .xyz initialize SMP; read master list;
    # e.g., check $DES_SMP, else abort.

    # hard code the SMP location
    # self.PATH_SMP = PATH_SMP
    if not os.path.isdir(PATH_DES_SMP):
        msg_err = []
        msg_err.append(f"$DES_SMP path was not found")
        msg_err.append(f"Check {PATH_DES_SMP}")
        util.log_assert(False, msg_err)

    logging.info("Prepare DES SMP, setting up masterlist and tarballs")
    self.file_cache = {}
    self.file_cache['tarballs'] = {}
    self.masterlistpath  = os.path.join(PATH_DES_SMP, SMP_MASTERLIST_FILE)
    self.smp_master_list = pd.read_csv(self.masterlistpath)
    self.n_smp_files     = np.max(self.smp_master_list.tar_id)
    logging.info(f"Masterlist shows {self.n_smp_files} files")
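
# Hypothetical illustration of the masterlist CSV assumed above: the only
# column the code relies on is tar_id (whose np.max is taken as the number
# of SMP tarball files); the other column here is a guess for illustration.
#
#   SNID,tar_id
#   1248404,1
#   1253039,1
#   1260211,2
#   ...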
def init_read_data(self):
    args = self.config_inputs['args']  # command line args
    sirah_folder = args.sirah_folder

    # get list of pkl files in sirah folder
    wildcard = "*.pkl"
    pkl_file_list = glob.glob1(sirah_folder, wildcard)
    nevt = len(pkl_file_list)
    if nevt == 0:
        msgerr = []
        msgerr.append(f"Could not find any {wildcard} files")
        msgerr.append(f"in {sirah_folder}")
        util.log_assert(False, msgerr)

    self.config_inputs['pkl_file_list'] = pkl_file_list
    self.config_inputs['nevt'] = nevt
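
# Note: glob.glob1(folder, pattern) is an undocumented helper that returns
# bare file names (no directory prefix) matching pattern inside folder; it
# was deprecated and later removed in newer Python releases. A documented,
# equivalent sketch:
#
#   import fnmatch, os
#   pkl_file_list = fnmatch.filter(os.listdir(sirah_folder), "*.pkl")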
def init_read_data(self):
    args = self.config_inputs['args']  # command line args
    ztf_folder = args.ztf_folder

    # get list of csv files in ztf folder
    wildcard = "*.csv"
    file_list = glob.glob1(ztf_folder, wildcard)
    nevt = len(file_list)
    if nevt == 0:
        msgerr = []
        msgerr.append(f"Could not find any {wildcard} files")
        msgerr.append(f"in {ztf_folder}")
        util.log_assert(False, msgerr)

    self.config_inputs['file_list'] = file_list
    self.config_inputs['nevt'] = nevt
def write_phot_snana(f, head_raw, phot_raw, config_data):
    # write photometry (phot_raw) in SNANA format to text file
    # pointer f.

    nvar_obs      = config_data['nvar_obs']
    varlist_obs   = config_data['varlist_obs']
    varlist_fmt   = config_data['varlist_fmt']
    vallist_undef = config_data['vallist_undef']
    varstring_obs = ' '.join(varlist_obs)

    msgerr  = []
    SNID    = head_raw[gpar.DATAKEY_SNID]
    FILTERS = head_raw[gpar.DATAKEY_FILTERS]
    NOBS    = phot_raw[gpar.DATAKEY_NOBS]

    f.write(f"\n# -------------------------------------- \n"
            f"# obs info\n")
    f.write(f"NOBS: {NOBS}\nNVAR: {nvar_obs} \n"
            f"VARLIST: {varstring_obs}\n")

    for obs in range(0, NOBS):
        LINE = "OBS:"
        for varname, fmt, val_undef in \
                zip(varlist_obs, varlist_fmt, vallist_undef):
            val = phot_raw[varname][obs]
            if val is None:
                val = val_undef
            if val == 12345.333:  # gpar.VAL_ABORT; problem for DES??
                msgerr.append(f"Missing required PHOT column {varname}")
                msgerr.append(f"Check SNID = {SNID}")
                util.log_assert(False, msgerr)
            if varname == 'BAND':
                band = val[-1]
                if band not in FILTERS:
                    msgerr.append(f"Unknown band {band} is not in "
                                  f"{FILTERS} for SNID={SNID}")
                    msgerr.append(f"Check SURVEY_INFO[FILTERS] ")
                    util.log_assert(False, msgerr)
            LINE += f" {val:{fmt}}"
        f.write(f"{LINE}\n")

    # - - - - -
    f.write(f"END:\n")
    return
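
# A minimal, hypothetical sketch of the per-column formatting idiom used in
# write_phot_snana: each (fmt, undef) pair controls one column of an "OBS:"
# line. The variable names and formats below are illustrative only.
def _example_obs_line():
    varlist_fmt   = ['10.4f', '>4s', '12.4f']     # e.g. MJD, BAND, FLUXCAL
    vallist_undef = [-9.0, 'NULL', -9.0]
    row           = [59000.1234, 'g', 1234.5678]
    line = "OBS:"
    for val, fmt, val_undef in zip(row, varlist_fmt, vallist_undef):
        if val is None:
            val = val_undef
        line += f" {val:{fmt}}"   # nested format spec applies fmt to val
    return line   # -> "OBS: 59000.1234    g    1234.5678"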
def output_data_folder_name(config_data, data_unit_name, ISTEXT):
    prefix              = config_data['data_folder_prefix']
    data_unit_name_list = config_data['data_unit_name_list']

    if data_unit_name not in data_unit_name_list:
        msgerr = []
        msgerr.append(f"Invalid data unit '{data_unit_name}'")
        msgerr.append(f"Valid data units are :")
        msgerr.append(f"  {data_unit_name_list}")
        util.log_assert(False, msgerr)

    folder = f"{data_unit_name}"
    if ISTEXT:
        folder = f"{gpar.FORMAT_TEXT}_{folder}"
    return folder
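
# Hedged usage sketch (assuming gpar.FORMAT_TEXT == "TEXT" and a unit named
# "DES_ALL"; both names are illustrative):
#
#   output_data_folder_name(config_data, "DES_ALL", ISTEXT=True)   # "TEXT_DES_ALL"
#   output_data_folder_name(config_data, "DES_ALL", ISTEXT=False)  # "DES_ALL"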
def convert2fits_snana(args, config_data):
    # loop over newly created TEXT file versions and convert
    # to fits format ... then tar up TEXT folder.

    outdir            = args.outdir_snana
    text              = args.text
    nevent_list       = config_data['data_unit_nevent_list']
    name_list         = config_data['data_unit_name_list']
    prefix            = config_data['data_folder_prefix']
    readme_stats_list = config_data['readme_stats_list']
    NEVT_SPECTRA      = config_data['NEVT_SPECTRA']

    write_spectra = False
    opt_snana = gpar.OPTIONS_TEXT2FITS_SNANA
    if NEVT_SPECTRA > 0:  # global counter over all data units
        opt_snana += f" {gpar.OPTION_TEXT2FITS_SPECTRA_SNANA}"
        write_spectra = True

    print(f"")
    sys.stdout.flush()

    for nevent, name in zip(nevent_list, name_list):
        if nevent == 0:
            continue

        folder_text = output_data_folder_name(config_data, name, True)
        folder_fits = output_data_folder_name(config_data, name, False)
        index_unit  = name_list.index(name)
        log_file    = f"{folder_text}/convert2fits_{folder_fits}.log"
        yaml_file   = f"{outdir}/{folder_text}.YAML"  # expected output

        msg = f"  Convert TEXT -> FITS for {folder_fits}" \
              f"  NEVT={nevent} (write spectra: {write_spectra})"
        logging.info(msg)
        sys.stdout.flush()

        time_0 = datetime.datetime.now()
        outdir_text = f"{outdir}/{folder_text}"
        outdir_fits = f"{outdir}/{folder_fits}"

        # rm fits folder if still there from previous job
        if os.path.exists(outdir_fits):
            cmd_rm = f"cd {outdir} ; rm -r {folder_fits}"
            os.system(cmd_rm)

        cmd_snana = f"{gpar.PROGRAM_SNANA} NOFILE " \
                    f"PRIVATE_DATA_PATH ./ " \
                    f"VERSION_PHOTOMETRY {folder_text} " \
                    f"VERSION_REFORMAT_FITS {folder_fits} " \
                    f"{opt_snana} "
        cmd = f"cd {outdir}; {cmd_snana} > {log_file}"
        os.system(cmd)

        # - - - -
        # if YAML file doesn't exist, abort with message that
        # convert job probably aborted or crashed.
        if not os.path.exists(yaml_file):
            msgerr = []
            msgerr.append(f"Cannot find expected yaml file:")
            msgerr.append(f"  {yaml_file}")
            msgerr.append(f"TEXT->FITS convert job probably "
                          f"aborted or crashed;")
            msgerr.append(f"See convert-log file:")
            msgerr.append(f"  {outdir}/{log_file} ")
            util.log_assert(False, msgerr)

        # - - - - -
        # clean up: the TEXT->FITS translate code already gzips the
        # FITS files, so only make a compressed tar file from the
        # TEXT dir here and remove it (unless --text keeps it).
        tar_file = f"{folder_text}.tar"
        cmd_tar_text = f"cd {outdir} ; " \
                       f"tar -cf {tar_file} {folder_text} ; " \
                       f"gzip {tar_file} ; " \
                       f"rm -r {folder_text} "
        if not text:
            os.system(cmd_tar_text)

        # remove YAML file
        cmd_rm = f"rm {yaml_file}"
        os.system(cmd_rm)

        # re-write readme in FITS data folder
        readme_file = f"{outdir_fits}/{folder_fits}.README"
        readme_dict = {
            'readme_file':  readme_file,
            'readme_stats': readme_stats_list[index_unit],
            'data_format':  gpar.FORMAT_FITS,
            'docana_flag':  True
        }
        util.write_readme(args, readme_dict)

        time_1   = datetime.datetime.now()
        time_dif = (time_1 - time_0).total_seconds()
        rate     = int(float(nevent) / float(time_dif))
        logging.info(f"\t Rate(convert+cleanup): {rate}/sec ")
        sys.stdout.flush()

    # - - - - -
    return
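
# Hedged example of the assembled conversion command for a unit named
# "DES_ALL" (assuming gpar.PROGRAM_SNANA == "snana.exe"; both names are
# illustrative, and <opt_snana> stands for OPTIONS_TEXT2FITS_SNANA):
#
#   cd <outdir>; snana.exe NOFILE PRIVATE_DATA_PATH ./ \
#       VERSION_PHOTOMETRY TEXT_DES_ALL \
#       VERSION_REFORMAT_FITS DES_ALL \
#       <opt_snana> > TEXT_DES_ALL/convert2fits_DES_ALL.log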
def init_data_unit(self):
    # define every possible data unit here and store them in list.
    # Only units with data will have a directory created.
    # The name in each list is a name that will be part of the
    # folder name.

    args              = self.config_inputs['args']  # user command line args
    nsplit            = args.nsplitran
    isplit_select     = args.isplitran  # 1 to nsplit, or -1 for all
    iyear_select      = args.year       # 1-NYEAR, or -1 for all
    field_select      = args.field
    survey            = args.survey
    peakmjd_range     = args.peakmjd_range
    nite_detect_range = args.nite_detect_range
    outdir_lsst_alert = args.outdir_lsst_alert
    n_season          = gpar.MXSEASON

    # for MJD-related cuts, set n_season=1 so that there is
    # no explicit season breakdown
    if peakmjd_range is not None:
        n_season = 1
    if nite_detect_range is not None:
        n_season = 1

    unit_name_list   = []
    unit_nevent_list = []
    msgerr = []

    if isplit_select == 0 or isplit_select > nsplit:
        msgerr = []
        msgerr.append(f"Invalid --isplitran {isplit_select}")
        msgerr.append(f"Valid --isplitran arg range is 1 to {nsplit}")
        util.log_assert(False, msgerr)

    # - - - - - -
    for iseason in range(0, n_season):
        iyear = iseason + 1  # starts at 1
        if iyear_select > 0 and iyear != iyear_select:
            continue
        for isplit in range(0, nsplit):
            ISPLIT = -9
            if nsplit > 1:
                ISPLIT = isplit + 1
            if isplit_select > 0 and ISPLIT != isplit_select:
                continue

            do_all_seasons = \
                (isplit == 0 or isplit_select > 0) and iseason == 0
            do_one_season = (not outdir_lsst_alert)

            # define unit name for all seasons combined
            if do_all_seasons:
                unit_name = self.assign_data_unit_name(
                    survey, field_select, -1, ISPLIT)
                unit_name_list.append(unit_name)

            # define unit name for this season/iyear
            if do_one_season:
                unit_name = self.assign_data_unit_name(
                    survey, field_select, iyear, ISPLIT)
                unit_name_list.append(unit_name)

    # - - - - - - - - - - -
    # init 'exist' logical to false for each data unit
    n_data_unit      = len(unit_name_list)
    unit_nevent_list = [0] * n_data_unit

    self.config_data['data_folder_prefix']    = survey
    self.config_data['data_unit_name_list']   = unit_name_list
    self.config_data['data_unit_nevent_list'] = unit_nevent_list
    self.config_data['n_season']              = n_season

    readme_stats_list = []
    for i in range(0, n_data_unit):
        readme_stats_list.append(util.init_readme_stats())
    self.config_data['readme_stats_list'] = readme_stats_list
    self.config_data['NEVT_SPECTRA']      = 0
    return
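
# Worked trace of the loop above for nsplit=1, n_season=2, no --year or
# --isplitran selection, and outdir_lsst_alert not set (so ISPLIT stays -9):
# the first pass defines the all-seasons unit (iyear=-1) plus season 1,
# and the second pass adds season 2:
#
#   assign_data_unit_name(survey, field_select, -1, -9)   # all seasons
#   assign_data_unit_name(survey, field_select,  1, -9)   # season/year 1
#   assign_data_unit_name(survey, field_select,  2, -9)   # season/year 2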
def compute_data_event(self, data_event_dict):
    # compute & append a few variables to
    #    data_event_dict['head_raw']
    #    data_event_dict['head_calc']
    # Also count how many spectra and append to data_event_dict

    msgerr = []
    survey = self.config_inputs['args'].survey
    d_raw  = data_event_dict['head_raw']
    d_calc = data_event_dict['head_calc']

    snid     = d_raw[gpar.DATAKEY_SNID]
    zhel     = d_raw[gpar.DATAKEY_zHEL]
    zhel_err = d_raw[gpar.DATAKEY_zHEL_ERR]
    ra       = d_raw[gpar.DATAKEY_RA]
    dec      = d_raw[gpar.DATAKEY_DEC]

    snana_flag_fake = gpar.SNANA_FLAG_DATA

    # if SIM_MAGOBS column exists, label data type as FAKE.
    # Note that SIMs get labeled as FAKE.
    d_phot = data_event_dict['phot_raw']
    if gpar.VARNAME_TRUEMAG in d_phot:
        snana_flag_fake = gpar.SNANA_FLAG_FAKE

    if zhel > 0.0:
        zcmb = util.helio_to_cmb(zhel, ra, dec)
    else:
        zcmb = gpar.VAL_NULL

    # no urgency for loading MWEBV because TEXT->FITS translator
    # computes and stores MWEBV. However, if we want correct MWEBV
    # in the TEXT files, need to compute it here:
    if gpar.DATAKEY_MWEBV in d_calc:
        mwebv     = d_calc[gpar.DATAKEY_MWEBV]
        mwebv_err = d_calc[gpar.DATAKEY_MWEBV_ERR]
    else:
        mwebv     = -9.0
        mwebv_err = -9.0

    # - - - -
    dump_flag = False
    if dump_flag:
        print(f" xxx ------------------------------")
        print(f" xxx DUMP for compute_data_event")
        print(f" xxx SNID={snid}  RA={ra}  DEC={dec}  zhel={zhel:8.5f}")
        if gpar.DATAKEY_zCMB in d_calc:
            zcmb_deja = d_calc[gpar.DATAKEY_zCMB]
            print(f"\t already existing zcmb = {zcmb_deja:8.5f}")
        if gpar.DATAKEY_MWEBV in d_calc:
            mwebv_deja     = d_calc[gpar.DATAKEY_MWEBV]
            mwebv_deja_err = d_calc[gpar.DATAKEY_MWEBV_ERR]
            print(f"\t already existing mwebv = "
                  f"{mwebv_deja:8.5f} +- {mwebv_deja_err:8.5f} ")
        print(f" xxx COMPUTE zcmb  = {zcmb:8.5f}")
        print(f" xxx COMPUTE mwebv = {mwebv:8.5f} +- {mwebv_err:8.5f}")
        sys.stdout.flush()

    # - - - - - - -
    # load goodies
    d_raw[gpar.DATAKEY_SURVEY] = survey
    d_raw[gpar.DATAKEY_FAKE]   = snana_flag_fake

    if survey not in gpar.SURVEY_INFO['FILTERS']:
        msgerr.append(f"{survey} filters not defined")
        msgerr.append(f"Check SURVEY_INFO dictionary in "
                      f"makeDataFiles_params.py")
        util.log_assert(False, msgerr)
    else:
        d_raw[gpar.DATAKEY_FILTERS] = gpar.SURVEY_INFO['FILTERS'][survey]

    if survey in gpar.SURVEY_INFO['CCD']:
        d_raw[gpar.DATAKEY_NXPIX]   = gpar.SURVEY_INFO['CCD'][survey][0]
        d_raw[gpar.DATAKEY_NYPIX]   = gpar.SURVEY_INFO['CCD'][survey][1]
        d_raw[gpar.DATAKEY_PIXSIZE] = gpar.SURVEY_INFO['CCD'][survey][2]

    d_calc[gpar.DATAKEY_zCMB]      = zcmb
    d_calc[gpar.DATAKEY_zCMB_ERR]  = zhel_err
    d_calc[gpar.DATAKEY_MWEBV]     = mwebv
    d_calc[gpar.DATAKEY_MWEBV_ERR] = mwebv_err

    # if there is no VPEC, tack on default
    if gpar.DATAKEY_VPEC not in d_calc:
        d_calc[gpar.DATAKEY_VPEC]     = gpar.VPEC_DEFAULT[0]
        d_calc[gpar.DATAKEY_VPEC_ERR] = gpar.VPEC_DEFAULT[1]

    # check if there are spectra
    if 'spec_raw' in data_event_dict:
        spec_raw  = data_event_dict['spec_raw']
        n_spectra = len(spec_raw)
    else:
        # if read source ignores spectra, add empty dictionary
        # to avoid crash later
        data_event_dict['spec_raw'] = {}
        n_spectra = 0

    data_event_dict['n_spectra'] = n_spectra
    return
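
# Hedged first-order sketch of the heliocentric -> CMB-frame conversion
# performed by util.helio_to_cmb above; the real helper may use the exact
# relativistic form. Apex and velocity values are approximate: the Sun moves
# at ~371 km/s toward roughly (RA, DEC) = (168, -7) deg (J2000). numpy (np)
# is already imported by this module.
def _example_helio_to_cmb(z_hel, ra_deg, dec_deg,
                          ra_apex=168.0, dec_apex=-7.0, v_sun=371.0):
    c = 299792.458   # speed of light, km/s
    ra1, dec1 = np.radians(ra_deg), np.radians(dec_deg)
    ra2, dec2 = np.radians(ra_apex), np.radians(dec_apex)
    # cosine of angle between the object and the CMB dipole apex
    cos_theta = (np.sin(dec1) * np.sin(dec2) +
                 np.cos(dec1) * np.cos(dec2) * np.cos(ra1 - ra2))
    return z_hel + (v_sun / c) * cos_theta   # correction to first order in v/c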
def which_data_unit(self, data_dict):
    # use data header info to figure out which data unit.
    # If no data unit is matched, return None

    n_season = self.config_data['n_season']
    args     = self.config_inputs['args']  # user command line args

    nsplit            = args.nsplitran
    isplit_select     = args.isplitran  # 1 to nsplit, or -1 for all
    iyear_select      = args.year       # 1 to nyear, or -1 for all
    field_select      = args.field
    survey            = args.survey
    peakmjd_range     = args.peakmjd_range
    nite_detect_range = args.nite_detect_range

    data_unit_name = None
    d_raw  = data_dict['head_raw']
    d_calc = data_dict['head_calc']

    SNID       = d_raw[gpar.DATAKEY_SNID]
    RA         = d_raw[gpar.DATAKEY_RA]
    DEC        = d_raw[gpar.DATAKEY_DEC]
    FIELD      = d_raw[gpar.DATAKEY_FIELD]
    PEAKMJD    = d_calc[gpar.DATAKEY_PEAKMJD]
    MJD_DETECT = d_calc[gpar.DATAKEY_MJD_DETECT_FIRST]

    # - - - - - - - - - - - - - - - - -
    # check match for field
    if field_select == gpar.FIELD_VOID:
        match_field = True  # no --field arg
    else:
        match_field = (field_select == FIELD)
    if not match_field:
        return None

    # - - - - - - - - -
    # check match for season/year
    YY = -1  # no explicit season dependence
    if n_season > 1:
        # create dictionary needed to determine iyear
        small_event_dict = {
            'peakmjd':    PEAKMJD,
            'mjd_detect': MJD_DETECT,
            'ra':         RA,
            'dec':        DEC,
            'field':      FIELD
        }
        YY = util.iyear_survey(survey, small_event_dict)

    match_year = True
    if iyear_select > 0:
        match_year = (YY == iyear_select)
    if not match_year:
        return None

    # - - - - - - - - - - - - -
    # check match for split job
    match_split = True
    ISPLIT = -9
    if nsplit > 1:
        iSNID  = int(SNID)
        isplit = iSNID % nsplit  # counter starts at 0
        ISPLIT = isplit + 1      # counter starts at 1
        if isplit_select > 0:
            match_split = (ISPLIT == isplit_select)
    if not match_split:
        return None

    # - - - - - - - -
    data_unit_name = \
        self.assign_data_unit_name(survey, field_select, YY, ISPLIT)
    data_unit_name_list = self.config_data['data_unit_name_list']

    if data_unit_name not in data_unit_name_list:
        msgerr = []
        msgerr.append(f"Invalid data_unit_name = {data_unit_name}")
        msgerr.append(f"for SNID = {SNID} .")
        msgerr.append(f"RA={RA}  DEC={DEC}  PEAKMJD={PEAKMJD}")
        msgerr.append(f"Valid data_unit_name_list = ")
        msgerr.append(f"  {data_unit_name_list}")
        util.log_assert(False, msgerr)

    # - - - - -
    return data_unit_name
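
# Minimal illustration of the deterministic split assignment used above:
# ISPLIT = (int(SNID) % nsplit) + 1 maps each event to one of nsplit jobs,
# e.g. SNID=1234567 with nsplit=10 lands in ISPLIT = 7 + 1 = 8.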
def read_event_legacy(self, evt):
    msgerr     = []
    table_dict = self.config_data['table_dict']
    args       = self.config_inputs['args']  # command line args

    # read and store one event for row "evt" and return data_dict.
    varlist_obs = self.config_data['varlist_obs']

    # define local pointers to head and phot tables from FITS file
    table_head = table_dict['table_head']
    table_phot = table_dict['table_phot']
    head_names = table_dict['head_names']
    phot_names = table_dict['phot_names']

    # init output dictionaries
    head_raw, head_calc, head_sim = util.reset_data_event_dict()

    try:
        SNID = table_head.SNID[evt].decode('utf-8').replace(' ', '')
    except Exception:
        SNID = table_head.SNID[evt]

    head_raw[gpar.DATAKEY_SNID]   = SNID
    head_raw[gpar.DATAKEY_SNTYPE] = table_head.SNTYPE[evt]
    head_raw[gpar.DATAKEY_RA]     = table_head.RA[evt]

    # check 'DEC' and legacy column name 'DECL'
    head_raw[gpar.DATAKEY_DEC] = \
        util.get_snana_table_value(['DEC', 'DECL'], evt, table_head)

    # lightcurve-MJD info. Note that MJD_DETECT_FIRST is optional
    head_calc[gpar.DATAKEY_PEAKMJD] = int(table_head.PEAKMJD[evt])

    if gpar.DATAKEY_MJD_DETECT_FIRST in head_names:
        head_calc[gpar.DATAKEY_MJD_DETECT_FIRST] = \
            table_head.MJD_DETECT_FIRST[evt]
        head_calc[gpar.DATAKEY_MJD_DETECT_LAST] = \
            table_head.MJD_DETECT_LAST[evt]
    else:
        if args.nite_detect_range is not None:
            msgerr.append(f"Cannot implement args.nite_detect_range = "
                          f"{args.nite_detect_range}")
            msgerr.append(f"because {gpar.DATAKEY_MJD_DETECT_FIRST} "
                          f"is not in data header")
            util.log_assert(False, msgerr)

    # - - - - - - -
    # check user sub-sample selection here to avoid reading
    # remainder of header and photometry for rejected events.
    apply_select = True
    if apply_select:
        var_dict = {
            gpar.DATAKEY_SNID:    int(SNID),
            gpar.DATAKEY_RA:      head_raw[gpar.DATAKEY_RA],
            gpar.DATAKEY_DEC:     head_raw[gpar.DATAKEY_DEC],
            gpar.DATAKEY_PEAKMJD: head_calc[gpar.DATAKEY_PEAKMJD],
            gpar.DATAKEY_MJD_DETECT_FIRST:
                head_calc[gpar.DATAKEY_MJD_DETECT_FIRST]
        }
        sel = util.select_subsample(args, var_dict)
        if sel is False:
            data_dict = {
                'head_raw':  head_raw,
                'head_calc': head_calc,
                'select':    False
            }
            return data_dict

    # - - - - - - -
    head_raw[gpar.DATAKEY_zHEL]     = table_head.REDSHIFT_HELIO[evt]
    head_raw[gpar.DATAKEY_zHEL_ERR] = table_head.REDSHIFT_HELIO_ERR[evt]

    # strip off calculated values
    head_calc[gpar.DATAKEY_zCMB]      = table_head.REDSHIFT_FINAL[evt]
    head_calc[gpar.DATAKEY_zCMB_ERR]  = table_head.REDSHIFT_FINAL_ERR[evt]
    head_calc[gpar.DATAKEY_MWEBV]     = table_head.MWEBV[evt]
    head_calc[gpar.DATAKEY_MWEBV_ERR] = table_head.MWEBV_ERR[evt]

    # - - - - - -
    # store HOSTGAL and HOSTGAL2 keys in head_raw[calc]
    util.store_snana_hostgal(gpar.DATAKEY_LIST_RAW,  evt, table_dict,
                             head_raw)
    util.store_snana_hostgal(gpar.DATAKEY_LIST_CALC, evt, table_dict,
                             head_calc)

    # check for true sim type (sim or fakes), Nov 14 2021
    if gpar.SIMKEY_TYPE_INDEX in head_names:
        head_sim[gpar.SIMKEY_TYPE_INDEX] = \
            table_head[gpar.SIMKEY_TYPE_INDEX][evt]

    # - - - - - - - - - - -
    # get pointers to PHOT table.
    # Beware that PTROBS pointers start at 1 instead of 0,
    # so subtract 1 here to have python indexing.
    ROWMIN = table_head.PTROBS_MIN[evt] - 1
    ROWMAX = table_head.PTROBS_MAX[evt] - 1
    NOBS   = ROWMAX - ROWMIN + 1

    phot_raw = self.init_phot_dict(NOBS)

    table_column_names = table_phot.columns.names
    LEGACY_FLT = ('FLT' in table_column_names)  # legacy column name for band

    for varname in varlist_obs:
        varname_table = varname
        if LEGACY_FLT and varname == 'BAND':
            varname_table = 'FLT'
        if varname_table in table_column_names:
            phot_raw[varname] = \
                table_phot[varname_table][ROWMIN:ROWMAX+1].copy()

    # - - - - -
    # get field from first observation.
    # Beware that event can overlap multiple fields.
    field = phot_raw[gpar.DATAKEY_FIELD][0]
    if args.survey == 'LSST':
        field = util.field_plasticc_hack(field, table_dict['head_file'])
    head_raw[gpar.DATAKEY_FIELD] = field

    # - - - -
    spec_raw = {}

    # - - - - -
    # load output dictionary
    data_dict = {
        'head_raw':  head_raw,
        'head_calc': head_calc,
        'phot_raw':  phot_raw,
        'spec_raw':  spec_raw,
    }
    if len(head_sim) > 0:
        data_dict['head_sim'] = head_sim
    if apply_select:
        data_dict['select'] = True

    return data_dict
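
# Worked example of the PTROBS indexing in read_event_legacy: for 1-based
# FITS pointers PTROBS_MIN=101 and PTROBS_MAX=150, ROWMIN=100 and ROWMAX=149,
# so NOBS = 149 - 100 + 1 = 50 and the python slice [ROWMIN:ROWMAX+1]
# selects the 50 rows 100..149.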