def get_next_ids(test=False):
    """Read the next cs_id and csdata_id from the ``next_id.tsv`` file.

    Parameters
    ----------
    test: bool, optional
        If true, the command is run for the ``nepc_test`` database. Otherwise,
        it is run for the ``nepc`` database. (Default is the ``nepc`` database.)

    Returns
    -------
    : int
        Next cs_id to use
    : int
        Next csdata_id to use
    """
    # Test data lives under the repo tree; production data under the cs home.
    nepc_data_home = (nepc_config.nepc_home() + '/tests/data/'
                      if test
                      else nepc_config.nepc_cs_home() + '/data/')
    filename = nepc_data_home + "/next_id.tsv"
    with open(filename) as id_file:
        lines = id_file.readlines()
    # Row 0 is the header; row 1 holds the tab-separated id pair.
    cs_id_str, csdata_id_str = lines[1].split('\t')
    return int(cs_id_str), int(csdata_id_str)
def process_attr(process: str, attr_list: List[str], test=False):
    """Look up attributes of a process from the ``processes.tsv`` table.

    Parameters
    ----------
    process : str
        Name of the process (matched against the ``name`` column).
    attr_list : List[str]
        Column names to fetch for the matching process.
    test : bool, optional
        If true, read from the ``nepc_test`` data tree; otherwise from the
        ``nepc`` data tree.

    Returns
    -------
    dict
        Mapping of each requested attribute to its value for the process.
    """
    data_home = (config.nepc_home() + "/tests/data"
                 if test
                 else config.nepc_cs_home() + "/data")
    proc_table = pd.read_csv(data_home + '/processes.tsv', sep='\t', header=0)
    matching = proc_table.loc[proc_table.name == process]
    return {attr: matching[attr].values[0] for attr in attr_list}
def get_states(test=False):
    """Get lists of name's and long_name's from states.tsv file.

    Parameters
    ----------
    test : bool, optional
        If true, read from the ``nepc_test`` data tree; otherwise from the
        ``nepc`` data tree.

    Returns
    -------
    tuple of (list, list)
        The ``name`` column (index 1) and ``long_name`` column (index 2)
        of every data row in states.tsv.
    """
    nepc_data_home = (nepc_config.nepc_home() + '/tests/data/'
                      if test
                      else nepc_config.nepc_cs_home() + '/data/')
    # Skip the header row, split the rest on tabs.
    with open(nepc_data_home + 'states.tsv') as states_f:
        rows = [line.split('\t') for line in states_f.readlines()[1:]]
    names = [row[1] for row in rows]
    long_names = [row[2] for row in rows]
    return (names, long_names)
def write_next_id_to_file(next_cs_id, next_csdata_id, test=False):
    """Write out the next id's for the database to a file.

    Parameters
    ----------
    next_cs_id : int
        The next cs_id to use
    next_csdata_id: int
        The next csdata_id to use
    test: bool, optional
        If true, the command is run for the ``nepc_test`` database. Otherwise,
        it is run for the ``nepc`` database. (Default is the ``nepc`` database.)
    """
    if test:
        nepc_data_home = nepc_config.nepc_home() + '/tests/data/'
    else:
        nepc_data_home = nepc_config.nepc_cs_home() + '/data/'
    filename = nepc_data_home + "/next_id.tsv"
    # Context manager guarantees the file handle is closed even if a write
    # raises (the original open()/close() pair leaked on error).
    with open(filename, "w+") as id_file:
        id_file.write("\t".join(('next_cs_id', 'next_csdata_id')) + "\n")
        id_file.write("\t".join((str(next_cs_id), str(next_csdata_id))))
# Command-line options drive database selection and processing limits.
ARGS = PARSER.parse_args()

# Debug runs cap the number of cross sections / rates processed.
MAX_CS = MAX_RATE = 50 if ARGS.debug else 2000000

if ARGS.test:
    database = 'nepc_test'
    NEPC_DATA = config.nepc_home() + "/tests/data/"
    DIR_NAMES = ["/cs/n2/fict/", "/cs/n2/fict_total/"]
else:
    database = 'nepc'
    NEPC_DATA = config.nepc_cs_home() + "/data/"
    DIR_NAMES = [
        "/cs/n2/itikawa/",
        "/cs/n2/zipf/",
        "/cs/n/zatsarinny/",
        "/cs/n2/phelps/",
        "/cs/n2/phelps_total/",
        "/cs/n2/little/",
    ]

T0 = time.time()
HOME = config.user_home()


def np_str(df, row, name):
    """Return ``df.iloc[row][name]``, mapping the escape "\\N" to "Null"."""
    cell = df.iloc[row][name]
    return "Null" if cell == "\\N" else cell
class CurateGenerated(CurateCS):
    """Template for curating generated cross section data """

    from nepc.util import config
    # Root of the production cross-section data tree; raw data files in the
    # curated metadata are resolved relative to NEPC_DATA.
    NEPC_CS_HOME = config.nepc_cs_home()
    NEPC_DATA = NEPC_CS_HOME + '/data/'

    def curate(self, datadir: str, species: str, title: str,
               units_e=None, units_sigma=None, augment_dicts=None,
               initialize_nepc=False, test=False, debug=False,
               next_cs_id=None, next_csdata_id=None, cs_ids=None) -> None:
        """Curation driver function for generated cross section files.

        Runs the full pipeline: initialize the database and file lists,
        read/clean/augment/verify the cross section data, then write it
        out and finalize the id bookkeeping.
        """
        next_cs_id, next_csdata_id = self.initialize_db(
            initialize_nepc, test, debug, next_cs_id, next_csdata_id)
        filelist = self.initialize_input(datadir, species, title)
        outdir = self.initialize_output(datadir, species, title)
        print(f'outdir: {outdir}')
        csdata = self.get_csdata(filelist, debug=debug)
        self.clean_csdata(csdata, debug=debug)
        self.augment_csdata(csdata, outdir, title, units_e, units_sigma,
                            augment_dicts)
        self.verify_csdata(csdata, debug=debug)
        next_cs_id, next_csdata_id = self.write_csdata(csdata, next_cs_id,
                                                       next_csdata_id)
        self.finalize(next_cs_id, next_csdata_id, test, debug)

    def get_csdata(self, filelist, debug=False):
        """Get generated cross section data for curation process.

        Each file in ``filelist`` is a TOML metadata file; the referenced
        raw data file is loaded into ``data['e']``/``data['sigma']`` and the
        threshold is set to the energy of the first non-zero sigma point.
        """
        import toml
        csdata = []
        for filename in filelist:
            with open(filename, 'r') as f:
                metadata = toml.load(f)
            # BUGFIX: removed a dead `csdata_dict = dict()` assignment that
            # was immediately rebound to `metadata` in the original.
            csdata_dict = metadata
            csdata_dict['data'] = dict()
            csdata_dict['data']['e'], csdata_dict['data']['sigma'] = np.loadtxt(
                self.NEPC_DATA + csdata_dict['files']['raw_data_file'],
                unpack=True)
            # Threshold = energy of the first point with sigma > 0.
            csdata_dict['metadata']['threshold'] = csdata_dict['data']['e'][
                np.min(np.where(csdata_dict['data']['sigma'] > 0))]
            csdata.append(csdata_dict)
        return csdata

    def clean_csdata(self, csdata, debug=False):
        """Clean generated cross section data during curation process. """
        self.remove_zeros(csdata, debug)

    def augment_csdata(self, csdata, outdir, title, units_e, units_sigma,
                       augment_dicts=None, debug=False, test=False):
        """Augment generated cross section metadata before writing.

        Builds the nepc output filename for each cross section and checks
        that the lhs/rhs state attributes present in the metadata match the
        attribute counts declared for the process in processes.tsv.

        Raises
        ------
        Exception
            If the metadata attributes do not match the process definition.
        """
        check_process_attr = ['lhs', 'rhs', 'lhs_hv', 'rhs_hv',
                              'lhs_v', 'rhs_v', 'lhs_j', 'rhs_j']
        for cs in csdata:
            cs['metadata']['nepc_filename'] = (
                outdir + '/' + title + '_' + cs['metadata']['file_suffix'])
            process_attr_values = nepc.process_attr(
                cs['metadata']['process'], check_process_attr, test)
            process_attr_keys = {'lhs': ['lhs_a', 'lhs_b'],
                                 'rhs': ['rhs_a', 'rhs_b'],
                                 'lhs_v': ['lhs_v'], 'rhs_v': ['rhs_v'],
                                 'lhs_hv': ['lhs_hv'], 'rhs_hv': ['rhs_hv'],
                                 'lhs_j': ['lhs_j'], 'rhs_j': ['rhs_j']}
            # Idiom fix: original used `enumerate` and discarded the index.
            for key, value in process_attr_keys.items():
                # Count of attributes present must equal the count declared
                # for this process in processes.tsv.
                if sum(k in cs['metadata'].keys()
                       for k in value) != process_attr_values[key]:
                    raise Exception(f"Mismatch in cs['metadata'] for {key} in {cs['metadata']['file_suffix']}.")
                for v in value:
                    cs['metadata'][v] = self.value(cs['metadata'], v)

    def verify_csdata(self, csdata, debug=False) -> None:
        """Verify cross section data in curation process.

        For each cross section with a ``verification_data_file``, the
        verification data is loaded, zero-sigma points are removed the same
        way as for the curated data, and both e and sigma arrays must agree
        (``np.allclose``) with the curated values.

        Raises
        ------
        Exception
            If the curated data does not match the verification data.
        """
        for cs in csdata:
            print("================")
            if 'verification_data_file' in cs['files']:
                print(f"verifying {cs['metadata']['file_suffix']}")
                verdata = dict()
                verdata['data'] = dict()
                verdata['data']['e'], verdata['data']['sigma'] = np.loadtxt(
                    self.NEPC_DATA + cs['files']['verification_data_file'],
                    unpack=True)
                print('removing zeroes from verification data')
                self.remove_zeros([verdata], debug)
                print(f"cs: {cs['data']['e']}\tverdata: {verdata['data']['e']}")
                e_allclose = np.allclose(cs['data']['e'], verdata['data']['e'])
                print(f"e_allclose: {e_allclose}")
                sigma_allclose = np.allclose(cs['data']['sigma'],
                                             verdata['data']['sigma'])
                print(f"sigma_allclose: {sigma_allclose}")
                if e_allclose and sigma_allclose:
                    print("verified")
                else:
                    raise Exception(f"Problem with verification of {cs['metadata']['file_suffix']} data.")
            else:
                print(f"No verification data for {cs['metadata']['file_suffix']}")

    def __str__(self) -> str:
        return "generated cross section curation"

    @property
    def datatype(self) -> str:
        """Provide data type for curation process """
        return "generated"