def dos_rovib(ke_ped_out):
    """ Parse the rovibrational density of states of the bimolecular
        fragments out of a microcanonical rate output string.

        The table is located through its title line
        'Bimolecular fragments density of states:'. The line right after the
        title carries the column labels (its first two tokens are skipped)
        and every following line, up to the end of the string, is read as
        one row of numbers whose first entry is the energy.
        Units as stated by the original documentation: energy in kcal/mol,
        density of states in mol/kcal.

        :param ke_ped_out: string of lines of microcanonical rates output file
        :type ke_ped_out: str
        :return dos_rovib_df: density of states, one column per fragment
            label, indexed by energy; columns holding identical values are
            collapsed onto the first of them
        :rtype dos_rovib_df: dataframe(float)
    """

    lines = ke_ped_out.splitlines()

    # title line -> header line (+1) -> first numeric row (+2)
    title_idx = apf.where_in(
        'Bimolecular fragments density of states:', lines)[0]
    header_idx = title_idx + 1
    first_row_idx = title_idx + 2

    fragment_labels = lines[header_idx].strip().split()[2:]

    # row-major table: column 0 is the energy, the rest are the fragments
    table = numpy.array(
        [row.strip().split() for row in lines[first_row_idx:]],
        dtype=float)
    energies = table[:, 0]
    densities = table[:, 1:]

    dos_rovib_df = pd.DataFrame(
        densities, index=energies, columns=fragment_labels)

    # drop_duplicates works on rows, hence the round trip through the
    # transpose to get rid of potentially duplicate columns
    transposed = dos_rovib_df.T
    dos_rovib_df = transposed.drop_duplicates().T

    return dos_rovib_df
def get_hot_names(input_str):
    """ Collect the names of the hot species declared in a MESS input
        string.

        The first line matching 'HotEnergies' gives the number of hot
        species as its second token; the first token of each of the lines
        that follow it (as many as that number) is taken as a species name.
        Slots for which no line is available are left as None.

        :param input_str: string of lines of MESS input file
        :type input_str: str
        :return: names of the hot species, in order of appearance
        :rtype: tuple(str)
    """

    mess_lines = input_str.splitlines()

    header_idx = apf.where_in('HotEnergies', mess_lines)[0]
    n_hot = int(mess_lines[header_idx].split()[1])

    # species entries start right below the 'HotEnergies' header
    first_idx = header_idx + 1
    entries = mess_lines[first_idx:first_idx + n_hot]

    names = [None] * n_hot
    for slot, entry in enumerate(entries):
        names[slot] = entry.split()[0]

    return tuple(names)
def get_ped(pedoutput_str, pedspecies, energy_dct):
    """ Read a `PEDOutput` string and extract the product energy
        distribution at each T, P.
        Energies in the output are shifted by minus the energy of the
        products, i.e. they are referred to the products.

        For every 'REAC->PROD' label the table that follows each line
        containing the label is read up to the next empty line (or to the
        end of the string when no empty line follows). Probabilities are cut
        at the first negative value, sorted by energy and normalized to unit
        area with the trapezoidal rule.

        :param pedoutput_str: string of lines of ped_output file
        :type pedoutput_str: str
        :param pedspecies: (reactant, product) label pairs of interest
        :type pedspecies: list(list(str))
        :param energy_dct: energies of the species/barriers in the PES;
            must contain the product label of every pair
        :type energy_dct: dct(label: float)
        :return ped_df_dct: for each 'REAC->PROD' label a dataframe
            (index: T, columns: P) whose cells hold the Series of the
            normalized energy distribution. The dictionary is pre-seeded
            with every key of energy_dct mapped to None.
        :rtype ped_df_dct: {'REAC->PROD': dataframe(series(float))}
    """

    ped_lines = pedoutput_str.splitlines()

    # lines carrying the T, P of each block, formatted as '<name> = <value>'
    pressure_i = apf.where_in('pressure', ped_lines)
    temperature_i = apf.where_in('temperature', ped_lines)

    pressure_lst = np.array(
        [ped_lines[idx].strip().split('=')[1] for idx in pressure_i],
        dtype=float)
    temperature_lst = np.array(
        [ped_lines[idx].strip().split('=')[1] for idx in temperature_i],
        dtype=float)

    # NumPy 2.0 renamed trapz to trapezoid: use whichever is available
    trapezoid = getattr(np, 'trapezoid', None)
    if trapezoid is None:
        trapezoid = np.trapz

    ped_df_dct = dict.fromkeys(energy_dct.keys())

    # empty lines close the tables; they do not depend on the species
    empty_i = np.asarray(apf.where_is('', ped_lines))
    n_lines = len(ped_lines)

    for species in pedspecies:
        # one cell per (T, P); each cell will hold a whole Series
        ped_df = pd.DataFrame(index=list(set(temperature_lst)),
                              columns=list(set(pressure_lst)),
                              dtype=object)
        _, prods = species
        label = '->'.join(species)
        # 0th of the energy: products energy
        ene0 = -energy_dct[prods]

        # first data line of each table: the one after the header with label
        species_i = apf.where_in(label, ped_lines) + 1

        # end of each table: first empty line after its start, or the end of
        # the file when the last table is not followed by an empty line
        final_i = []
        for start in species_i:
            later_blanks = empty_i[empty_i > start]
            final_i.append(later_blanks[0] if later_blanks.size else n_lines)

        # column of the label among the header tokens (first token dropped,
        # so that the index also addresses the transposed data table, whose
        # row 0 is the energy)
        column_i = apf.where_is(
            label, ped_lines[species_i[0] - 1].strip().split()[1:])[0]

        # NOTE(review): the i-th occurrence of the label is paired with the
        # i-th pressure/temperature; this assumes one table per T, P block.
        for i, (i_in, i_fin) in enumerate(zip(species_i, final_i)):
            # if labels don't match: go to next loop
            if label not in ped_lines[i_in - 1]:
                continue

            pressure, temp = pressure_lst[i], temperature_lst[i]

            en_prob_all = np.array(
                [line.strip().split() for line in ped_lines[i_in:i_fin]],
                dtype=float).T
            energy = en_prob_all[0] + ene0
            probability = en_prob_all[column_i]

            prob_en = pd.Series(probability, index=energy, dtype=float)

            # drop everything from the first negative probability onwards.
            # The cut is positional on purpose: a label slice on the float
            # energy index includes its end point and would therefore keep
            # the first negative value in the distribution.
            negative = probability < 0
            if negative.any():
                prob_en = prob_en.iloc[:int(np.argmax(negative))]
            prob_en = prob_en.sort_index()

            # normalize to unit area (trapezoidal rule)
            norm_factor = trapezoid(prob_en.values, x=prob_en.index)

            # single-step scalar access: chained `.loc[temp][pressure] = ...`
            # may write to a temporary copy and leave ped_df untouched
            ped_df.at[temp, pressure] = prob_en / norm_factor

        ped_df_dct[label] = ped_df

    return ped_df_dct
def get_dof_info(block, ask_for_ts=False):
    """ Build a table with the number of vibrational and rotational degrees
        of freedom and the molecular weight of each species of a bimolecular
        pair, optionally adding a row for their transition state.

        Each species string is expected to contain a 'Species' line (second
        token: name), a 'Geometry' line (second token: number of atoms)
        followed by one line per atom starting with the element symbol, and
        possibly a 'Frequencies' line (second token: number of frequencies)
        and 'Hindered' lines.

        :param block: bimol species of which you want the dofs
        :type block: list(str1, str2)
        :param ask_for_ts: build the dof info also for the ts
        :type ask_for_ts: bool
        :return dof_info: dataframe with vibrat/rot degrees of freedom
            and molecular weight
        :rtype: dataframe(index=species, columns=['vib dof', 'rot dof', 'mw'])
    """

    column_names = ['vib dof', 'rot dof', 'mw']
    n_rows = 2 + int(ask_for_ts)
    info_array = np.zeros((n_rows, len(column_names)))
    keys = []
    atoms_ts = 0

    for row, species_block in enumerate(block):
        info = species_block.splitlines()

        where_name = find.where_in('Species', info)[0]
        where_hind = find.where_in('Hindered', info)
        where_geom = find.where_in('Geometry', info)[0]

        num_atoms = int(info[where_geom].strip().split()[1])
        atoms_ts += num_atoms
        keys.append(info[where_name].strip().split()[1])

        # vibrational dofs = frequencies + hindered rotors; comparing them
        # with 3N tells 3N-6 from 3N-5 without analyzing the geometry
        try:
            where_freq = find.where_in('Frequencies', info)[0]
            num_freqs = int(info[where_freq].strip().split()[1])
            num_dof = num_freqs + len(where_hind)
            rot_dof = 3 if 3 * num_atoms - num_dof == 6 else 2
        except IndexError:
            # if 1 atom only: no 'Frequencies', set to 0
            num_dof = 0
            rot_dof = 0

        info_array[row, :2] = num_dof, rot_dof

        # molecular weight from the element symbols of the geometry lines
        geom_start = where_geom + 1
        geom_lines = info[geom_start:geom_start + num_atoms]
        symbols = [geomline.strip().split()[0] for geomline in geom_lines]
        masses = np.array(
            [MW_dct_elements[symbol] for symbol in symbols], dtype=float)
        info_array[row, 2] = masses.sum()

    if ask_for_ts:
        # the first 2 blocks are taken as the 2 reactants of a bimol
        # reaction; the TS is assumed to be non-linear
        keys.append('TS')
        ts_mw = info_array[0, 2] + info_array[1, 2]
        info_array[2, :] = [3 * atoms_ts - 7, 3, ts_mw]

    return pd.DataFrame(info_array, index=keys, columns=column_names)
def extract_hot_branching(hotenergies_str, hotspecies_lst, species_lst,
                          temps, pressures):
    """ Extract the hot branching fractions of each hot species at each
        T, P found in a MESS log string.

        For every 'Hot distribution branching ratios' block, the rows that
        start with the name of a hot species are read as
        '<name> <energy> <fraction> ...'. Only the first valid row found for
        a given energy is used. Fractions outside (1e-5, 1] are set to zero
        and each row is renormalized to sum to 1; rows with no valid
        fraction are discarded. Blocks whose T, P are not on the requested
        grid are ignored.

        :param hotenergies_str: string of mess log file
        :type hotenergies_str: str
        :param hotspecies_lst: list of hot species
        :type hotspecies_lst: list
        :param species_lst: list of all species on the PES
        :type species_lst: list
        :param temps: temperatures used as index of the output dataframes
        :type temps: list(float)
        :param pressures: pressures used as columns of the output dataframes
        :type pressures: list(float)
        :return hoten_dct: hot branching fractions for hotspecies
        :rtype hoten_dct: dct{hotspecies: df[P][T]:df[allspecies][energies]}
    """

    lines = hotenergies_str.splitlines()

    # for each hot species: dataframe (index T, columns P) whose cells will
    # hold a dataframe of branching fractions (index energy, columns species)
    hoten_dct = {
        s: pd.DataFrame(index=temps, columns=pressures)
        for s in hotspecies_lst
    }

    # 1. locate the T, P line, the start and the end of each block
    pt_i_array = apf.where_in(['Pressure', 'Temperature'], lines)
    hot_i_array = apf.where_in(['Hot distribution branching ratios'], lines)
    end_hot_i_array = apf.where_in(
        ['prompt', 'isomerization', 'dissociation'], lines)

    # 2. parse each 'Hot distribution branching ratios' block
    for i, hot_i in enumerate(hot_i_array):
        # data rows start 2 lines below the title; the line in between is
        # the header with the species the fractions refer to
        lines_block = lines[hot_i + 2:end_hot_i_array[i]]
        # pressure and temperature are tokens 2 and 6 of the P, T line
        _press, _temp = [
            float(var)
            for var in lines[pt_i_array[i]].strip().split()[2:7:4]
        ]
        species_bf_i = lines[hot_i + 1].strip().split()[3:]

        for hotspecies in hotspecies_lst:
            pt_frame = hoten_dct[hotspecies]
            # nothing to store for conditions outside the requested grid
            if _temp not in pt_frame.index or _press not in pt_frame.columns:
                continue

            hot_e_lvl, branch_ratio = [], []
            seen_energies = set()
            # column(s) of the hot species itself among the products
            # NOTE(review): presumably a substring match; more than one hit
            # makes the comparisons below ambiguous - confirm that species
            # names are never prefixes of one another.
            sp_i = apf.where_in(hotspecies, species_bf_i)

            for line in lines_block:
                line = line.strip()
                if not line.startswith(hotspecies):
                    continue
                tokens = line.split()
                hot_e = float(tokens[1])
                if hot_e in seen_energies:
                    continue

                branch_ratio_arr = np.array(tokens[2:], dtype=float)

                # own fraction of the hot species: a negative value
                # invalidates the row, a value above 1 is clamped to 1
                if sp_i.size > 0:
                    if branch_ratio_arr[sp_i] < 0:
                        continue
                    if branch_ratio_arr[sp_i] > 1:
                        branch_ratio_arr[sp_i] = 1

                # zero every fraction that is not in (1e-5, 1]; this also
                # discards nan entries, whose comparisons are False
                valid = (branch_ratio_arr > 1e-5) & (branch_ratio_arr <= 1)
                br_filter = np.where(valid, branch_ratio_arr, 0.0)

                # if all invalid: do not save
                if not br_filter.any():
                    continue

                branch_ratio.append(br_filter / br_filter.sum())
                hot_e_lvl.append(hot_e)
                seen_energies.add(hot_e)

            # 3. allocate in the dataframe
            # the explicit 2D shape keeps the column assignment valid also
            # when no row was retained (an empty 1D array is rejected)
            branch_ratio = np.array(branch_ratio, dtype=float).reshape(
                len(hot_e_lvl), len(species_bf_i))
            bf_hotspecies = pd.DataFrame(
                0, index=np.array(hot_e_lvl), columns=species_lst)
            bf_hotspecies[species_bf_i] = branch_ratio

            # single-step scalar access: chained `df[P][T] = ...` may write
            # to a temporary copy and leave the dataframe untouched
            pt_frame.at[_temp, _press] = bf_hotspecies

    return hoten_dct